jmx-exporter+prometheus+grafana监控weblogic配置
- DevOps
- 2023-03-16
- 198热度
- 1评论
背景
生产环境的 weblogic 服务经常出现异常。目前通过 spug 做站点监测,http 状态码以 4 或 5 开头时会发告警;但我们还想针对 weblogic 本身的数据源和 jvm 层面的相关指标进行监控告警。spug 虽然可以用灵活的脚本做一些指标监测和告警,但脚本需要自行设计和编写,并且没有很好的面板来展示这些指标,所以调研了其他方案。因为核心系统目前的数据检查是通过 oracle_exporter + prometheus + grafana 实现的,所以调研了 jmx_exporter,结合现有的框架进行监控,既节约学习成本,又能很好地展示服务器的性能指标。
配置
weblogic配置jmx_exporter
下载
README.md 里有详细的安装说明,我们选用 javaAgent 方式启用。因为核心服务是 jdk1.6+weblogic11g,所以这里下载 jmx_prometheus_javaagent_java6 的 jar 包,并下载 example_configs/weblogic.yml,然后在 ${DOMAIN_HOME} 下新建 jmxexp 目录,放入上面两个文件(jar 包的文件名需与下面 -javaagent 参数中的路径保持一致)。
修改 ${DOMAIN_HOME}/bin/startWebLogic.sh 文件中的 JAVA_OPTIONS 选项,改为以下内容:
# 18001 是 jmx_exporter 对外暴露的 HTTP 端口,prometheus 通过该端口获取性能指标
JAVA_OPTIONS="${SAVE_JAVA_OPTIONS} -DUseSunHttpHandler=true -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Djavax.management.builder.initial=weblogic.management.jmx.mbeanserver.WLSMBeanServerBuilder -javaagent:${DOMAIN_HOME}/jmxexp/jmx_prometheus_javaagent6.jar=18001:${DOMAIN_HOME}/jmxexp/weblogic.yml"
[weblogic@prd-wangxiao-car-1 jmxexp]$ cat weblogic.yml
---
ssl: false
attrNameSnakeCase: true
lowercaseOutputName: true
lowercaseOutputLabelNames: true
whitelistObjectNames:
- "java.lang:type=OperatingSystem"
- "com.bea:ServerRuntime=*,Name=*,Type=JVMRuntime"
- "com.bea:Name=*,Type=ServerRuntime"
- "com.bea:ServerRuntime=*,Type=ApplicationRuntime,*"
- "com.bea:ServerRuntime=*,Type=JDBCDataSourceRuntime,*"
- "com.bea:ServerRuntime=*,Type=JMSDestinationRuntime,*"
- "com.bea:ServerRuntime=*,Type=JDBCStoreRuntime,*"
- "com.bea:ServerRuntime=*,Type=FileStoreRuntime,*"
- "com.bea:ServerRuntime=*,Type=SAFRemoteEndpointRuntime,*"
- "com.bea:ServerRuntime=*,Type=ThreadPoolRuntime,*"
- "com.bea:ServerRuntime=*,Type=JMSRuntime,*"
- "com.bea:ServerRuntime=*,Type=SAFRuntime,*"
- "com.bea:ServerRuntime=*,Type=WorkManagerRuntime,*"
- "com.bea:ServerRuntime=*,Type=MessagingBridgeRuntime,*"
- "com.bea:ServerRuntime=*,Type=PersistentStoreRuntime,*"
- "com.bea:ServerRuntime=*,Type=WebServerRuntime,*"
- "com.bea:ServerRuntime=*,Type=WebAppComponentRuntime,*"
- "com.bea:ServerRuntime=*,Type=ServletRuntime,*"
rules:
# com.bea<ServerRuntime=AdminServer, Name=AdminServer_/bea_wls_internal, ApplicationRuntime=bea_wls_internal, Type=WebAppComponentRuntime><>SessionsOpenedTotalCount
# com.bea<ServerRuntime=AdminServer, Name=ClientClose, ApplicationRuntime=bea_wls_internal, Type=ServletRuntime, WebAppComponentRuntime=AdminServer_/bea_wls_internal><>ExecutionTimeTotal
- pattern: "^com.bea<ServerRuntime=(.+), Name=(.+), (.+)Runtime=(.*), Type=ServletRuntime, WebAppComponentRuntime=(.+)><>(.+):"
name: weblogic_$3_$6
attrNameSnakeCase: true
labels:
runtime: $1
name: $2
application: $4
# ex: java.lang<type=OperatingSystem><>ProcessCpuTime
- pattern: "^java.lang<type=OperatingSystem><>(.+):"
name: weblogic_$1
attrNameSnakeCase: true
# ex: com.bea<Name=Server-0, Type=ServerRuntime><>StableState
- pattern: "^com.bea<Name=(.+), Type=(.+)Runtime><>(.+):"
name: weblogic_$2_$3
attrNameSnakeCase: true
labels:
name: $1
# com.bea<ServerRuntime=AdminServer, Name=ClientClose, ApplicationRuntime=bea_wls_internal, Type=ServletRuntime, WebAppComponentRuntime=AdminServer_/bea_wls_internal><>InvocationTotalCount
# ex: com.bea<ServerRuntime=AdminServer, Name=default, ApplicationRuntime=moduleJMS, Type=WorkManagerRuntime><>CompletedRequests
- pattern: "^com.bea<ServerRuntime=(.+), Name=(.+), (.+)Runtime=(.*), Type=(.+)Runtime><>(.+):"
name: weblogic_$3_$5_$6
attrNameSnakeCase: true
labels:
runtime: $1
name: $2
application: $4
# ex: com.bea<ServerRuntime=AdminServer, Name=dsName, Type=JDBCDataSourceRuntime><>Metric
- pattern: "^com.bea<ServerRuntime=(.+), Name=(.+), Type=(.+)Runtime><>(.+):"
name: weblogic_$3_$4
attrNameSnakeCase: true
labels:
runtime: $1
name: $2
# ex: com.bea<ServerRuntime=AdminServer, Name=bea_wls_cluster_internal, Type=ApplicationRuntime><OverallHealthStateJMX>IsCritical
- pattern: "^com.bea<ServerRuntime=(.+), Name=(.+), Type=(.+)Runtime><(.+)>(.+):"
name: weblogic_$3_$4_$5
attrNameSnakeCase: true
labels:
runtime: $1
name: $2
prometheus 配置
[root@prd-dbyw-app-01 prometheus]# cat prometheus.yml
# 添加以下job内容
- job_name: "cbs-weblogic"
scrape_interval: 2s
static_configs:
- targets: ["88.4.70.20:18001"]
labels:
instance: 88.4.70.20
- targets: ["88.4.70.51:18001"]
labels:
instance: 88.4.70.51
- targets: ["88.4.70.34:18001"]
labels:
instance: 88.4.70.34
- targets: ["88.4.70.35:18001"]
labels:
instance: 88.4.70.35
- targets: ["88.4.70.47:18001"]
labels:
instance: 88.4.70.47
- targets: ["88.4.70.48:18001"]
labels:
instance: 88.4.70.48
重启 prometheus 服务。
grafana面板配置
通用设置:
变量设置:
面板panel配置
服务状态panel
- Panel title:服务状态
- metrics:up{job=\~"\$job",instance=\~"$ip"}
- Visualization:Stat
- Display-Calculation:Last(not null)
- [Field]-Value mappings
- Mapping type:Value
- Value:1
- Text:Active
- Mapping type:Value
- Value:0
- Text:Down
- Mapping type:Value
堆内存使用率panel
- Panel title:堆内存使用率
- metrics:jvm_memory_bytes_used{instance=\~"\$ip",job=\~"\$job",area="heap"}/jvm_memory_bytes_committed{instance=\~"\$ip",job=\~"\$job",area="heap"} * 100
- Visualization:Gauge
- Display-Calculation:Last(not null)
- [Field]-Standard options
- Unit:Percent(0-100)
- Decimals:2
堆内存使用情况panel
- Panel title:堆内存使用情况
- metrics:jvm_memory_bytes_used{instance=\~"\$ip",job=\~"\$job",area="heap"}
- Legend:{{instance}}_\${server}_堆内存已使用
- Visualization:Graph
注:以上 panel 配置中的反斜杠 \(如 \~、\$)是 markdown 转义显示造成的,实际在 grafana 中填写时不需要加。