天天看点

Prometheus监控之Alertmanager告警插件

作者:面向百度工作

Alertmanager 默认支持邮件和企业微信告警通知。邮件不常用,企业微信又需要验证域名回调链接,因此这里使用钉钉群机器人进行演示。

部署服务

编写配置文件

mkdir -p /data/alertmanager/conf

vim /data/alertmanager/conf/alertmanager.yml

# Alertmanager configuration: every alert is routed to the single
# 'dingtalk' webhook receiver defined below.
global:
  resolve_timeout: 5m

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'dingtalk'  # default receiver, used when no child route matches
  routes:
  # This child route has no matchers and points at the same receiver as the default.
  - receiver: 'dingtalk'
    group_wait: 10s

receivers:
- name: 'dingtalk'
  webhook_configs:
  # The 'webhook-dingding' path segment is the target name configured in the DingTalk webhook service.
  - url: 'http://10.4.7.102:8060/dingtalk/webhook-dingding/send'   ## DingTalk webhook address
    send_resolved: true

templates:
- /etc/alertmanager/template/*.tmpl

# Mute 'warning' alerts while a 'critical' alert with the same
# alertname, dev and instance labels is firing.
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

Docker启动

#!/bin/bash
#
# (Re)creates the Alertmanager container and then follows its logs.
#
# Host paths:
#   /data/alertmanager/conf -> /etc/alertmanager  (contains alertmanager.yml)
#   /data/alertmanager/data -> /alertmanager      (Alertmanager storage)

# Remove a previous container, if any. On a first run there is nothing to
# stop or remove; that error is expected, so it is silenced and ignored.
docker stop alertmanager >/dev/null 2>&1 || true
docker rm alertmanager >/dev/null 2>&1 || true

# The official image starts Alertmanager with --storage.path=/alertmanager,
# so the host directory must be mounted at /alertmanager itself. Mounted at
# /alertmanager/data it stays empty, and silences plus the notification log
# are lost whenever the container is removed.
# NOTE(review): the official image runs as user "nobody" (uid/gid 65534);
# confirm this for the image version you deploy.
mkdir -p /data/alertmanager/data
chown 65534:65534 /data/alertmanager/data

docker run -d -p 9093:9093 \
           --restart=always \
           --name alertmanager \
           -v "/etc/localtime:/etc/localtime" \
           -v /data/alertmanager/conf:/etc/alertmanager \
           -v /data/alertmanager/data:/alertmanager \
           prom/alertmanager
docker logs -f alertmanager

访问管理页面

通过浏览器访问 http://{服务器IP}:9093

Prometheus监控之Alertmanager告警插件

配置钉钉告警

创建钉钉群机器人

创建方法请自行查阅钉钉官方文档。机器人的"安全设置"需选择"加签"方式,并记录生成的密钥,后面配置文件中的 secret 字段会用到。

编辑配置文件

vim /data/prometheus/conf/dingtalk.yml

# Configuration for the DingTalk webhook service.
timeout: 5s
# Template file that defines "ding.link.title" and "ding.link.content".
templates:
  - /etc/dingding.tmpl
targets:
  webhook-dingding:   ## target name; must match the name used in the Alertmanager webhook URL
    url: https://oapi.dingtalk.com/robot/send?access_token=xxxxx
    # Sign secret of the robot (the robot's security setting must be "sign").
    secret: xxxxx
    message:
      title: '{{ template "ding.link.title" . }}'
      text: '{{ template "ding.link.content" . }}'

编辑模板文件

vim /data/prometheus/conf/dingding.tmpl

{{/* "__subject": title line. Upper-cased status, the number of firing alerts (only while firing), the group label values, and the remaining common label values in parentheses. */}}
{{ define "__subject" }}[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .GroupLabels.SortedPairs.Values | join " " }} {{ if gt (len .CommonLabels) (len .GroupLabels) }}({{ with .CommonLabels.Remove .GroupLabels.Names }}{{ .Values | join " " }}{{ end }}){{ end }}{{ end }}
{{/* "__alertmanagerURL": link to the Alertmanager alerts page, filtered by receiver. */}}
{{ define "__alertmanagerURL" }}{{ .ExternalURL }}/#/alerts?receiver={{ .Receiver }}{{ end }}

{{/* "__text_alert_list": for every alert passed in, lists all labels, all annotations and the generator URL. Used by "legacy.content". */}}
{{ define "__text_alert_list" }}{{ range . }}
**Labels**
{{ range .Labels.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Annotations**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}
**Source:** [{{ .GeneratorURL }}]({{ .GeneratorURL }})
{{ end }}{{ end }}

{{/* "default.__text_alert_list": one section per firing alert. Shows severity, start time (Asia/Shanghai), all annotations, and all labels except severity, summary and team. */}}
{{ define "default.__text_alert_list" }}{{ range . }}
---
**告警级别:** {{ .Labels.severity | upper }}

**触发时间:** {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}

**事件信息:** 
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}


{{ end }}

**事件标签:**
{{ range .Labels.SortedPairs }}{{ if and (ne (.Name) "severity") (ne (.Name) "summary") (ne (.Name) "team") }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}{{ end }}
{{ end }}
{{ end }}
{{/* "default.__text_alertresovle_list": same layout for resolved alerts, with the end time added after the start time. */}}
{{ define "default.__text_alertresovle_list" }}{{ range . }}
---
**告警级别:** {{ .Labels.severity | upper }}

**触发时间:** {{ dateInZone "2006.01.02 15:04:05" (.StartsAt) "Asia/Shanghai" }}

**结束时间:** {{ dateInZone "2006.01.02 15:04:05" (.EndsAt) "Asia/Shanghai" }}

**事件信息:**
{{ range .Annotations.SortedPairs }}> - {{ .Name }}: {{ .Value | markdown | html }}


{{ end }}

**事件标签:**
{{ range .Labels.SortedPairs }}{{ if and (ne (.Name) "severity") (ne (.Name) "summary") (ne (.Name) "team") }}> - {{ .Name }}: {{ .Value | markdown | html }}
{{ end }}{{ end }}
{{ end }}
{{ end }}

{{/* Default: the title reuses "__subject". The content is a heading linking to Alertmanager, followed by a firing section and a resolved section, each rendered only when it contains at least one alert. */}}
{{ define "default.title" }}{{ template "__subject" . }}{{ end }}
{{ define "default.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ if gt (len .Alerts.Firing) 0 -}}

**====侦测到故障====**
{{ template "default.__text_alert_list" .Alerts.Firing }}


{{- end }}

{{ if gt (len .Alerts.Resolved) 0 -}}
**====故障恢复====**
{{ template "default.__text_alertresovle_list" .Alerts.Resolved }}


{{- end }}
{{- end }}

{{/* Legacy: same heading, followed by the plain label/annotation list for firing alerts only. */}}
{{ define "legacy.title" }}{{ template "__subject" . }}{{ end }}
{{ define "legacy.content" }}#### \[{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}\] **[{{ index .GroupLabels "alertname" }}]({{ template "__alertmanagerURL" . }})**
{{ template "__text_alert_list" .Alerts.Firing }}
{{- end }}

{{/* Compatibility names: "ding.link.title" and "ding.link.content" forward to the default templates. */}}
{{ define "ding.link.title" }}{{ template "default.title" . }}{{ end }}
{{ define "ding.link.content" }}{{ template "default.content" . }}{{ end }}

启动钉钉告警插件

# Start the DingTalk webhook service in the background on port 8060.
# The config file and the message template are bind-mounted from the host;
# everything after the image name is passed to the service itself.
docker run \
  --detach \
  --name dingtalk \
  --restart=always \
  --publish 8060:8060 \
  --volume /data/prometheus/conf/dingtalk.yml:/etc/dingtalk.yml \
  --volume /data/prometheus/conf/dingding.tmpl:/etc/dingding.tmpl \
  timonwong/prometheus-webhook-dingtalk \
  --config.file=/etc/dingtalk.yml \
  --web.enable-ui \
  --web.enable-lifecycle

测试钉钉告警通知

创建测试脚本

#!/usr/bin/env bash
#
# Sends one hand-made "DiskRunningFull" warning alert to the local
# Alertmanager so the DingTalk notification path can be checked end to end.

# JSON array with a single alert: labels identify it, annotations carry the text.
alerts_message='[
  {
    "labels": {
       "severity": "warning",
       "alertname": "DiskRunningFull",
       "dev": "/data",
       "instance": "example1",
       "msgtype": "testing"
     },
     "annotations": {
        "info": "The disk /data is running full",
        "summary": "please check the instance example1"
      }
  }
]'

# Post to the v2 API with a JSON content type. The old /api/v1/alerts
# endpoint was removed in Alertmanager 0.27, and an untagged prom/alertmanager
# image pulls the latest release, where v1 requests are rejected.
curl -X POST \
  -H 'Content-Type: application/json' \
  -d "$alerts_message" \
  http://127.0.0.1:9093/api/v2/alerts

钉钉收到通知

Prometheus监控之Alertmanager告警插件

继续阅读