下载地址

https://prometheus.io/download/

node_exporter: 1.6.1

  1. 下载:
    wget https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz

  2. 解压:
    tar -xzf node_exporter-1.6.1.linux-amd64.tar.gz

  3. 修改名称:
    mv node_exporter-1.6.1.linux-amd64 node_exporter

  4. 启动:

    cd node_exporter
    ./node_exporter --web.listen-address=:9100 &
  5. 访问地址:
    xx.xx.xx.xx:9100

blackbox_exporter: 0.24.0

  1. 下载:
    wget https://github.com/prometheus/blackbox_exporter/releases/download/v0.24.0/blackbox_exporter-0.24.0.linux-amd64.tar.gz

  2. 解压:
    tar -xzf blackbox_exporter-0.24.0.linux-amd64.tar.gz

  3. 修改名称:
    mv blackbox_exporter-0.24.0.linux-amd64 blackbox_exporter

  4. 进入目录:
    cd blackbox_exporter

  5. 修改配置文件:
    vim blackbox.yml

     modules:
         http_2xx:
             prober: http
             http:
                 preferred_ip_protocol: "ip4"
         http_post_2xx:
             prober: http
             http:
                 method: POST
         tcp_connect:
             prober: tcp
         pop3s_banner:
             prober: tcp
             tcp:
                 query_response:
                     -   expect: "^+OK"
                 tls: true
                 tls_config:
                     insecure_skip_verify: false
         grpc:
             prober: grpc
             grpc:
                 tls: true
                 preferred_ip_protocol: "ip4"
         grpc_plain:
             prober: grpc
             grpc:
                 tls: false
                 service: "service1"
         ssh_banner:
             prober: tcp
             tcp:
                 query_response:
                     -   expect: "^SSH-2.0-"
                     -   send: "SSH-2.0-blackbox-ssh-check"
         irc_banner:
             prober: tcp
             tcp:
                 query_response:
                     -   send: "NICK prober"
                     -   send: "USER prober prober prober :prober"
                     -   expect: "PING :([^ ]+)"
                         send: "PONG ${1}"
                     -   expect: "^:[^ ]+ 001"
         icmp:
             prober: icmp
         icmp_ttl5:
             prober: icmp
             timeout: 5s
             icmp:
                 ttl: 5
  6. 启动:
    ./blackbox_exporter --config.file="blackbox.yml" --web.listen-address=:9115 &

  7. 访问地址:
    xx.xx.xx.xx:9115

prometheus-webhook-dingtalk: v2.1.0

  1. 下载:
    wget https://github.com/timonwong/prometheus-webhook-dingtalk/releases/download/v2.1.0/prometheus-webhook-dingtalk-2.1.0.linux-amd64.tar.gz

  2. 解压:
    tar -xzf prometheus-webhook-dingtalk-2.1.0.linux-amd64.tar.gz

  3. 修改名称:
    mv prometheus-webhook-dingtalk-2.1.0.linux-amd64 webhook-dingtalk

  4. 进入目录:
    cd webhook-dingtalk

  5. 修改配置文件:
    vim config.yml

     #templates:
     #    - /path/to/my-template.tmpl
     timeout: 5s
     targets:
         webhook1:
             url: https://oapi.dingtalk.com/robot/send?access_token=xxxxxxxxxxxxxxxx
             secret: SECxxxxxxxxxxxxxxxx
             mention:
                 #all: true
                 mobiles: [ '1xxxxxxxxxx' ]
             message:
                 title: '{{ template "ding.link.title" . }}'
                 text: '{{ template "ding.link.content" . }}'
  6. 启动:
    ./prometheus-webhook-dingtalk --config.file="config.yml" --web.listen-address=:8060 --web.enable-ui &

  7. 访问地址:
    xx.xx.xx.xx:8060/ui

alertmanager: 0.26.0

  1. 下载:
    wget https://github.com/prometheus/alertmanager/releases/download/v0.26.0/alertmanager-0.26.0.linux-amd64.tar.gz

  2. 解压:
    tar -xzf alertmanager-0.26.0.linux-amd64.tar.gz

  3. 修改名称:
    mv alertmanager-0.26.0.linux-amd64 alertmanager

  4. 进入目录:
    cd alertmanager

  5. 修改配置文件:
    vim alertmanager.yml

     global:
         resolve_timeout: 1m
     route:
         group_by: [ 'alertname' ]
         group_wait: 30s
         group_interval: 5m
         repeat_interval: 1h
         receiver: 'web.hook'
     receivers:
         -   name: 'web.hook'
             webhook_configs:
                 -   url: 'http://xx.xx.xx.xx:8060/dingtalk/webhook1/send'
                     send_resolved: true
     inhibit_rules:
         -   source_match:
                 severity: 'critical'
             target_match:
                 severity: 'warning'
             equal: [ 'alertname', 'dev', 'instance' ]
  6. 启动:
    ./alertmanager --config.file="alertmanager.yml" --web.listen-address=:9093 &

  7. 访问地址:
    xx.xx.xx.xx:9093

prometheus: v2.37.9

  1. 下载:
    wget https://github.com/prometheus/prometheus/releases/download/v2.37.9/prometheus-2.37.9.linux-amd64.tar.gz

  2. 解压:
    tar -xzf prometheus-2.37.9.linux-amd64.tar.gz

  3. 修改名称:
    mv prometheus-2.37.9.linux-amd64 prometheus

  4. 进入目录:
    cd prometheus

  5. 修改配置文件:
    vim prometheus.yml

     global:
         scrape_interval: 10s
         evaluation_interval: 10s
     alerting:
         alertmanagers:
             -   static_configs:
                     -   targets:
                             - xx.xx.xx.xx:9093
     rule_files:
         - "/path/to/prometheus/rules/*.yml"
     scrape_configs:
         -   job_name: "prometheus"
             scheme: http
             static_configs:
                 -   targets: [ "xx.xx.xx.xx:9090" ]
         -   job_name: 'node-exporter'
             static_configs:
                 -   targets: [ 'xx.xx.xx.xx:9100' ]
         -   job_name: 'web-server'
             metrics_path: /probe
             params:
                 module: [ http_2xx ]
             static_configs:
                 -   targets:
                         - http://xx.xx.xx.xx:80
             relabel_configs:
                 -   source_labels: [ __address__ ]
                     target_label: __param_target
                 -   source_labels: [ __param_target ]
                     target_label: instance
                 -   target_label: __address__
                     replacement: xx.xx.xx.xx:9115
  6. 创建告警规则目录:
    mkdir rules

  7. 创建告警规则文件:

    1. 监控 http 服务器是否存活:
      vim rules/http.yml
       groups:
           -   name: web-server
               rules:
                   -   alert: web-server
                       expr: probe_success{job="web-server"} == 0
                       for: 10s
                       labels:
                           severity: 1
                           team: backend
                       annotations:
                           summary: "{{ $labels.instance }} 已停止运行超过 10s!"
    2. 监控服务器 cpu 负载:
      vim rules/cpu.yml
       groups:
           -   name: cpu-over
               rules:
                   -   alert: cpu-over
                       expr: 100 - (avg by (instance)(irate(node_cpu_seconds_total{mode="idle"}[1m]) )) * 100 > 90
                       for: 10s
                       labels:
                           severity: 1
                           team: server
                       annotations:
                           description: "服务器 {{ $labels.instance }}: CPU 使用超过 90%!(当前值: {{ $value }}%)"
    3. 监控服务器内存使用率:
      vim rules/memory.yml
       groups:
           -   name: memory-over
               rules:
                   -   alert: memory-over
                       expr: (node_memory_MemTotal_bytes - (node_memory_MemFree_bytes+node_memory_Buffers_bytes+node_memory_Cached_bytes )) / node_memory_MemTotal_bytes * 100 > 80
                       for: 10s
                       labels:
                           severity: 1
                           team: server
                       annotations:
                           description: "服务器 {{ $labels.instance }}: 内存使用超过 80%!(当前值: {{ $value }}%)"
    4. 监控服务器磁盘使用率:
      vim rules/disk.yml
       groups:
           -   name: disk-over
               rules:
                   -   alert: disk-over
                       expr: (node_filesystem_size_bytes - node_filesystem_avail_bytes) / node_filesystem_size_bytes * 100 > 80
                       for: 10s
                       labels:
                           severity: 1
                           team: server
                       annotations:
                           description: "服务器 {{ $labels.instance }}: 磁盘设备 {{ $labels.device }}: 使用超过 80%!(挂载点: {{ $labels.mountpoint }} 当前值: {{ $value }}%)"
  8. 启动:
    ./prometheus --config.file="prometheus.yml" --web.listen-address=:9090 &

  9. 访问地址:
    xx.xx.xx.xx:9090

  10. PromQL 根据标签查找指标命令:
    {label_name="label_value"}
    例如: {job="node-exporter"}

grafana: 10.0.3

  1. 下载地址:
    https://grafana.com/grafana/download

  2. 下载:
    wget https://dl.grafana.com/enterprise/release/grafana-enterprise-10.0.3-1.x86_64.rpm

  3. 安装:
    yum -y localinstall grafana-enterprise-10.0.3-1.x86_64.rpm

  4. 修改配置文件:
    vim /etc/grafana/grafana.ini
    [server]
    http_port = 3000

  5. 启动:
    systemctl start grafana-server && systemctl enable grafana-server

  6. 访问地址:
    xx.xx.xx.xx:3000

    用户名:admin
    密码:admin
  7. 添加数据源–Prometheus(略)

  8. 导入模板:

    1. 模板地址:
      https://grafana.com/grafana/dashboards
    2. 导入 node_exporter 相关模板:
      模板名称:Node Exporter Full
      编号:1860
    3. 导入 blackbox_exporter 相关模板:
      模板名称:Prometheus Blackbox Exporter
      编号:7587
文档更新时间: 2024-04-20 10:57   作者:lee