global:
  # How frequently to scrape targets by default.
  [ scrape_interval: <duration> | default = 1m ]
  # How long until a scrape request times out.
  [ scrape_timeout: <duration> | default = 10s ]
  # How frequently to evaluate rules.
  [ evaluation_interval: <duration> | default = 1m ]
  # The labels to add to any time series or alerts when communicating with
  # external systems (federation, remote storage, Alertmanager).
  external_labels:
    [ <labelname>: <labelvalue> ... ]
  # File to which PromQL queries are logged.
  # Reloading the configuration will reopen the file.
  [ query_log_file: <string> ]

# Rule files specifies a list of globs. Rules and alerts are read from
# all matching files.
rule_files:
  [ - <filepath_glob> ... ]

# A list of scrape configurations.
scrape_configs:
  [ - <scrape_config> ... ]

# Alerting specifies settings related to the Alertmanager.
alerting:
  alert_relabel_configs:
    [ - <relabel_config> ... ]
  alertmanagers:
    [ - <alertmanager_config> ... ]

# Settings related to the remote write feature.
remote_write:
  [ - <remote_write> ... ]

# Settings related to the remote read feature.
remote_read:
  [ - <remote_read> ... ]
示例:
# my global config
global:
  scrape_interval: 15s      # how often to scrape metrics; default is every 1 minute
  evaluation_interval: 15s  # how often to evaluate rules; default is every 1 minute
  scrape_timeout: 5s        # per-scrape timeout; default is 10s

# Alertmanager configuration: how to reach Alertmanager(s) for pushing alerts.
# Static configuration and file-based discovery are both supported.
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager_ip:9093
  # file-based discovery:
  #- file_sd_configs:
  #  - files:
  #    - "targets/alertmanager-*.yaml"

# Rule files to load and periodically evaluate (relative paths allowed).
# Rules come in two kinds: recording rules and alerting rules.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# job_name is required; every series scraped by this job gets the label `job=prometheus`.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'; change it to match your service if needed.
  # scheme defaults to 'http'.
  # Static configuration: target ip:port pairs are listed directly.
  static_configs:
  # every scraped series also gets the label `instance=${TARGET}`
  - targets: ['localhost:9090']

- job_name: 'node'
  static_configs:
  - targets: ['192.168.5.10:9100', '192.168.5.11:9100', '192.168.5.12:9100']
    # fixed: was misspelled `lables:` — Prometheus would reject the unknown field
    labels:
      app: node
  # optional HTTP URL parameters sent with every scrape request
  params:
    collect[]:
    - cpu
    - meminfo
    - diskstats
    - netdev
    - netstat
    - filefd
    - filesystem
    - xfs
    - systemd

- job_name: 'docker'
  static_configs:
  - targets: ['192.168.5.10:8080', '192.168.5.11:8080', '192.168.5.12:8080']
  metric_relabel_configs:
  # drop series by metric name
  - source_labels: [__name__]  # fixed: was the string '[__name__]'; must be a list of label names
    separator: ','             # defaults to ';'
    regex: '(container_tasks_state|container_memory_failures_total)'
    action: drop               # default action is replace
  # rewrite a label
  - source_labels: [id]
    target_label: container_id
    regex: '/kubepods/([a-z0-9]+)'
    replacement: '$1'
    action: replace
  # drop a label
  - regex: 'kernelVersion'
    action: labeldrop
groups:
- name: 主机状态  # host-status rule group
  rules:
  # Alert for any instance that is unreachable for >5 minutes.
  - alert: InstanceDown
    expr: up == 0
    for: 5m
    labels:
      severity: page
    annotations:
      summary: "Instance {{ $labels.instance }} down"
      description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes."
  # Alert for any instance that has a median request latency >1s.
  - alert: APIHighRequestLatency
    expr: api_http_request_latencies_second{quantile="0.5"} > 1
    for: 10m
    annotations:
      summary: "High request latency on {{ $labels.instance }}"
      description: "{{ $labels.instance }} has a median request latency above 1s (current value: {{ $value }}s)"
# The job name assigned to scraped metrics by default.
# Also added as a label (job=<job_name>) to every series scraped by this job.
job_name: <job_name>

# How frequently to scrape targets from this job.
[ scrape_interval: <duration> | default = <global_config.scrape_interval> ]

# Per-scrape timeout when scraping this job.
[ scrape_timeout: <duration> | default = <global_config.scrape_timeout> ]

# The HTTP resource path on which to fetch metrics from targets.
[ metrics_path: <path> | default = /metrics ]

# Optional HTTP URL parameters.
params:
  [ <string>: [<string>, ...] ]

# List of labeled statically configured targets for this job.
static_configs:
  [ - <static_config> ... ]

# List of file service discovery configurations.
file_sd_configs:
  [ - <file_sd_config> ... ]

# List of Kubernetes service discovery configurations.
kubernetes_sd_configs:
  [ - <kubernetes_sd_config> ... ]

# List of target relabel configurations.
relabel_configs:
  [ - <relabel_config> ... ]

# List of metric relabel configurations.
metric_relabel_configs:
  [ - <relabel_config> ... ]
# The targets specified by the static config.
targets:
  [ - '<host>' ]

# Labels assigned to all metrics scraped from the targets.
labels:
  [ <labelname>: <labelvalue> ... ]
# my global config
# Author: MageEdu <mage@magedu.com>
# Repo: http://gitlab.magedu.com/MageEdu/prometheus-configs/
global:
  scrape_interval: 15s      # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
# - "first_rules.yml"
# - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  # File-based service discovery.
  file_sd_configs:
  - files:
    # list of files to load; glob wildcards are supported
    - "targets/prometheus-*.yaml"
    # re-read the files every 2 minutes; the default is 5 minutes
    refresh_interval: 2m

# All nodes
- job_name: 'nodes'
  file_sd_configs:
  - files:
    - "targets/nodes-*.yaml"
    refresh_interval: 2m
# A list of DNS domain names to be queried.
# For SRV queries, give the name of the SRV record (the port comes from the
# record itself), e.g. '_prometheus._tcp.ljzsdut.com'.
names:
  [ - <domain_name> ]

# The type of DNS query to perform. One of: SRV, A, AAAA.
[ type: <query_type> | default = 'SRV' ]

# The port number used if the query type is not SRV.
[ port: <number> ]

# The time after which the provided names are refreshed.
[ refresh_interval: <duration> | default = 30s ]
---
kind: ConfigMap
apiVersion: v1
metadata:
  labels:
    app: prometheus
  name: prometheus-config
  namespace: prom
data:
  # NOTE(review): everything belonging to this literal block scalar must be
  # indented further than the `prometheus.yml:` key to remain part of its value.
  prometheus.yml: |
    # A scrape configuration for running Prometheus on a Kubernetes cluster.
# This uses separate scrape configs for cluster components (i.e. API server, node)
# and services to allow each to use different authentication configs.
#
# Kubernetes labels will be added as Prometheus labels on metrics via the
# `labelmap` relabeling action.
#
# If you are using Kubernetes 1.7.2 or earlier, please take note of the comments
# for the kubernetes-cadvisor job; you will need to edit or remove this job.
# Scrape config for API servers.
#
# Kubernetes exposes API servers as endpoints to the default/kubernetes
# service so this uses `endpoints` role and uses relabelling to only keep
# the endpoints associated with the default/kubernetes service using the
# default named port `https`. This works for single API server deployments as
# well as HA API server deployments.
# Global defaults; individual scrape jobs may override these per-job.
global:
  scrape_interval: 15s     # how often targets are scraped by default
  scrape_timeout: 10s      # per-scrape request timeout
  evaluation_interval: 1m  # how often rule groups are evaluated
scrape_configs:
- job_name: 'kubernetes-apiservers'
  kubernetes_sd_configs:
  - role: endpoints
  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https
  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
    # If your node certificates are self-signed or use a different CA to the
    # master CA, then disable certificate verification below. Note that
    # certificate verification is an integral part of a secure infrastructure
    # so this should only be disabled in a controlled environment. You can
    # disable certificate verification by uncommenting the line below.
    #
    # insecure_skip_verify: true
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  # Keep only the default/kubernetes service endpoints for the https port. This
  # will add targets for each API server which Kubernetes adds an endpoint to
  # the default/kubernetes service.
  relabel_configs:
  - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
    action: keep
    regex: default;kubernetes;https
# Scrape config for nodes (kubelet).
#
# Rather than connecting directly to the node, the scrape is proxied though the
# Kubernetes apiserver. This means it will work if Prometheus is running out of
# cluster, or can't connect to nodes for some other reason (e.g. because of
# firewalling).
- job_name: 'kubernetes-nodes'
  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https
  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Rewrite the target address to go through the apiserver proxy.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics
# Scrape config for Kubelet cAdvisor.
#
# This is required for Kubernetes 1.7.3 and later, where cAdvisor metrics
# (those whose names begin with 'container_') have been removed from the
# Kubelet metrics endpoint. This job scrapes the cAdvisor endpoint to
# retrieve those metrics.
#
# In Kubernetes 1.7.0-1.7.2, these metrics are only exposed on the cAdvisor
# HTTP endpoint; use "replacement: /api/v1/nodes/${1}:4194/proxy/metrics"
# in that case (and ensure cAdvisor's HTTP server hasn't been disabled with
# the --cadvisor-port=0 Kubelet flag).
#
# This job is not necessary and should be removed in Kubernetes 1.6 and
# earlier versions, or it will cause the metrics to be scraped twice.
- job_name: 'kubernetes-cadvisor'
  # Default to scraping over https. If required, just disable this or change to
  # `http`.
  scheme: https
  # This TLS & bearer token file config is used to connect to the actual scrape
  # endpoints for cluster components. This is separate to discovery auth
  # configuration because discovery & scraping are two separate concerns in
  # Prometheus. The discovery auth config is automatic if Prometheus runs inside
  # the cluster. Otherwise, more config options have to be provided within the
  # <kubernetes_sd_config>.
  tls_config:
    ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
  bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
  kubernetes_sd_configs:
  - role: node
  relabel_configs:
  - action: labelmap
    regex: __meta_kubernetes_node_label_(.+)
  # Rewrite the target address to go through the apiserver proxy.
  - target_label: __address__
    replacement: kubernetes.default.svc:443
  - source_labels: [__meta_kubernetes_node_name]
    regex: (.+)
    target_label: __metrics_path__
    replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# Scrape config for service endpoints.
#
# The relabeling allows the actual service scrape endpoint to be configured
# via the following annotations:
#
# * `prometheus.io/scrape`: Only scrape services that have a value of `true`
# * `prometheus.io/scheme`: If the metrics endpoint is secured then you will need
# to set this to `https` & most likely set the `tls_config` of the scrape config.
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: If the metrics are exposed on a different port to the
# service then set this appropriately.
- job_name: 'kubernetes-service-endpoints'
  kubernetes_sd_configs:
  - role: endpoints
  relabel_configs:
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
    action: keep
    # quoted so generic YAML 1.1 parsers don't read this as a boolean
    regex: "true"
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
    action: replace
    target_label: __scheme__
    regex: (https?)
  - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  # Replace the port in __address__ with the one from the annotation.
  - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
    action: replace
    target_label: __address__
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: "$1:$2"
  - action: labelmap
    regex: __meta_kubernetes_service_label_(.+)
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: kubernetes_namespace
  - source_labels: [__meta_kubernetes_service_name]
    action: replace
    target_label: kubernetes_name
# Example scrape config for pods
#
# The relabeling allows the actual pod scrape endpoint to be configured via the
# following annotations:
#
# * `prometheus.io/scrape`: Only scrape pods that have a value of `true`
# * `prometheus.io/path`: If the metrics path is not `/metrics` override this.
# * `prometheus.io/port`: Scrape the pod on the indicated port instead of the
# pod's declared ports (default is a port-free target if none are declared).
- job_name: 'kubernetes-pods'
  # if you want to use metrics on jobs, set the below field to
  # true to prevent Prometheus from setting the `job` label
  # automatically.
  honor_labels: false
  kubernetes_sd_configs:
  - role: pod
  # skip verification so you can do HTTPS to pods
  tls_config:
    insecure_skip_verify: true
  # make sure your labels are in order
  relabel_configs:
  # these labels tell Prometheus to automatically attach source
  # pod and namespace information to each collected sample, so
  # that they'll be exposed in the custom metrics API automatically.
  - source_labels: [__meta_kubernetes_namespace]
    action: replace
    target_label: namespace
  - source_labels: [__meta_kubernetes_pod_name]
    action: replace
    target_label: pod
  # these labels tell Prometheus to look for
  # prometheus.io/{scrape,path,port} annotations to configure
  # how to scrape
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
    action: keep
    # quoted so generic YAML 1.1 parsers don't read this as a boolean
    regex: "true"
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
    action: replace
    target_label: __metrics_path__
    regex: (.+)
  - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
    action: replace
    regex: ([^:]+)(?::\d+)?;(\d+)
    replacement: "$1:$2"
    target_label: __address__
  - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scheme]
    action: replace
    target_label: __scheme__
    regex: (.+)
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
[root@node00 prometheus]# cat conf/node-dis.yml
# NOTE: "businees" is a typo carried through the original examples; the later
# relabel snippets reference this exact label name, so it is kept as-is.
- targets:
  - "192.168.100.10:20001"
  labels:
    __hostname__: node00
    __businees_line__: "line_a"
    __region_id__: "cn-beijing"
    __availability_zone__: "a"
- targets:
  - "192.168.100.11:20001"
  labels:
    __hostname__: node01
    __businees_line__: "line_a"
    __region_id__: "cn-beijing"
    __availability_zone__: "a"
- targets:
  - "192.168.100.12:20001"
  labels:
    __hostname__: node02
    __businees_line__: "line_c"
    __region_id__: "cn-beijing"
    __availability_zone__: "b"
此时如果查看target信息,如下图。
因为我们的 label 都是以 __ 开头的,而目标重新标签(relabel)之后,以 __ 开头的标签会从标签集中删除。
replace示例1
将 labels 中的 __hostname__ 替换为 nodename(与下方配置中 target_label 的取值一致)。
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
  relabel_configs:
  # copy the value of the (otherwise discarded) __hostname__ label
  # into a regular label named "nodename"
  - source_labels:
    - "__hostname__"
    regex: "(.*)"
    target_label: "nodename"
    action: replace
    replacement: "$1"
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
  relabel_configs:
  # join __region_id__ and __availability_zone__ (separated by "-")
  # into a single "region_zone" label
  - source_labels:
    - "__region_id__"
    - "__availability_zone__"
    separator: "-"
    regex: "(.*)"
    target_label: "region_zone"
    action: replace
    replacement: "$1"
target如下图:
keep示例:过滤Target实例
修改配置文件
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
target如下图
修改配置文件如下
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
  relabel_configs:
  # keep only targets whose __hostname__ label matches "node00";
  # all other targets are dropped
  - source_labels:
    - "__hostname__"
    regex: "node00"
    action: keep
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
  relabel_configs:
  # promote each hidden __*__ source label to a regular label
  - source_labels:
    - "__hostname__"
    regex: "(.*)"
    target_label: "nodename"
    action: replace
    replacement: "$1"
  - source_labels:
    - "__businees_line__"
    regex: "(.*)"
    target_label: "businees_line"
    action: replace
    replacement: "$1"
  - source_labels:
    - "__datacenter__"
    regex: "(.*)"
    target_label: "datacenter"
    action: replace
    replacement: "$1"
target如下图
修改配置文件如下
scrape_configs:
# The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
- job_name: 'prometheus'
  # metrics_path defaults to '/metrics'
  # scheme defaults to 'http'.
  static_configs:
  - targets: ['localhost:9090']
- job_name: "node"
  file_sd_configs:
  - refresh_interval: 1m
    files:
    - "/usr/local/prometheus/prometheus/conf/node*.yml"
  relabel_configs:
  - source_labels:
    - "__hostname__"
    regex: "(.*)"
    target_label: "nodename"
    action: replace
    replacement: "$1"
  - source_labels:
    - "__businees_line__"
    regex: "(.*)"
    target_label: "businees_line"
    action: replace
    replacement: "$1"
  - source_labels:
    - "__datacenter__"
    regex: "(.*)"
    target_label: "datacenter"
    action: replace
    replacement: "$1"
  # finally, drop the labels whose names match the regex
  - regex: "(nodename|datacenter)"
    action: labeldrop
scrape_configs:
- job_name: my_job
  # Service discovery etc. goes here.
  relabel_configs:
  # Scrape a deterministic ~10% subset of the discovered targets:
  # hash __address__ into 10 buckets (0-9) stored in __tmp_hash ...
  - source_labels: [__address__]
    modulus: 10
    target_label: __tmp_hash
    action: hashmod
  # ... then keep only the targets that land in bucket 0.
  - source_labels: [__tmp_hash]
    regex: 0
    action: keep