4-Prometheus配置文件

prometheus

Prometheus配置文件

在前面的章节中,Prometheus 安装在 /usr/local/prometheus 目录下,其配置文件为 prometheus.yml。
Prometheus 启动时可以使用参数 --config.file 指定配置文件。

配置文件主体

# Top-level layout of prometheus.yml, in schema notation: entries wrapped in
# [ ... ] are optional and <...> marks a placeholder.

# Global settings for Prometheus, such as the scrape interval and the
# scrape timeout.
global:
  # Interval between scrapes.
  [ scrape_interval: <duration> | default = 1m ]

  # Scrape timeout.
  [ scrape_timeout: <duration> | default = 10s ]

  # Interval at which rules are evaluated.
  [ evaluation_interval: <duration> | default = 1m ]

  # External label settings.
  external_labels:
    [ <labelname>: <labelvalue> ... ]

  # File to which PromQL queries are logged.
  # Reloading the configuration will reopen the file.
  [ query_log_file: <string> ]

# Alerting rule files. Based on these rules Prometheus pushes alerts to
# Alertmanager.
rule_files:
  [ - <filepath_glob> ... ]

# Scrape configuration. Prometheus' data collection is configured here.
scrape_configs:
  [ - <scrape_config> ... ]

# Alerting configuration. Mainly specifies the Alertmanager instances that
# Prometheus pushes alerts to.
alerting:
  alert_relabel_configs:
    [ - <relabel_config> ... ]
  alertmanagers:
    [ - <alertmanager_config> ... ]

# Write API addresses of the backend storage.
remote_write:
  [ - <remote_write> ... ]

# Read API addresses of the backend storage.
remote_read:
  [ - <remote_read> ... ]

scrape_configs 配置

# Schema of one scrape job, in schema notation: entries wrapped in [ ... ]
# are optional and <...> marks a placeholder.
# scrape_configs is a list, so each job is written as a "- job_name: ..." item.
scrape_configs:
  # Name of the job whose metrics are scraped.
  - job_name: <job_name>
    # Scrape interval; inherits the global value by default.
    [ scrape_interval: <duration> | default = <global_config.scrape_interval> ]
    # Scrape timeout; inherits the global value by default.
    [ scrape_timeout: <duration> | default = <global_config.scrape_timeout> ]
    # Path to scrape; defaults to /metrics.
    [ metrics_path: <path> | default = /metrics ]
    # Protocol used for scraping: http or https.
    [ scheme: <scheme> | default = http ]
    # URL parameters.
    params:
      [ <string>: [<string>, ...] ]
    # Authentication credentials.
    basic_auth:
      [ username: <string> ]
      [ password: <secret> ]
      [ password_file: <string> ]
    # Token value, used for authentication when fetching metrics.
    [ bearer_token: <secret> ]
    # File to read the token from, used for authentication when fetching metrics.
    [ bearer_token_file: /path/to/bearer/token/file ]
    # TLS certificates needed when fetching metrics.
    tls_config:
      [ <tls_config> ]
    # Static target configuration.
    static_configs:
      [ - targets:
          [ - '<host>' ]... ]
    # List of Consul service discovery configurations ("_sd_" stands for
    # service discovery).
    consul_sd_configs:
      [ - <consul_sd_config> ... ]

Prometheus官网示例配置文件

# my global config
# Defaults declared here are inherited by scrape jobs that do not override them.
global:
  scrape_interval: 15s
  evaluation_interval: 30s
  # scrape_timeout is set to the global default (10s).

  # Entries nested below are the individual external labels.
  external_labels:
    monitor: codelab
    foo: bar

# Rule files to load; each entry is a file path or glob pattern.
rule_files:
  - "first.rules"
  - "my/*.rules"

# Remote storage write endpoints; each list item configures one endpoint.
remote_write:
  - url: http://remote1/push
    name: drop_expensive
    write_relabel_configs:
      - source_labels: [__name__]
        regex: expensive.*
        action: drop
    oauth2:
      client_id: "123"
      client_secret: "456"
      token_url: "http://remote1/auth"
      # NOTE(review): nested under oauth2 as in the upstream Prometheus test
      # fixture; the flattened original does not show the parent. Confirm.
      tls_config:
        cert_file: valid_cert_file
        key_file: valid_key_file

  - url: http://remote2/push
    name: rw_tls
    tls_config:
      cert_file: valid_cert_file
      key_file: valid_key_file
    headers:
      name: value

# Remote storage read endpoints; each list item configures one endpoint.
remote_read:
  - url: http://remote1/read
    read_recent: true
    name: default
    enable_http2: false
  - url: http://remote3/read
    read_recent: false
    name: read_special
    required_matchers:
      job: special
    tls_config:
      cert_file: valid_cert_file
      key_file: valid_key_file

# Scrape jobs. Each "- job_name" item is one job; its *_sd_configs and
# static_configs lists describe how the job's targets are found.
scrape_configs:
  - job_name: prometheus

    honor_labels: true
    # scrape_interval is defined by the configured global (15s).
    # scrape_timeout is defined by the global default (10s).

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    file_sd_configs:
      - files:
          - foo/*.slow.json
          - foo/*.slow.yml
          - single/file.yml
        refresh_interval: 10m
      - files:
          - bar/*.yaml

    static_configs:
      - targets: ["localhost:9090", "localhost:9191"]
        labels:
          my: label
          your: label

    relabel_configs:
      - source_labels: [job, __meta_dns_name]
        regex: (.*)some-[regex]
        target_label: job
        replacement: foo-${1}
        # action defaults to 'replace'
      - source_labels: [abc]
        target_label: cde
      - replacement: static
        target_label: abc
      - regex:
        replacement: static
        target_label: abc

    authorization:
      credentials_file: valid_token_file

    tls_config:
      min_version: TLS10

  - job_name: service-x

    basic_auth:
      username: admin_name
      password: "multiline\nmysecret\ntest"

    scrape_interval: 50s
    scrape_timeout: 5s

    body_size_limit: 10MB
    sample_limit: 1000

    metrics_path: /my_path
    scheme: https

    dns_sd_configs:
      - refresh_interval: 15s
        names:
          - first.dns.address.domain.com
          - second.dns.address.domain.com
      - names:
          - first.dns.address.domain.com

    relabel_configs:
      - source_labels: [job]
        regex: (.*)some-[regex]
        action: drop
      - source_labels: [__address__]
        modulus: 8
        target_label: __tmp_hash
        action: hashmod
      - source_labels: [__tmp_hash]
        regex: 1
        action: keep
      - action: labelmap
        regex: 1
      - action: labeldrop
        regex: d
      - action: labelkeep
        regex: k

    metric_relabel_configs:
      - source_labels: [__name__]
        regex: expensive_metric.*
        action: drop

  - job_name: service-y

    consul_sd_configs:
      - server: "localhost:1234"
        token: mysecret
        services: ["nginx", "cache", "mysql"]
        tags: ["canary", "v1"]
        node_meta:
          rack: "123"
        allow_stale: true
        scheme: https
        tls_config:
          ca_file: valid_ca_file
          cert_file: valid_cert_file
          key_file: valid_key_file
          insecure_skip_verify: false

    relabel_configs:
      - source_labels: [__meta_sd_consul_tags]
        separator: ","
        regex: label:([^=]+)=([^,]+)
        target_label: ${1}
        replacement: ${2}

  - job_name: service-z

    tls_config:
      cert_file: valid_cert_file
      key_file: valid_key_file

    authorization:
      credentials: mysecret

  - job_name: service-kubernetes

    kubernetes_sd_configs:
      - role: endpoints
        api_server: "https://localhost:1234"
        tls_config:
          cert_file: valid_cert_file
          key_file: valid_key_file

        # NOTE(review): kept inside the kubernetes_sd_configs item as in the
        # upstream Prometheus test fixture; the flattened original does not
        # show the parent. Confirm.
        basic_auth:
          username: "myusername"
          password: "mysecret"

  - job_name: service-kubernetes-namespaces

    kubernetes_sd_configs:
      - role: endpoints
        api_server: "https://localhost:1234"
        namespaces:
          names:
            - default

    # NOTE(review): placed at job level as in the upstream Prometheus test
    # fixture; the flattened original does not show the parent. Confirm.
    basic_auth:
      username: "myusername"
      password_file: valid_password_file

  - job_name: service-kuma

    kuma_sd_configs:
      - server: http://kuma-control-plane.kuma-system.svc:5676

  - job_name: service-marathon
    marathon_sd_configs:
      - servers:
          - "https://marathon.example.com:443"

        auth_token: "mysecret"
        tls_config:
          cert_file: valid_cert_file
          key_file: valid_key_file

  - job_name: service-nomad
    nomad_sd_configs:
      - server: 'http://localhost:4646'

  - job_name: service-ec2
    ec2_sd_configs:
      - region: us-east-1
        access_key: access
        secret_key: mysecret
        profile: profile
        filters:
          - name: tag:environment
            values:
              - prod

          - name: tag:service
            values:
              - web
              - db

  - job_name: service-lightsail
    lightsail_sd_configs:
      - region: us-east-1
        access_key: access
        secret_key: mysecret
        profile: profile

  - job_name: service-azure
    azure_sd_configs:
      - environment: AzurePublicCloud
        authentication_method: OAuth
        subscription_id: 11AAAA11-A11A-111A-A111-1111A1111A11
        resource_group: my-resource-group
        tenant_id: BBBB222B-B2B2-2B22-B222-2BB2222BB2B2
        client_id: 333333CC-3C33-3333-CCC3-33C3CCCCC33C
        client_secret: mysecret
        port: 9100

  - job_name: service-nerve
    nerve_sd_configs:
      - servers:
          - localhost
        paths:
          - /monitoring

  - job_name: 0123service-xxx
    metrics_path: /metrics
    static_configs:
      - targets:
          - localhost:9090

  - job_name: badfederation
    honor_timestamps: false
    metrics_path: /federate
    static_configs:
      - targets:
          - localhost:9090

  - job_name: 測試
    metrics_path: /metrics
    static_configs:
      - targets:
          - localhost:9090

  - job_name: httpsd
    http_sd_configs:
      - url: "http://example.com/prometheus"

  - job_name: service-triton
    triton_sd_configs:
      - account: "testAccount"
        dns_suffix: "triton.example.com"
        endpoint: "triton.example.com"
        port: 9163
        refresh_interval: 1m
        version: 1
        tls_config:
          cert_file: valid_cert_file
          key_file: valid_key_file

  - job_name: digitalocean-droplets
    digitalocean_sd_configs:
      - authorization:
          credentials: abcdef

  - job_name: docker
    docker_sd_configs:
      - host: unix:///var/run/docker.sock

  - job_name: dockerswarm
    dockerswarm_sd_configs:
      - host: http://127.0.0.1:2375
        role: nodes

  - job_name: service-openstack
    openstack_sd_configs:
      - role: instance
        region: RegionOne
        port: 80
        refresh_interval: 1m
        tls_config:
          ca_file: valid_ca_file
          cert_file: valid_cert_file
          key_file: valid_key_file

  - job_name: service-puppetdb
    puppetdb_sd_configs:
      - url: https://puppetserver/
        query: 'resources { type = "Package" and title = "httpd" }'
        include_parameters: true
        port: 80
        refresh_interval: 1m
        tls_config:
          ca_file: valid_ca_file
          cert_file: valid_cert_file
          key_file: valid_key_file

  - job_name: hetzner
    relabel_configs:
      - action: uppercase
        source_labels: [instance]
        target_label: instance
    hetzner_sd_configs:
      - role: hcloud
        authorization:
          credentials: abcdef
      - role: robot
        basic_auth:
          username: abcdef
          password: abcdef

  - job_name: service-eureka
    eureka_sd_configs:
      - server: "http://eureka.example.com:8761/eureka"

  - job_name: scaleway
    scaleway_sd_configs:
      - role: instance
        project_id: 11111111-1111-1111-1111-111111111112
        access_key: SCWXXXXXXXXXXXXXXXXX
        secret_key: 11111111-1111-1111-1111-111111111111
      - role: baremetal
        project_id: 11111111-1111-1111-1111-111111111112
        access_key: SCWXXXXXXXXXXXXXXXXX
        secret_key: 11111111-1111-1111-1111-111111111111

  - job_name: linode-instances
    linode_sd_configs:
      - authorization:
          credentials: abcdef

  - job_name: uyuni
    uyuni_sd_configs:
      - server: https://localhost:1234
        username: gopher
        password: hole

  - job_name: ionos
    ionos_sd_configs:
      - datacenter_id: 8feda53f-15f0-447f-badf-ebe32dad2fc0
        authorization:
          credentials: abcdef

  - job_name: vultr
    vultr_sd_configs:
      - authorization:
          credentials: abcdef

# Alertmanager instances that alerts are sent to, listed as static targets.
alerting:
  alertmanagers:
    - scheme: https
      static_configs:
        - targets:
            - "1.2.3.4:9093"
            - "1.2.3.5:9093"
            - "1.2.3.6:9093"

# Storage settings; the only option set here belongs to the tsdb sub-section.
storage:
  tsdb:
    out_of_order_time_window: 30m

# Tracing settings: endpoint, client type, extra headers, timeout,
# compression and TLS options.
tracing:
  endpoint: "localhost:4317"
  client_type: "grpc"
  headers:
    foo: "bar"
  timeout: 5s
  compression: "gzip"
  tls_config:
    cert_file: valid_cert_file
    key_file: valid_key_file
    insecure_skip_verify: true