Skip to content

Commit a1d89e1

Browse files
committed
fix
1 parent 23c002a commit a1d89e1

File tree

4 files changed

+127
-50
lines changed

4 files changed

+127
-50
lines changed

packages/nvidia_gpu/data_stream/stats/elasticsearch/ingest_pipeline/default.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ processors:
2727
ignore_missing: true
2828
- rename:
2929
field: prometheus.labels.instance
30-
target_field: gpu.labels.instance
30+
target_field: server.address
3131
ignore_missing: true
3232
- rename:
3333
field: prometheus.labels.job

packages/nvidia_gpu/data_stream/stats/fields/fields.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,6 @@
2323
type: keyword
2424
description: >
2525
Nvidia GPU
26-
- name: instance
27-
type: keyword
28-
description: >
29-
Nvidia GPU instance name
3026
- name: job
3127
type: keyword
3228
description: >
@@ -50,7 +46,7 @@
5046
- name: uuid
5147
type: keyword
5248
description: |
53-
Nvidia GPU uuid
49+
Nvidia GPU UUID
5450
dimension: true
5551
- name: clock
5652
type: group

packages/nvidia_gpu/data_stream/stats/sample_event.json

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,90 @@
11
{
2-
"@timestamp": "2025-06-19T11:01:58.508Z",
2+
"@timestamp": "2025-06-23T12:25:23.237Z",
33
"agent": {
4-
"ephemeral_id": "5da1c762-479c-4a72-8abb-960a3dcbeae3",
5-
"id": "6682006f-8763-47e4-af1d-37a1fb49ce4c",
6-
"name": "elastic-agent-62642",
4+
"ephemeral_id": "32e01a49-8a7c-4ab3-a27c-10f28999caf7",
5+
"id": "d2565d99-5e10-498c-a669-34c328de3746",
6+
"name": "docker-fleet-agent",
77
"type": "metricbeat",
88
"version": "8.17.0"
99
},
1010
"data_stream": {
1111
"dataset": "nvidia_gpu.stats",
12-
"namespace": "85686",
12+
"namespace": "default",
1313
"type": "metrics"
1414
},
1515
"ecs": {
1616
"version": "8.17.0"
1717
},
1818
"elastic_agent": {
19-
"id": "6682006f-8763-47e4-af1d-37a1fb49ce4c",
19+
"id": "d2565d99-5e10-498c-a669-34c328de3746",
2020
"snapshot": false,
2121
"version": "8.17.0"
2222
},
2323
"event": {
2424
"agent_id_status": "verified",
2525
"dataset": "nvidia_gpu.stats",
26-
"duration": 18410680,
27-
"ingested": "2025-06-19T11:02:01Z",
26+
"duration": 84026367,
27+
"ingested": "2025-06-23T12:25:24Z",
2828
"module": "prometheus"
2929
},
3030
"gpu": {
31+
"clock": {
32+
"mem_frequency": 405,
33+
"streaming_multiprocessor_frequency": 300
34+
},
3135
"labels": {
32-
"instance": "svc-nvidia_gpu:9400",
33-
"job": "prometheus"
36+
"device": "nvidia0",
37+
"driver_version": "525.105.17",
38+
"gpu": "0",
39+
"hostname": "924e17218b6f",
40+
"job": "prometheus",
41+
"model_name": "Tesla T4",
42+
"pci_bus_id": "00000000:00:04.0",
43+
"uuid": "GPU-2492e3fa-2252-1730-0d1a-8d12ab32cdf0"
44+
},
45+
"license_vgpu_status": 0,
46+
"memory": {
47+
"framebuffer": {
48+
"free_size": 14923,
49+
"used_size": 5
50+
}
51+
},
52+
"nvlink": {
53+
"bandwidth_total": 0
54+
},
55+
"pcie": {
56+
"replay": 0
3457
},
35-
"up": "1"
58+
"power": {
59+
"energy_consumption_total": 27649212030,
60+
"usage": 12.239
61+
},
62+
"temperature": {
63+
"gpu": 36,
64+
"memory": 0
65+
},
66+
"utilization": {
67+
"decoder": {
68+
"pct": 0
69+
},
70+
"encoder": {
71+
"pct": 0
72+
},
73+
"gpu": {
74+
"pct": 0
75+
},
76+
"memory_copy": {
77+
"pct": 0
78+
}
79+
}
3680
},
3781
"host": {
3882
"architecture": "x86_64",
3983
"containerized": true,
40-
"hostname": "elastic-agent-62642",
41-
"ip": [
42-
"172.18.0.6",
43-
"172.25.0.2"
44-
],
45-
"mac": [
46-
"1A-E2-5B-13-20-FD",
47-
"E2-78-59-F3-DA-3E"
48-
],
49-
"name": "elastic-agent-62642",
84+
"hostname": "docker-fleet-agent",
85+
"ip": "172.18.0.8",
86+
"mac": "22-18-A8-89-5C-AE",
87+
"name": "docker-fleet-agent",
5088
"os": {
5189
"family": "",
5290
"kernel": "5.15.153.1-microsoft-standard-WSL2",
@@ -60,6 +98,9 @@
6098
"name": "collector",
6199
"period": 10000
62100
},
101+
"server": {
102+
"address": "svc-nvidia_gpu:9400"
103+
},
63104
"service": {
64105
"address": "http://svc-nvidia_gpu:9400/metrics",
65106
"type": "prometheus"

packages/nvidia_gpu/docs/README.md

Lines changed: 63 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -56,54 +56,92 @@ An example event for `stats` looks as follows:
5656

5757
```json
5858
{
59-
"@timestamp": "2025-06-19T11:01:58.508Z",
59+
"@timestamp": "2025-06-23T12:25:23.237Z",
6060
"agent": {
61-
"ephemeral_id": "5da1c762-479c-4a72-8abb-960a3dcbeae3",
62-
"id": "6682006f-8763-47e4-af1d-37a1fb49ce4c",
63-
"name": "elastic-agent-62642",
61+
"ephemeral_id": "32e01a49-8a7c-4ab3-a27c-10f28999caf7",
62+
"id": "d2565d99-5e10-498c-a669-34c328de3746",
63+
"name": "docker-fleet-agent",
6464
"type": "metricbeat",
6565
"version": "8.17.0"
6666
},
6767
"data_stream": {
6868
"dataset": "nvidia_gpu.stats",
69-
"namespace": "85686",
69+
"namespace": "default",
7070
"type": "metrics"
7171
},
7272
"ecs": {
7373
"version": "8.17.0"
7474
},
7575
"elastic_agent": {
76-
"id": "6682006f-8763-47e4-af1d-37a1fb49ce4c",
76+
"id": "d2565d99-5e10-498c-a669-34c328de3746",
7777
"snapshot": false,
7878
"version": "8.17.0"
7979
},
8080
"event": {
8181
"agent_id_status": "verified",
8282
"dataset": "nvidia_gpu.stats",
83-
"duration": 18410680,
84-
"ingested": "2025-06-19T11:02:01Z",
83+
"duration": 84026367,
84+
"ingested": "2025-06-23T12:25:24Z",
8585
"module": "prometheus"
8686
},
8787
"gpu": {
88+
"clock": {
89+
"mem_frequency": 405,
90+
"streaming_multiprocessor_frequency": 300
91+
},
8892
"labels": {
89-
"instance": "svc-nvidia_gpu:9400",
90-
"job": "prometheus"
93+
"device": "nvidia0",
94+
"driver_version": "525.105.17",
95+
"gpu": "0",
96+
"hostname": "924e17218b6f",
97+
"job": "prometheus",
98+
"model_name": "Tesla T4",
99+
"pci_bus_id": "00000000:00:04.0",
100+
"uuid": "GPU-2492e3fa-2252-1730-0d1a-8d12ab32cdf0"
101+
},
102+
"license_vgpu_status": 0,
103+
"memory": {
104+
"framebuffer": {
105+
"free_size": 14923,
106+
"used_size": 5
107+
}
108+
},
109+
"nvlink": {
110+
"bandwidth_total": 0
111+
},
112+
"pcie": {
113+
"replay": 0
91114
},
92-
"up": "1"
115+
"power": {
116+
"energy_consumption_total": 27649212030,
117+
"usage": 12.239
118+
},
119+
"temperature": {
120+
"gpu": 36,
121+
"memory": 0
122+
},
123+
"utilization": {
124+
"decoder": {
125+
"pct": 0
126+
},
127+
"encoder": {
128+
"pct": 0
129+
},
130+
"gpu": {
131+
"pct": 0
132+
},
133+
"memory_copy": {
134+
"pct": 0
135+
}
136+
}
93137
},
94138
"host": {
95139
"architecture": "x86_64",
96140
"containerized": true,
97-
"hostname": "elastic-agent-62642",
98-
"ip": [
99-
"172.18.0.6",
100-
"172.25.0.2"
101-
],
102-
"mac": [
103-
"1A-E2-5B-13-20-FD",
104-
"E2-78-59-F3-DA-3E"
105-
],
106-
"name": "elastic-agent-62642",
141+
"hostname": "docker-fleet-agent",
142+
"ip": "172.18.0.8",
143+
"mac": "22-18-A8-89-5C-AE",
144+
"name": "docker-fleet-agent",
107145
"os": {
108146
"family": "",
109147
"kernel": "5.15.153.1-microsoft-standard-WSL2",
@@ -117,6 +155,9 @@ An example event for `stats` looks as follows:
117155
"name": "collector",
118156
"period": 10000
119157
},
158+
"server": {
159+
"address": "svc-nvidia_gpu:9400"
160+
},
120161
"service": {
121162
"address": "http://svc-nvidia_gpu:9400/metrics",
122163
"type": "prometheus"
@@ -158,11 +199,10 @@ An example event for `stats` looks as follows:
158199
| gpu.labels.err_msg | Nvidia GPU error message | keyword | | |
159200
| gpu.labels.gpu | Nvidia GPU | keyword | | |
160201
| gpu.labels.hostname | Nvidia GPU hostname | keyword | | |
161-
| gpu.labels.instance | Nvidia GPU instance name | keyword | | |
162202
| gpu.labels.job | Nvidia GPU job | keyword | | |
163203
| gpu.labels.model_name | Nvidia GPU model name | keyword | | |
164204
| gpu.labels.pci_bus_id | Nvidia GPU PCI bus ID | keyword | | |
165-
| gpu.labels.uuid | Nvidia GPU uuid | keyword | | |
205+
| gpu.labels.uuid | Nvidia GPU UUID | keyword | | |
166206
| gpu.license_vgpu_status | vGPU License status. | long | | gauge |
167207
| gpu.memory.framebuffer.free_size | Free size of the framebuffer (in MiB). | float | | gauge |
168208
| gpu.memory.framebuffer.used_size | Used size of the framebuffer (in MiB). | float | | gauge |

0 commit comments

Comments
 (0)