# YAML · 90 lines · 2.1 KiB
# Container image for the exporter. The tag carries both a version and a
# sha256 digest, so the reference is pinned to one exact image.
image:
  repository: utkuozdemir/nvidia_gpu_exporter
  pullPolicy: IfNotPresent
  tag: 1.2.0@sha256:cc407f77ab017101ce233a0185875ebc75d2a0911381741b20ad91f695e488c7

# Container-level security settings: privileged, running as UID/GID 0, with a
# writable root filesystem.
# NOTE(review): presumably needed so the exporter can reach the host GPU
# device nodes mounted under `persistence` - confirm before tightening.
securityContext:
  container:
    privileged: true
    readOnlyRootFilesystem: false
    runAsUser: 0
    runAsGroup: 0

# Main service exposing the exporter over HTTP. The port value is also what
# the container's --web.listen-address argument is templated from.
service:
  main:
    ports:
      main:
        protocol: http
        port: 9835

# Main workload, deployed as a DaemonSet.
workload:
  main:
    type: DaemonSet
    podSpec:
      containers:
        main:
          # Exporter command-line flags. Each flag and its value are separate
          # list items. Templated values are quoted so they are always read
          # as plain strings, whatever the expression starts with.
          args:
            - --web.listen-address
            # Listen on all interfaces at the main service port.
            - ":{{ .Values.service.main.ports.main.port }}"
            - --web.telemetry-path
            - "{{ .Values.metricsEndpoint }}"
            - --nvidia-smi-command
            - nvidia-smi
            - --log.level
            - "{{ .Values.logs.general.level }}"
            - --log.format
            - "{{ .Values.logs.general.format }}"
          # Liveness and readiness request the metrics path on the main port;
          # startup only performs a TCP check on that port.
          probes:
            liveness:
              path: "{{ .Values.metricsEndpoint }}"
              port: main
            readiness:
              path: "{{ .Values.metricsEndpoint }}"
              port: main
            startup:
              type: tcp
              port: main

# Read-only hostPath mounts that expose the host's NVIDIA device nodes, the
# nvidia-smi binary and the NVML shared library inside the container at the
# same paths they have on the host.
persistence:
  # NOTE(review): the key reads "nviaictl", which looks like a typo of
  # "nvidiactl". Kept unchanged because renaming it would change the key that
  # overrides have to target - confirm before fixing.
  nviaictl:
    enabled: true
    type: hostPath
    hostPath: /dev/nvidiactl
    mountPath: /dev/nvidiactl
    readOnly: true
  # Only /dev/nvidia0 is mounted here; no other /dev/nvidiaN node is listed.
  nvidia0:
    enabled: true
    type: hostPath
    hostPath: /dev/nvidia0
    mountPath: /dev/nvidia0
    readOnly: true
  # Binary invoked through the --nvidia-smi-command argument.
  nvidiasmi:
    enabled: true
    type: hostPath
    hostPath: /usr/bin/nvidia-smi
    mountPath: /usr/bin/nvidia-smi
    readOnly: true
  # NVML library, mounted under both its unversioned and its .so.1 name.
  libnvidiamlso:
    enabled: true
    type: hostPath
    hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so
    mountPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so
    readOnly: true
  libnvidiamlso1:
    enabled: true
    type: hostPath
    hostPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
    mountPath: /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1
    readOnly: true

# Metrics scraping, enabled with type "servicemonitor". The single endpoint
# targets the main port at the shared metrics path.
metrics:
  main:
    enabled: true
    type: "servicemonitor"
    endpoints:
      - port: main
        path: "{{ .Values.metricsEndpoint }}"

# The "open" portal entry is switched off.
portal:
  open:
    enabled: false

# HTTP path shared by the --web.telemetry-path argument, the liveness and
# readiness probes, and the metrics endpoint definition.
metricsEndpoint: "/metrics"

# Logging settings passed to the exporter through the --log.level and
# --log.format arguments.
logs:
  general:
    level: info
    format: logfmt