---
# Apply:
#   helm upgrade --install kube-prometheus-stack \
#     prometheus-community/kube-prometheus-stack \
#     -f values/kube-prometheus-stack.yaml -n monitoring --create-namespace
# Description: Helm values for the Prometheus, Grafana, and Alertmanager
#   monitoring stack.

grafana:
  # Admin credentials are read from an existing Secret instead of being
  # stored in this file.
  admin:
    existingSecret: grafana-admin-secret
    userKey: admin-user
    passwordKey: admin-password

  "grafana.ini":
    server:
      root_url: https://grafana.nik4nao.com
    # Single sign-on through Authentik (generic OAuth2 / OIDC).
    auth.generic_oauth:
      enabled: true
      name: Authentik
      allow_sign_up: true
      # $__file{...} makes Grafana read the value from the given file; the
      # files come from the Secret mounted via extraSecretMounts below.
      client_id: '$__file{/etc/secrets/authentik-grafana-oauth/client-id}'
      client_secret: '$__file{/etc/secrets/authentik-grafana-oauth/client-secret}'
      scopes: openid email profile
      auth_url: https://auth.nik4nao.com/application/o/authorize/
      token_url: https://auth.nik4nao.com/application/o/token/
      api_url: https://auth.nik4nao.com/application/o/userinfo/
      # Members of the "authentik Admins" group get the Admin role; every
      # other user gets Viewer.
      role_attribute_path: "contains(groups, 'authentik Admins') && 'Admin' || 'Viewer'"
    # Hide the username/password form and disable basic auth.
    auth:
      disable_login_form: true
    auth.basic:
      enabled: false

  ingress:
    enabled: true
    ingressClassName: traefik
    annotations:
      traefik.ingress.kubernetes.io/router.entrypoints: websecure
      # Annotation values must be strings, hence the quoted "true".
      traefik.ingress.kubernetes.io/router.tls: "true"
      cert-manager.io/cluster-issuer: letsencrypt-prod
    hosts:
      - grafana.nik4nao.com
    tls:
      - secretName: grafana-tls
        hosts:
          - grafana.nik4nao.com

  # Mounts the OAuth client credentials referenced by grafana.ini above.
  extraSecretMounts:
    - name: authentik-grafana-oauth
      secretName: authentik-grafana-oauth
      mountPath: /etc/secrets/authentik-grafana-oauth
      readOnly: true

  persistence:
    enabled: true
    size: 2Gi

  # The Grafana chart expects a mapping here (initChownData.enabled), not a
  # bare boolean.
  initChownData:
    enabled: true

  # NOTE(review): Grafana runs as root (UID 0) with this context, presumably
  # to work around volume ownership - confirm this is still required.
  securityContext:
    runAsNonRoot: false
    runAsUser: 0
    fsGroup: 472

  additionalDataSources:
    - name: Tempo
      type: tempo
      uid: tempo
      access: proxy
      url: http://tempo.monitoring.svc.cluster.local:3200
      isDefault: false
      version: 1
      jsonData:
        httpMethod: GET
        # Trace-to-logs links query the Loki data source, widening the span
        # time range by one minute on each side.
        tracesToLogsV2:
          datasourceUid: loki
          spanStartTimeShift: '-1m'
          spanEndTimeShift: '1m'
        serviceMap:
          datasourceUid: prometheus
        nodeGraph:
          enabled: true
        lokiSearch:
          datasourceUid: loki

prometheus:
  prometheusSpec:
    enableRemoteWriteReceiver: true
    # Data is kept for 15 days or until it reaches 8GB, whichever limit is
    # hit first; the size cap sits well below the 20Gi volume requested below.
    retention: 15d
    retentionSize: 8GB
    storageSpec:
      volumeClaimTemplate:
        metadata:
          annotations:
            helm.sh/resource-policy: keep
        spec:
          # Empty storage class plus an explicit volumeName binds the claim
          # to the named PersistentVolume instead of provisioning a new one.
          storageClassName: ""
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 20Gi
          volumeName: prometheus-pv

alertmanager:
  alertmanagerSpec:
    storage:
      volumeClaimTemplate:
        spec:
          storageClassName: local-path
          accessModes:
            - ReadWriteOnce
          resources:
            requests:
              storage: 1Gi

# Drop high-cardinality histogram bucket series scraped from the control-plane
# components (API server, etcd, scheduler, controller manager) and the kubelet
# to reduce Prometheus load and storage requirements.
kubeApiServer:
  serviceMonitor:
    metricRelabelings:
      - sourceLabels: [__name__]
        regex: "apiserver_request_duration_seconds_bucket|apiserver_request_sli_duration_seconds_bucket|apiserver_request_body_size_bytes_bucket|apiserver_response_sizes_bucket|apiserver_watch_cache_read_wait_seconds_bucket|apiserver_watch_events_sizes_bucket"
        action: drop

kubeEtcd:
  serviceMonitor:
    metricRelabelings:
      - sourceLabels: [__name__]
        regex: "etcd_request_duration_seconds_bucket"
        action: drop

kubeScheduler:
  serviceMonitor:
    metricRelabelings:
      - sourceLabels: [__name__]
        regex: "scheduler_plugin_execution_duration_seconds_bucket"
        action: drop

kubeControllerManager:
  serviceMonitor:
    metricRelabelings:
      - sourceLabels: [__name__]
        regex: "workqueue_queue_duration_seconds_bucket|workqueue_work_duration_seconds_bucket"
        action: drop

kubelet:
  serviceMonitor:
    metricRelabelings:
      - sourceLabels: [__name__]
        regex: "kubelet_runtime_operations_duration_seconds_bucket|prober_probe_duration_seconds_bucket"
        action: drop