diff --git a/argocd/apps/otel-collector.yaml b/argocd/apps/otel-collector.yaml
new file mode 100644
index 0000000..221b67c
--- /dev/null
+++ b/argocd/apps/otel-collector.yaml
@@ -0,0 +1,29 @@
+# Argo CD Application: installs the opentelemetry-collector Helm chart into the
+# monitoring namespace, reading Helm values from the homelab repo ($values ref).
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: otel-collector
+  namespace: argocd
+spec:
+  project: default
+  sources:
+    - repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
+      chart: opentelemetry-collector
+      targetRevision: "0.147.1"
+      helm:
+        releaseName: otel-collector
+        valueFiles:
+          - $values/values/otel-collector.yaml
+    - repoURL: https://gitea.nik4nao.com/nik/homelab.git
+      targetRevision: HEAD
+      ref: values
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=false
diff --git a/argocd/apps/tempo.yaml b/argocd/apps/tempo.yaml
new file mode 100644
index 0000000..a5fc981
--- /dev/null
+++ b/argocd/apps/tempo.yaml
@@ -0,0 +1,29 @@
+# Argo CD Application: installs the Grafana tempo Helm chart into the
+# monitoring namespace, reading Helm values from the homelab repo ($values ref).
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: tempo
+  namespace: argocd
+spec:
+  project: default
+  sources:
+    - repoURL: https://grafana.github.io/helm-charts
+      chart: tempo
+      targetRevision: "1.24.4"
+      helm:
+        releaseName: tempo
+        valueFiles:
+          - $values/values/tempo.yaml
+    - repoURL: https://gitea.nik4nao.com/nik/homelab.git
+      targetRevision: HEAD
+      ref: values
+  destination:
+    server: https://kubernetes.default.svc
+    namespace: monitoring
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=false
diff --git a/manifests/monitoring/tempo-datasource.yaml b/manifests/monitoring/tempo-datasource.yaml
new file mode 100644
index 0000000..38703b4
--- /dev/null
+++ b/manifests/monitoring/tempo-datasource.yaml
@@ -0,0 +1,29 @@
+# Grafana datasource provisioning file for Tempo, shipped as a ConfigMap.
+# NOTE(review): presumably loaded via the grafana_datasource label - confirm.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: grafana-tempo-datasource
+  namespace: monitoring
+  labels:
+    grafana_datasource: "1"
+data:
+  tempo-datasource.yaml: |
+    apiVersion: 1
+    datasources:
+      - name: Tempo
+        type: tempo
+        access: proxy
+        url: http://tempo.monitoring.svc.cluster.local:3200
+        isDefault: false
+        jsonData:
+          tracesToLogsV2:
+            datasourceUid: loki
+            spanStartTimeShift: '-1m'
+            spanEndTimeShift: '1m'
+          serviceMap:
+            datasourceUid: prometheus
+          nodeGraph:
+            enabled: true
+          search:
+            hide: false
diff --git a/values/kube-prometheus-stack.yaml b/values/kube-prometheus-stack.yaml
index 79b8b1e..4d34eaf 100644
--- a/values/kube-prometheus-stack.yaml
+++ b/values/kube-prometheus-stack.yaml
@@ -53,6 +53,8 @@ grafana:
 
 prometheus:
   prometheusSpec:
+    # Accept pushes on /api/v1/write from the OTel collector's remote-write exporter.
+    enableRemoteWriteReceiver: true
     retention: 15d
     storageSpec:
       volumeClaimTemplate:
diff --git a/values/otel-collector.yaml b/values/otel-collector.yaml
new file mode 100644
index 0000000..8e0742e
--- /dev/null
+++ b/values/otel-collector.yaml
@@ -0,0 +1,74 @@
+# yaml-language-server: $schema=false
+# Helm values for the opentelemetry-collector chart: a single OTLP receiver,
+# traces forwarded to Tempo and metrics pushed to Prometheus remote write.
+mode: deployment
+
+# The release is named otel-collector but the chart is opentelemetry-collector,
+# so the chart would otherwise name its resources
+# otel-collector-opentelemetry-collector. Pin the name so the Service matches
+# the otel-collector.monitoring.svc.cluster.local endpoint Traefik exports to.
+fullnameOverride: otel-collector
+
+# The chart requires image.repository to be set explicitly. The contrib
+# distribution is chosen because it includes the prometheusremotewrite exporter.
+image:
+  repository: otel/opentelemetry-collector-contrib
+
+replicaCount: 1
+
+config:
+  receivers:
+    otlp:
+      protocols:
+        grpc:
+          endpoint: 0.0.0.0:4317
+        http:
+          endpoint: 0.0.0.0:4318
+
+  processors:
+    memory_limiter:
+      limit_mib: 200
+      check_interval: 1s
+    batch:
+      timeout: 1s
+      send_batch_size: 1024
+
+  exporters:
+    otlp/tempo:
+      endpoint: tempo.monitoring.svc.cluster.local:4317
+      tls:
+        insecure: true
+    prometheusremotewrite:
+      endpoint: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
+      tls:
+        insecure_skip_verify: true
+
+  service:
+    pipelines:
+      traces:
+        receivers: [otlp]
+        processors: [memory_limiter, batch]
+        exporters: [otlp/tempo]
+      metrics:
+        receivers: [otlp]
+        processors: [memory_limiter, batch]
+        exporters: [prometheusremotewrite]
+
+ports:
+  otlp:
+    enabled: true
+    containerPort: 4317
+    servicePort: 4317
+    protocol: TCP
+  otlp-http:
+    enabled: true
+    containerPort: 4318
+    servicePort: 4318
+    protocol: TCP
+
+resources:
+  limits:
+    memory: 256Mi
+  requests:
+    memory: 128Mi
+    cpu: 50m
diff --git a/values/tempo.yaml b/values/tempo.yaml
new file mode 100644
index 0000000..78dd842
--- /dev/null
+++ b/values/tempo.yaml
@@ -0,0 +1,21 @@
+# Helm values for the tempo chart: traces kept on local disk backed by a
+# 10Gi local-path volume, exposed in-cluster through a ClusterIP Service.
+tempo:
+  storage:
+    trace:
+      backend: local
+      local:
+        path: /var/tempo/traces
+  retention: 72h
+
+persistence:
+  enabled: true
+  storageClassName: local-path
+  size: 10Gi
+
+serviceMonitor:
+  enabled: true
+  namespace: monitoring
+
+service:
+  type: ClusterIP
diff --git a/values/traefik.yaml b/values/traefik.yaml
index f668484..230a7e1 100644
--- a/values/traefik.yaml
+++ b/values/traefik.yaml
@@ -40,6 +40,11 @@ additionalArguments:
   - "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web"
   - "--certificatesresolvers.letsencrypt.acme.email=nik.afiq98@ymail.com"
   - "--certificatesresolvers.letsencrypt.acme.storage=/data/traefik/acme.json"
+  # Export traces over plaintext OTLP/gRPC to the in-cluster OTel collector.
+  - "--tracing.otlp=true"
+  - "--tracing.otlp.grpc=true"
+  - "--tracing.otlp.grpc.endpoint=otel-collector.monitoring.svc.cluster.local:4317"
+  - "--tracing.otlp.grpc.insecure=true"
 
 providers:
   kubernetesCRD: