feat: add OTel Collector, Tempo, Traefik OTLP tracing

This commit is contained in:
Nik Afiq 2026-03-25 20:17:47 +09:00
parent 34e358ebcc
commit 14f0e7c43c
7 changed files with 166 additions and 0 deletions

View File

@ -0,0 +1,27 @@
---
# Argo CD Application that deploys the opentelemetry-collector Helm chart into
# the `monitoring` namespace. Chart values are read from the homelab Git
# repository through a second, reference-only source.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: otel-collector
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: monitoring
  sources:
    # Upstream Helm chart, pinned to an exact chart version.
    - repoURL: https://open-telemetry.github.io/opentelemetry-helm-charts
      chart: opentelemetry-collector
      targetRevision: '0.147.1'
      helm:
        releaseName: otel-collector
        valueFiles:
          # `$values` refers to the source below that declares `ref: values`.
          - $values/values/otel-collector.yaml
    # Git repository that only supplies the values file referenced above.
    - repoURL: https://gitea.nik4nao.com/nik/homelab.git
      targetRevision: HEAD
      ref: values
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      # The target namespace is not created by this Application.
      - CreateNamespace=false

27
argocd/apps/tempo.yaml Normal file
View File

@ -0,0 +1,27 @@
---
# Argo CD Application that deploys the Grafana Tempo Helm chart into the
# `monitoring` namespace. Chart values are read from the homelab Git
# repository through a second, reference-only source.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  name: tempo
  namespace: argocd
spec:
  project: default
  destination:
    server: https://kubernetes.default.svc
    namespace: monitoring
  sources:
    # Upstream Helm chart, pinned to an exact chart version.
    - repoURL: https://grafana.github.io/helm-charts
      chart: tempo
      targetRevision: '1.24.4'
      helm:
        releaseName: tempo
        valueFiles:
          # `$values` refers to the source below that declares `ref: values`.
          - $values/values/tempo.yaml
    # Git repository that only supplies the values file referenced above.
    - repoURL: https://gitea.nik4nao.com/nik/homelab.git
      targetRevision: HEAD
      ref: values
  syncPolicy:
    automated:
      prune: true
      selfHeal: true
    syncOptions:
      # The target namespace is not created by this Application.
      - CreateNamespace=false

View File

@ -0,0 +1,27 @@
---
# ConfigMap carrying a Grafana datasource provisioning file for Tempo.
# The `grafana_datasource` label is presumably what the Grafana datasource
# sidecar selects on -- verify against the Grafana chart's sidecar settings.
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-tempo-datasource
  namespace: monitoring
  labels:
    grafana_datasource: '1'
data:
  # The block scalar below is the provisioning file content; it links traces
  # to the datasources with UIDs `loki` (logs) and `prometheus` (service map).
  tempo-datasource.yaml: |
    apiVersion: 1
    datasources:
      - name: Tempo
        type: tempo
        access: proxy
        url: http://tempo.monitoring.svc.cluster.local:3200
        isDefault: false
        jsonData:
          tracesToLogsV2:
            datasourceUid: loki
            spanStartTimeShift: '-1m'
            spanEndTimeShift: '1m'
          serviceMap:
            datasourceUid: prometheus
          nodeGraph:
            enabled: true
          search:
            hide: false

View File

@ -53,6 +53,7 @@ grafana:
prometheus:
prometheusSpec:
enableRemoteWriteReceiver: true
retention: 15d
storageSpec:
volumeClaimTemplate:

View File

@ -0,0 +1,61 @@
# yaml-language-server: $schema=false
# Helm values for the opentelemetry-collector chart (release `otel-collector`).
# Receives OTLP over gRPC/HTTP, forwards traces to Tempo and metrics to
# Prometheus via remote write.
mode: deployment
replicaCount: 1

# Pin the rendered resource names. Without this the chart's fullname helper
# yields `<release>-<chart>` (otel-collector-opentelemetry-collector), while
# clients such as Traefik export to
# `otel-collector.monitoring.svc.cluster.local:4317`.
fullnameOverride: otel-collector

# The chart requires the image repository to be set explicitly. The contrib
# distribution ships the prometheusremotewrite exporter used below.
image:
  repository: otel/opentelemetry-collector-contrib

config:
  receivers:
    otlp:
      protocols:
        grpc:
          endpoint: 0.0.0.0:4317
        http:
          endpoint: 0.0.0.0:4318
  processors:
    # limit_mib is kept below the 256Mi container memory limit set under
    # `resources` so the limiter can act before the container hits its limit.
    memory_limiter:
      limit_mib: 200
      check_interval: 1s
    batch:
      timeout: 1s
      send_batch_size: 1024
  exporters:
    # Traces: OTLP/gRPC to Tempo without TLS (in-cluster traffic).
    otlp/tempo:
      endpoint: tempo.monitoring.svc.cluster.local:4317
      tls:
        insecure: true
    # Metrics: Prometheus remote write over a plain http:// URL.
    prometheusremotewrite:
      endpoint: http://kube-prometheus-stack-prometheus.monitoring.svc.cluster.local:9090/api/v1/write
      tls:
        insecure_skip_verify: true
  service:
    pipelines:
      traces:
        receivers: [otlp]
        processors: [memory_limiter, batch]
        exporters: [otlp/tempo]
      metrics:
        receivers: [otlp]
        processors: [memory_limiter, batch]
        exporters: [prometheusremotewrite]

# Service/container ports matching the OTLP receiver endpoints above.
ports:
  otlp:
    enabled: true
    containerPort: 4317
    servicePort: 4317
    protocol: TCP
  otlp-http:
    enabled: true
    containerPort: 4318
    servicePort: 4318
    protocol: TCP

resources:
  limits:
    memory: 256Mi
  requests:
    memory: 128Mi
    cpu: 50m

19
values/tempo.yaml Normal file
View File

@ -0,0 +1,19 @@
# Helm values for the Grafana Tempo chart (release `tempo`).
tempo:
  # Trace blocks are kept for 72 hours.
  retention: 72h
  storage:
    trace:
      # Traces are stored on the local filesystem at the path below.
      backend: local
      local:
        path: /var/tempo/traces

# Persistent volume request: 10Gi from the `local-path` storage class.
persistence:
  enabled: true
  size: 10Gi
  storageClassName: local-path

service:
  type: ClusterIP

serviceMonitor:
  enabled: true
  namespace: monitoring

View File

@ -40,6 +40,10 @@ additionalArguments:
- "--certificatesresolvers.letsencrypt.acme.httpchallenge.entrypoint=web"
- "--certificatesresolvers.letsencrypt.acme.email=nik.afiq98@ymail.com"
- "--certificatesresolvers.letsencrypt.acme.storage=/data/traefik/acme.json"
- "--tracing.otlp=true"
- "--tracing.otlp.grpc=true"
- "--tracing.otlp.grpc.endpoint=otel-collector.monitoring.svc.cluster.local:4317"
- "--tracing.otlp.grpc.insecure=true"
providers:
kubernetesCRD: