From b5592a17057590a243e14465c86ffe7fcaec275a Mon Sep 17 00:00:00 2001 From: Nik Afiq Date: Thu, 9 Apr 2026 05:18:47 +0900 Subject: [PATCH] feat: enhance telemetry setup and command tracing in discord-bot and ha-gateway --- discord-bot/cmd/bot/main.go | 7 +++- .../adapters/primary/discord/handler.go | 23 +++++++++++ discord-bot/internal/telemetry/telemetry.go | 34 ++++++++++------ ha-gateway/cmd/gateway/main.go | 7 +++- .../internal/adapters/secondary/ha/client.go | 17 ++++---- ha-gateway/internal/telemetry/telemetry.go | 39 +++++++++++-------- 6 files changed, 89 insertions(+), 38 deletions(-) diff --git a/discord-bot/cmd/bot/main.go b/discord-bot/cmd/bot/main.go index dd78584..30fbdc2 100644 --- a/discord-bot/cmd/bot/main.go +++ b/discord-bot/cmd/bot/main.go @@ -42,11 +42,16 @@ func main() { defer stop() ctx = logger.WithLogger(ctx, log) - shutdown, err := telemetry.Setup(ctx, cfg, version) + shutdown, err := telemetry.Setup(ctx, "discord-bot", version, cfg) if err != nil { log.Error("telemetry setup failed", "err", err) os.Exit(1) } + if cfg.OTELEndpoint != "" { + log.Info("telemetry enabled", "endpoint", cfg.OTELEndpoint) + } else { + log.Debug("telemetry disabled") + } haClient, err := gateway.New(ctx, cfg.HAGatewayAddr, log) if err != nil { diff --git a/discord-bot/internal/adapters/primary/discord/handler.go b/discord-bot/internal/adapters/primary/discord/handler.go index 27ffb2b..d11d999 100644 --- a/discord-bot/internal/adapters/primary/discord/handler.go +++ b/discord-bot/internal/adapters/primary/discord/handler.go @@ -10,8 +10,13 @@ import ( apppkg "gitea.nik4nao.com/nik/home-services/discord-bot/internal/app" "gitea.nik4nao.com/nik/home-services/discord-bot/internal/logger" "github.com/bwmarrin/discordgo" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/attribute" + "go.opentelemetry.io/otel/trace" ) +var tracer = otel.Tracer("discord-bot/commands") + type commandHandler interface { HandleLightList(ctx context.Context) (string, error) HandleLightOn(ctx context.Context, entityID string, brightnessPct *uint32, colorTempKelvin *uint32) (string, error) @@ -53,6 +58,24 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses log.Debug("command received") data := i.ApplicationCommandData() + command := data.Name + if len(data.Options) > 0 { + command += "." + data.Options[0].Name + } + user := "" + if i.Member != nil && i.Member.User != nil { + user = i.Member.User.Username + } else if i.User != nil { + user = i.User.Username + } + ctx, span := tracer.Start(ctx, "command."+strings.ReplaceAll(command, ".", "_"), + trace.WithAttributes( + attribute.String("discord.command", command), + attribute.String("discord.user", user), + attribute.String("discord.guild", i.GuildID), + ), + ) + defer span.End() if len(data.Options) == 0 { h.respondError(ctx, s, i.Interaction, true, start, fmt.Errorf("missing subcommand")) return diff --git a/discord-bot/internal/telemetry/telemetry.go b/discord-bot/internal/telemetry/telemetry.go index 8589c71..232d02b 100644 --- a/discord-bot/internal/telemetry/telemetry.go +++ b/discord-bot/internal/telemetry/telemetry.go @@ -2,34 +2,37 @@ package telemetry import ( "context" + "errors" "time" + "gitea.nik4nao.com/nik/home-services/discord-bot/internal/logger" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/metric/noop" "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + tracenoop "go.opentelemetry.io/otel/trace/noop" "gitea.nik4nao.com/nik/home-services/discord-bot/internal/config" ) -func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown func(context.Context) error, err error) { +func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) { if cfg.OTELEndpoint == "" { + otel.SetTracerProvider(tracenoop.NewTracerProvider()) + otel.SetMeterProvider(noop.NewMeterProvider()) + logger.FromContext(ctx).Debug("otel disabled — OTEL_ENDPOINT not set") return func(context.Context) error { return nil }, nil } - res, err := resource.New(ctx, - resource.WithAttributes( - semconv.ServiceName("discord-bot"), - semconv.ServiceVersion(version), - ), + res := resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(serviceName), + semconv.ServiceVersionKey.String(version), ) - if err != nil { - return nil, err - } traceExp, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OTELEndpoint), @@ -41,6 +44,7 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu tp := sdktrace.NewTracerProvider( sdktrace.WithBatcher(traceExp), sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), ) otel.SetTracerProvider(tp) otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( @@ -64,9 +68,15 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu otel.SetMeterProvider(mp) return func(ctx context.Context) error { - if err := tp.Shutdown(ctx); err != nil { - return err + shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + var shutdownErr error + if err := tp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) } - return mp.Shutdown(ctx) + if err := mp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) + } + return shutdownErr }, nil } diff --git a/ha-gateway/cmd/gateway/main.go b/ha-gateway/cmd/gateway/main.go index fe622ca..b696e54 100644 --- a/ha-gateway/cmd/gateway/main.go +++ b/ha-gateway/cmd/gateway/main.go @@ -59,11 +59,16 @@ func main() { defer stop() ctx = logger.WithLogger(ctx, log) - shutdown, err := telemetry.Setup(ctx, cfg, version) + shutdown, err := telemetry.Setup(ctx, "ha-gateway", version, cfg) if err != nil { log.Error("telemetry setup failed", "err", err) os.Exit(1) } + if cfg.OTELEndpoint != "" { + log.Info("telemetry enabled", "endpoint", cfg.OTELEndpoint) + } else { + log.Debug("telemetry disabled") + } haClient := ha.NewClient(cfg, log) diff --git a/ha-gateway/internal/adapters/secondary/ha/client.go b/ha-gateway/internal/adapters/secondary/ha/client.go index 358cf63..c905c7c 100644 --- a/ha-gateway/internal/adapters/secondary/ha/client.go +++ b/ha-gateway/internal/adapters/secondary/ha/client.go @@ -16,6 +16,7 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" + "go.opentelemetry.io/otel/trace" ) var tracer = otel.Tracer("ha-gateway/ha-client") @@ -37,9 +38,10 @@ func NewClient(cfg *config.Config, log *slog.Logger) *Client { } func (c *Client) GetState(ctx context.Context, entityID string) (*driven.HAState, error) { - ctx, span := tracer.Start(ctx, "ha.GetState") + ctx, span := tracer.Start(ctx, "ha.GetState", + trace.WithAttributes(attribute.String("ha.entity_id", entityID)), + ) defer span.End() - span.SetAttributes(attribute.String("entity_id", entityID)) var raw haStateRaw if err := c.get(ctx, "/api/states/"+entityID, &raw); err != nil { @@ -73,12 +75,13 @@ func (c *Client) ListStates(ctx context.Context) ([]*driven.HAState, error) { } func (c *Client) CallService(ctx context.Context, domain, service string, payload map[string]any) ([]*driven.HAState, error) { - ctx, span := tracer.Start(ctx, "ha.CallService") - defer span.End() - span.SetAttributes( - attribute.String("ha.domain", domain), - attribute.String("ha.service", service), + ctx, span := tracer.Start(ctx, "ha.CallService", + trace.WithAttributes( + attribute.String("ha.domain", domain), + attribute.String("ha.service", service), + ), ) + defer span.End() body, err := json.Marshal(payload) if err != nil { diff --git a/ha-gateway/internal/telemetry/telemetry.go b/ha-gateway/internal/telemetry/telemetry.go index f8e1136..f6aee62 100644 --- a/ha-gateway/internal/telemetry/telemetry.go +++ b/ha-gateway/internal/telemetry/telemetry.go @@ -2,40 +2,38 @@ package telemetry import ( "context" + "errors" "time" + "gitea.nik4nao.com/nik/home-services/ha-gateway/internal/logger" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + metricnoop "go.opentelemetry.io/otel/metric/noop" "go.opentelemetry.io/otel/propagation" sdkmetric "go.opentelemetry.io/otel/sdk/metric" "go.opentelemetry.io/otel/sdk/resource" sdktrace "go.opentelemetry.io/otel/sdk/trace" semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + tracenoop "go.opentelemetry.io/otel/trace/noop" "gitea.nik4nao.com/nik/home-services/ha-gateway/internal/config" ) -// Setup initialises OTel trace and metric providers. If cfg.OTELEndpoint is -// empty, no-op providers are installed and Setup returns immediately. The -// returned shutdown func flushes and closes both exporters. -func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown func(context.Context) error, err error) { +func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) { if cfg.OTELEndpoint == "" { - // Local dev — no telemetry. + otel.SetTracerProvider(tracenoop.NewTracerProvider()) + otel.SetMeterProvider(metricnoop.NewMeterProvider()) + logger.FromContext(ctx).Debug("otel disabled — OTEL_ENDPOINT not set") return func(context.Context) error { return nil }, nil } - res, err := resource.New(ctx, - resource.WithAttributes( - semconv.ServiceName("ha-gateway"), - semconv.ServiceVersion(version), - ), + res := resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(serviceName), + semconv.ServiceVersionKey.String(version), ) - if err != nil { - return nil, err - } - // Trace exporter. traceExp, err := otlptracegrpc.New(ctx, otlptracegrpc.WithEndpoint(cfg.OTELEndpoint), otlptracegrpc.WithInsecure(), @@ -46,6 +44,7 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu tp := sdktrace.NewTracerProvider( sdktrace.WithBatcher(traceExp), sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), ) otel.SetTracerProvider(tp) otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( @@ -70,9 +69,15 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu otel.SetMeterProvider(mp) return func(ctx context.Context) error { - if err := tp.Shutdown(ctx); err != nil { - return err + shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + var shutdownErr error + if err := tp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) } - return mp.Shutdown(ctx) + if err := mp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) + } + return shutdownErr }, nil }