feat: enhance telemetry setup and command tracing in discord-bot and ha-gateway
All checks were successful
CI / test (push) Successful in 9s
CI / build-ha-gateway (push) Successful in 2m17s
CI / build-discord-bot (push) Successful in 59s

This commit is contained in:
Nik Afiq 2026-04-09 05:18:47 +09:00
parent 1d3e223dbb
commit b5592a1705
6 changed files with 89 additions and 38 deletions

View File

@@ -42,11 +42,16 @@ func main() {
defer stop() defer stop()
ctx = logger.WithLogger(ctx, log) ctx = logger.WithLogger(ctx, log)
shutdown, err := telemetry.Setup(ctx, cfg, version) shutdown, err := telemetry.Setup(ctx, "discord-bot", version, cfg)
if err != nil { if err != nil {
log.Error("telemetry setup failed", "err", err) log.Error("telemetry setup failed", "err", err)
os.Exit(1) os.Exit(1)
} }
if cfg.OTELEndpoint != "" {
log.Info("telemetry enabled", "endpoint", cfg.OTELEndpoint)
} else {
log.Debug("telemetry disabled")
}
haClient, err := gateway.New(ctx, cfg.HAGatewayAddr, log) haClient, err := gateway.New(ctx, cfg.HAGatewayAddr, log)
if err != nil { if err != nil {

View File

@@ -10,8 +10,13 @@ import (
apppkg "gitea.nik4nao.com/nik/home-services/discord-bot/internal/app" apppkg "gitea.nik4nao.com/nik/home-services/discord-bot/internal/app"
"gitea.nik4nao.com/nik/home-services/discord-bot/internal/logger" "gitea.nik4nao.com/nik/home-services/discord-bot/internal/logger"
"github.com/bwmarrin/discordgo" "github.com/bwmarrin/discordgo"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
) )
var tracer = otel.Tracer("discord-bot/commands")
type commandHandler interface { type commandHandler interface {
HandleLightList(ctx context.Context) (string, error) HandleLightList(ctx context.Context) (string, error)
HandleLightOn(ctx context.Context, entityID string, brightnessPct *uint32, colorTempKelvin *uint32) (string, error) HandleLightOn(ctx context.Context, entityID string, brightnessPct *uint32, colorTempKelvin *uint32) (string, error)
@@ -53,6 +58,24 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses
log.Debug("command received") log.Debug("command received")
data := i.ApplicationCommandData() data := i.ApplicationCommandData()
command := data.Name
if len(data.Options) > 0 {
command += "." + data.Options[0].Name
}
user := ""
if i.Member != nil && i.Member.User != nil {
user = i.Member.User.Username
} else if i.User != nil {
user = i.User.Username
}
ctx, span := tracer.Start(ctx, "command."+strings.ReplaceAll(command, ".", "_"),
trace.WithAttributes(
attribute.String("discord.command", command),
attribute.String("discord.user", user),
attribute.String("discord.guild", i.GuildID),
),
)
defer span.End()
if len(data.Options) == 0 { if len(data.Options) == 0 {
h.respondError(ctx, s, i.Interaction, true, start, fmt.Errorf("missing subcommand")) h.respondError(ctx, s, i.Interaction, true, start, fmt.Errorf("missing subcommand"))
return return

View File

@@ -2,34 +2,37 @@ package telemetry
import ( import (
"context" "context"
"errors"
"time" "time"
"gitea.nik4nao.com/nik/home-services/discord-bot/internal/logger"
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/metric/noop"
"go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric" sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace" sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0" semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
tracenoop "go.opentelemetry.io/otel/trace/noop"
"gitea.nik4nao.com/nik/home-services/discord-bot/internal/config" "gitea.nik4nao.com/nik/home-services/discord-bot/internal/config"
) )
func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown func(context.Context) error, err error) { func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) {
if cfg.OTELEndpoint == "" { if cfg.OTELEndpoint == "" {
otel.SetTracerProvider(tracenoop.NewTracerProvider())
otel.SetMeterProvider(noop.NewMeterProvider())
logger.FromContext(ctx).Debug("otel disabled — OTEL_ENDPOINT not set")
return func(context.Context) error { return nil }, nil return func(context.Context) error { return nil }, nil
} }
res, err := resource.New(ctx, res := resource.NewWithAttributes(
resource.WithAttributes( semconv.SchemaURL,
semconv.ServiceName("discord-bot"), semconv.ServiceNameKey.String(serviceName),
semconv.ServiceVersion(version), semconv.ServiceVersionKey.String(version),
),
) )
if err != nil {
return nil, err
}
traceExp, err := otlptracegrpc.New(ctx, traceExp, err := otlptracegrpc.New(ctx,
otlptracegrpc.WithEndpoint(cfg.OTELEndpoint), otlptracegrpc.WithEndpoint(cfg.OTELEndpoint),
@@ -41,6 +44,7 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu
tp := sdktrace.NewTracerProvider( tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(traceExp), sdktrace.WithBatcher(traceExp),
sdktrace.WithResource(res), sdktrace.WithResource(res),
sdktrace.WithSampler(sdktrace.AlwaysSample()),
) )
otel.SetTracerProvider(tp) otel.SetTracerProvider(tp)
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
@@ -64,9 +68,15 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu
otel.SetMeterProvider(mp) otel.SetMeterProvider(mp)
return func(ctx context.Context) error { return func(ctx context.Context) error {
if err := tp.Shutdown(ctx); err != nil { shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
return err defer cancel()
var shutdownErr error
if err := tp.Shutdown(shutdownCtx); err != nil {
shutdownErr = errors.Join(shutdownErr, err)
} }
return mp.Shutdown(ctx) if err := mp.Shutdown(shutdownCtx); err != nil {
shutdownErr = errors.Join(shutdownErr, err)
}
return shutdownErr
}, nil }, nil
} }

View File

@@ -59,11 +59,16 @@ func main() {
defer stop() defer stop()
ctx = logger.WithLogger(ctx, log) ctx = logger.WithLogger(ctx, log)
shutdown, err := telemetry.Setup(ctx, cfg, version) shutdown, err := telemetry.Setup(ctx, "ha-gateway", version, cfg)
if err != nil { if err != nil {
log.Error("telemetry setup failed", "err", err) log.Error("telemetry setup failed", "err", err)
os.Exit(1) os.Exit(1)
} }
if cfg.OTELEndpoint != "" {
log.Info("telemetry enabled", "endpoint", cfg.OTELEndpoint)
} else {
log.Debug("telemetry disabled")
}
haClient := ha.NewClient(cfg, log) haClient := ha.NewClient(cfg, log)

View File

@@ -16,6 +16,7 @@ import (
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes" "go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
) )
var tracer = otel.Tracer("ha-gateway/ha-client") var tracer = otel.Tracer("ha-gateway/ha-client")
@@ -37,9 +38,10 @@ func NewClient(cfg *config.Config, log *slog.Logger) *Client {
} }
func (c *Client) GetState(ctx context.Context, entityID string) (*driven.HAState, error) { func (c *Client) GetState(ctx context.Context, entityID string) (*driven.HAState, error) {
ctx, span := tracer.Start(ctx, "ha.GetState") ctx, span := tracer.Start(ctx, "ha.GetState",
trace.WithAttributes(attribute.String("ha.entity_id", entityID)),
)
defer span.End() defer span.End()
span.SetAttributes(attribute.String("entity_id", entityID))
var raw haStateRaw var raw haStateRaw
if err := c.get(ctx, "/api/states/"+entityID, &raw); err != nil { if err := c.get(ctx, "/api/states/"+entityID, &raw); err != nil {
@@ -73,12 +75,13 @@ func (c *Client) ListStates(ctx context.Context) ([]*driven.HAState, error) {
} }
func (c *Client) CallService(ctx context.Context, domain, service string, payload map[string]any) ([]*driven.HAState, error) { func (c *Client) CallService(ctx context.Context, domain, service string, payload map[string]any) ([]*driven.HAState, error) {
ctx, span := tracer.Start(ctx, "ha.CallService") ctx, span := tracer.Start(ctx, "ha.CallService",
defer span.End() trace.WithAttributes(
span.SetAttributes( attribute.String("ha.domain", domain),
attribute.String("ha.domain", domain), attribute.String("ha.service", service),
attribute.String("ha.service", service), ),
) )
defer span.End()
body, err := json.Marshal(payload) body, err := json.Marshal(payload)
if err != nil { if err != nil {

View File

@@ -2,40 +2,38 @@ package telemetry
import ( import (
"context" "context"
"errors"
"time" "time"
"gitea.nik4nao.com/nik/home-services/ha-gateway/internal/logger"
"go.opentelemetry.io/otel" "go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
metricnoop "go.opentelemetry.io/otel/metric/noop"
"go.opentelemetry.io/otel/propagation" "go.opentelemetry.io/otel/propagation"
sdkmetric "go.opentelemetry.io/otel/sdk/metric" sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource" "go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace" sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0" semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
tracenoop "go.opentelemetry.io/otel/trace/noop"
"gitea.nik4nao.com/nik/home-services/ha-gateway/internal/config" "gitea.nik4nao.com/nik/home-services/ha-gateway/internal/config"
) )
// Setup initialises OTel trace and metric providers. If cfg.OTELEndpoint is func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) {
// empty, no-op providers are installed and Setup returns immediately. The
// returned shutdown func flushes and closes both exporters.
func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown func(context.Context) error, err error) {
if cfg.OTELEndpoint == "" { if cfg.OTELEndpoint == "" {
// Local dev — no telemetry. otel.SetTracerProvider(tracenoop.NewTracerProvider())
otel.SetMeterProvider(metricnoop.NewMeterProvider())
logger.FromContext(ctx).Debug("otel disabled — OTEL_ENDPOINT not set")
return func(context.Context) error { return nil }, nil return func(context.Context) error { return nil }, nil
} }
res, err := resource.New(ctx, res := resource.NewWithAttributes(
resource.WithAttributes( semconv.SchemaURL,
semconv.ServiceName("ha-gateway"), semconv.ServiceNameKey.String(serviceName),
semconv.ServiceVersion(version), semconv.ServiceVersionKey.String(version),
),
) )
if err != nil {
return nil, err
}
// Trace exporter.
traceExp, err := otlptracegrpc.New(ctx, traceExp, err := otlptracegrpc.New(ctx,
otlptracegrpc.WithEndpoint(cfg.OTELEndpoint), otlptracegrpc.WithEndpoint(cfg.OTELEndpoint),
otlptracegrpc.WithInsecure(), otlptracegrpc.WithInsecure(),
@@ -46,6 +44,7 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu
tp := sdktrace.NewTracerProvider( tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(traceExp), sdktrace.WithBatcher(traceExp),
sdktrace.WithResource(res), sdktrace.WithResource(res),
sdktrace.WithSampler(sdktrace.AlwaysSample()),
) )
otel.SetTracerProvider(tp) otel.SetTracerProvider(tp)
otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
@@ -70,9 +69,15 @@ func Setup(ctx context.Context, cfg *config.Config, version string) (shutdown fu
otel.SetMeterProvider(mp) otel.SetMeterProvider(mp)
return func(ctx context.Context) error { return func(ctx context.Context) error {
if err := tp.Shutdown(ctx); err != nil { shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
return err defer cancel()
var shutdownErr error
if err := tp.Shutdown(shutdownCtx); err != nil {
shutdownErr = errors.Join(shutdownErr, err)
} }
return mp.Shutdown(ctx) if err := mp.Shutdown(shutdownCtx); err != nil {
shutdownErr = errors.Join(shutdownErr, err)
}
return shutdownErr
}, nil }, nil
} }