From 88c9a77a588bd1105c9ca7af0b6951ba7f248233 Mon Sep 17 00:00:00 2001 From: Nik Afiq Date: Tue, 21 Apr 2026 23:22:13 +0900 Subject: [PATCH] feat: update OLLAMA_TIMEOUT to 120s and implement async command handling in Discord bot --- ai-gateway/.env.example | 2 +- ai-gateway/internal/config/config.go | 2 +- discord-bot/cmd/bot/main.go | 9 +- .../adapters/primary/discord/async.go | 46 ++ .../adapters/primary/discord/async_test.go | 58 ++ .../adapters/primary/discord/handler.go | 66 +- plan.md | 586 ------------------ 7 files changed, 160 insertions(+), 609 deletions(-) create mode 100644 discord-bot/internal/adapters/primary/discord/async.go create mode 100644 discord-bot/internal/adapters/primary/discord/async_test.go delete mode 100644 plan.md diff --git a/ai-gateway/.env.example b/ai-gateway/.env.example index b28856a..338a71a 100644 --- a/ai-gateway/.env.example +++ b/ai-gateway/.env.example @@ -1,7 +1,7 @@ GRPC_PORT=50052 OLLAMA_URL=http://192.168.7.96:11434 OLLAMA_MODEL=llama3 -OLLAMA_TIMEOUT=30s +OLLAMA_TIMEOUT=120s HA_GATEWAY_ADDR=localhost:50051 HA_GATEWAY_SERVER_NAME=ha-gateway.home-services.svc.cluster.local TLS_DIR= diff --git a/ai-gateway/internal/config/config.go b/ai-gateway/internal/config/config.go index 226b737..4b6221a 100644 --- a/ai-gateway/internal/config/config.go +++ b/ai-gateway/internal/config/config.go @@ -25,7 +25,7 @@ type Config struct { // Load reads configuration from environment variables and applies defaults. func Load() (*Config, error) { - ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 30*time.Second) + ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 120*time.Second) if err != nil { return nil, err } diff --git a/discord-bot/cmd/bot/main.go b/discord-bot/cmd/bot/main.go index 55b959f..acef256 100644 --- a/discord-bot/cmd/bot/main.go +++ b/discord-bot/cmd/bot/main.go @@ -89,6 +89,7 @@ func main() { modelStore := modelstore.New() validator := modelvalidator.New(aiClient, 30*time.Second) commandApp := app.NewCommandApp(haClient, aiClient, modelStore, validator) + tracker := discordadapter.NewTracker(context.Background()) // Discord-specific wiring stays at the edge so the app layer remains transport-agnostic. session, err := discordgo.New("Bot " + cfg.DiscordToken) @@ -98,7 +99,7 @@ func main() { } session.Identify.Intents = discordgo.IntentsGuilds - handler := discordadapter.NewHandler(commandApp) + handler := discordadapter.NewHandler(commandApp, tracker) handler.Register(session) if err := session.Open(); err != nil { @@ -124,7 +125,11 @@ func main() { ) <-ctx.Done() - log.Info("shutdown signal received, closing session") + log.Info("shutdown signal received, waiting for in-flight ai work") + + if clean := tracker.Shutdown(25 * time.Second); !clean { + log.Warn("shutdown grace exceeded; some ai requests cancelled") + } if err := session.Close(); err != nil { log.Error("close discord session failed", "err", err) diff --git a/discord-bot/internal/adapters/primary/discord/async.go b/discord-bot/internal/adapters/primary/discord/async.go new file mode 100644 index 0000000..e9a043e --- /dev/null +++ b/discord-bot/internal/adapters/primary/discord/async.go @@ -0,0 +1,46 @@ +package discord + +import ( + "context" + "sync" + "time" +) + +// Tracker coordinates in-flight async handlers so shutdown can wait briefly. +type Tracker struct { + wg sync.WaitGroup + ctx context.Context + cancel context.CancelFunc +} + +// NewTracker constructs a tracker with a cancellable root context. +func NewTracker(parent context.Context) *Tracker { + ctx, cancel := context.WithCancel(parent) + return &Tracker{ctx: ctx, cancel: cancel} +} + +// Go runs fn in a new goroutine and tracks its lifetime. +func (t *Tracker) Go(fn func(ctx context.Context)) { + t.wg.Add(1) + go func() { + defer t.wg.Done() + fn(t.ctx) + }() +} + +// Shutdown waits up to grace for all goroutines to finish, then cancels any stragglers. +func (t *Tracker) Shutdown(grace time.Duration) bool { + done := make(chan struct{}) + go func() { + t.wg.Wait() + close(done) + }() + + select { + case <-done: + return true + case <-time.After(grace): + t.cancel() + return false + } +} diff --git a/discord-bot/internal/adapters/primary/discord/async_test.go b/discord-bot/internal/adapters/primary/discord/async_test.go new file mode 100644 index 0000000..4dd4ef3 --- /dev/null +++ b/discord-bot/internal/adapters/primary/discord/async_test.go @@ -0,0 +1,58 @@ +package discord + +import ( + "context" + "sync/atomic" + "testing" + "time" +) + +func TestTrackerShutdownCompletes(t *testing.T) { + tracker := NewTracker(context.Background()) + done := make(chan struct{}) + tracker.Go(func(ctx context.Context) { + close(done) + }) + if clean := tracker.Shutdown(time.Second); !clean { + t.Fatal("Shutdown() = false, want true") + } + select { + case <-done: + default: + t.Fatal("goroutine did not run") + } +} + +func TestTrackerShutdownMultiple(t *testing.T) { + tracker := NewTracker(context.Background()) + var count atomic.Int32 + for range 2 { + tracker.Go(func(ctx context.Context) { + time.Sleep(100 * time.Millisecond) + count.Add(1) + }) + } + if clean := tracker.Shutdown(time.Second); !clean { + t.Fatal("Shutdown() = false, want true") + } + if count.Load() != 2 { + t.Fatalf("count = %d, want 2", count.Load()) + } +} + +func TestTrackerShutdownTimeoutCancels(t *testing.T) { + tracker := NewTracker(context.Background()) + done := make(chan struct{}) + tracker.Go(func(ctx context.Context) { + defer close(done) + <-ctx.Done() + }) + if clean := tracker.Shutdown(50 * time.Millisecond); clean { + t.Fatal("Shutdown() = true, want false") + } + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("goroutine was not cancelled") + } +} diff --git a/discord-bot/internal/adapters/primary/discord/handler.go b/discord-bot/internal/adapters/primary/discord/handler.go index 5d6bf9e..52096aa 100644 --- a/discord-bot/internal/adapters/primary/discord/handler.go +++ b/discord-bot/internal/adapters/primary/discord/handler.go @@ -2,6 +2,7 @@ package discord import ( "context" + "errors" "fmt" "log/slog" "strings" @@ -13,6 +14,8 @@ import ( "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/trace" + "google.golang.org/grpc/codes" + grpcstatus "google.golang.org/grpc/status" ) var tracer = otel.Tracer("discord-bot/commands") @@ -34,12 +37,13 @@ type commandHandler interface { // Handler adapts Discord interactions to the command application layer. type Handler struct { - app commandHandler + app commandHandler + tracker *Tracker } // NewHandler constructs the Discord interaction adapter. -func NewHandler(app commandHandler) *Handler { - return &Handler{app: app} +func NewHandler(app commandHandler, tracker *Tracker) *Handler { + return &Handler{app: app, tracker: tracker} } // Register attaches the interaction handler to the Discord session. @@ -163,18 +167,34 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses ) return } - msg, err := h.app.HandleAIQuery(ctx, requiredStringOption(target, "text")) - h.followup(ctx, s, i.Interaction, msg, true, start, err) + text := requiredStringOption(target, "text") + interaction := i.Interaction + reqLog := logger.FromContext(ctx) + h.tracker.Go(func(trackerCtx context.Context) { + asyncCtx := logger.WithLogger(trackerCtx, reqLog) + asyncCtx, cancel := context.WithTimeout(asyncCtx, 5*time.Minute) + defer cancel() + + msg, err := h.app.HandleAIQuery(asyncCtx, text) + if err != nil { + reqLog.Error("ai command failed", "error", err.Error()) + msg = mapAIError(err) + } + if _, followErr := s.FollowupMessageCreate(interaction, true, &discordgo.WebhookParams{ + Content: msg, + Flags: discordgo.MessageFlagsEphemeral, + }); followErr != nil { + reqLog.Error("discord response failed", "error", followErr.Error()) + } + }) case "ai.model.set": - if err := h.deferResponse(s, i.Interaction, true); err != nil { - log.Error("discord response failed", - "duration_ms", time.Since(start).Milliseconds(), - "error", err.Error(), - ) + msg, err := h.app.HandleAIModelSet(ctx, requiredStringOption(target, "name")) + if err != nil { + h.respondError(ctx, s, i.Interaction, true, start, err) return } - msg, err := h.app.HandleAIModelSet(ctx, requiredStringOption(target, "name")) - h.followup(ctx, s, i.Interaction, msg, true, start, err) + h.respondMessage(ctx, s, i.Interaction, msg, true) + log.Info("command handled", "duration_ms", time.Since(start).Milliseconds()) case "ai.model.get": msg, err := h.app.HandleAIModelGet(ctx) if err != nil { @@ -184,15 +204,13 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses h.respondMessage(ctx, s, i.Interaction, msg, true) log.Info("command handled", "duration_ms", time.Since(start).Milliseconds()) case "ai.model.list": - if err := h.deferResponse(s, i.Interaction, true); err != nil { - log.Error("discord response failed", - "duration_ms", time.Since(start).Milliseconds(), - "error", err.Error(), - ) + msg, err := h.app.HandleAIModelList(ctx) + if err != nil { + h.respondError(ctx, s, i.Interaction, true, start, err) return } - msg, err := h.app.HandleAIModelList(ctx) - h.followup(ctx, s, i.Interaction, msg, true, start, err) + h.respondMessage(ctx, s, i.Interaction, msg, true) + log.Info("command handled", "duration_ms", time.Since(start).Milliseconds()) default: h.respondError(ctx, s, i.Interaction, true, start, fmt.Errorf("unsupported command: %s", commandPath)) } @@ -310,6 +328,16 @@ func (h *Handler) followup(ctx context.Context, s *discordgo.Session, interactio } } +func mapAIError(err error) string { + if errors.Is(err, context.DeadlineExceeded) { + return "The AI took too long to respond. Try again later." + } + if status, ok := grpcstatus.FromError(err); ok && status.Code() == codes.Unavailable { + return "The AI service is unreachable right now. Try again in a moment." + } + return "Sorry, something went wrong handling that AI request." +} + // interactionLogger adds stable interaction metadata without logging noisy option values. func interactionLogger(ctx context.Context, i *discordgo.InteractionCreate) *slog.Logger { log := logger.FromContext(ctx) diff --git a/plan.md b/plan.md deleted file mode 100644 index 58bee62..0000000 --- a/plan.md +++ /dev/null @@ -1,586 +0,0 @@ -# ai-gateway — Implementation Plan - -This plan describes the implementation of a new Go microservice, `ai-gateway`, in the `home-services` monorepo (`gitea.nik4nao.com/nik/home-services`). It centralizes all AI/LLM logic behind a gRPC API so callers (`discord-bot`, `alexa-bridge`) remain thin transport adapters with zero AI knowledge. - ---- - -## 1. Goals & Non-Goals - -### Goals -- New gRPC service `ai-gateway` listening on `:50052`. -- Owns **all** AI logic: Ollama connection, prompt construction, LLM intent parsing, dispatch to `ha-gateway`. -- Callers send raw user text via `QueryRequest`; receive a human-readable reply in `QueryResponse`. -- mTLS client authentication when calling `ha-gateway` (ha-gateway requires mTLS). -- Hexagonal architecture, matching the existing `ha-gateway` layout. -- Structured logging via `slog`, OTel OTLP gRPC traces/metrics. -- Deployed to the `home-services` namespace on K3s. - -### Non-Goals -- No auth on `ai-gateway`'s own inbound gRPC surface in this iteration (in-cluster only; match current `ha-gateway` posture). -- No streaming responses — unary only. -- No conversation memory — each `Query` is stateless. -- No new Home Assistant features beyond what `ha-gateway` already exposes (LightService + EntityService). - ---- - -## 2. Repository Layout - -All paths are relative to the `home-services` repo root. - -``` -proto/ - ai/v1/ai.proto # NEW - -gen/ - ai/v1/ # NEW (generated; committed) - ai.pb.go - ai_grpc.pb.go - -services/ - ai-gateway/ # NEW - go.mod - cmd/ - ai-gateway/ - main.go - config/ - config.go - domain/ - prompt.go - service.go - intent.go - adapters/ - inbound/ - grpc/ - server.go - outbound/ - ollama/ - client.go - hagateway/ - client.go - internal/ - observability/ - logging.go - otel.go - Dockerfile - .dockerignore - discord-bot/ # MODIFIED - adapters/outbound/aigateway/client.go # NEW - (remove any direct Ollama code if present) -``` - -Also update: -- `go.work` — add `./services/ai-gateway` and keep `replace` directive to `../gen`. -- `buf.gen.yaml` / `buf.yaml` — include the new `ai/v1` proto package. - ---- - -## 3. Proto Definition - -### File: `proto/ai/v1/ai.proto` - -```proto -syntax = "proto3"; - -package ai.v1; - -option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1"; - -// AIService accepts free-form natural language queries and returns a -// human-readable reply. It encapsulates LLM prompting, intent parsing, -// and dispatch to downstream services (e.g. ha-gateway). -service AIService { - rpc Query(QueryRequest) returns (QueryResponse); -} - -message QueryRequest { - // Raw user text, e.g. "turn on the living room light". - string text = 1; - - // Optional caller identifier for logging/tracing (e.g. "discord-bot"). - string source = 2; -} - -message QueryResponse { - // Human-readable reply to show the user. - string reply = 1; - - // Parsed intent name, if any. Empty if no actionable intent was detected. - string intent = 2; - - // True if an action was dispatched to a downstream service. - bool action_taken = 3; -} -``` - -### Generation -- Run `buf generate` from repo root. -- Commit `gen/ai/v1/*.pb.go` and `gen/ai/v1/*_grpc.pb.go` (per existing convention — `gen/` is committed to avoid CI codegen dependency). - ---- - -## 4. Configuration (`services/ai-gateway/config/config.go`) - -Load from environment. Use `os.Getenv` with defaults (matches existing ha-gateway style — no new dep). - -| Env Var | Default | Purpose | -| ----------------------------- | ----------------------------------------------------- | ------------------------------------------------ | -| `GRPC_LISTEN_ADDR` | `:50052` | Inbound gRPC bind address | -| `OLLAMA_URL` | `http://192.168.7.96:11434` | Ollama HTTP API (direct LAN IP; no K8s Service) | -| `OLLAMA_MODEL` | `llama3` | Model name | -| `OLLAMA_TIMEOUT` | `30s` | HTTP timeout for Ollama calls | -| `HA_GATEWAY_ADDR` | `ha-gateway.home-services.svc.cluster.local:50051` | ha-gateway gRPC endpoint | -| `HA_GATEWAY_TLS_CA_FILE` | `/etc/ai-gateway/tls/ca.crt` | CA cert that signed ha-gateway's server cert | -| `HA_GATEWAY_TLS_CERT_FILE` | `/etc/ai-gateway/tls/tls.crt` | ai-gateway's client cert (for mTLS) | -| `HA_GATEWAY_TLS_KEY_FILE` | `/etc/ai-gateway/tls/tls.key` | ai-gateway's client key | -| `HA_GATEWAY_SERVER_NAME` | `ha-gateway.home-services.svc.cluster.local` | SNI / cert verification name | -| `LOG_LEVEL` | `info` | `debug`/`info`/`warn`/`error` | -| `LOG_FORMAT` | `json` | `json` or `text` | -| `OTEL_EXPORTER_OTLP_ENDPOINT` | `otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317` | OTLP gRPC endpoint | -| `OTEL_SERVICE_NAME` | `ai-gateway` | Service name for traces/metrics | - -Provide a `Config` struct with a `Load()` function returning `(Config, error)`. Validate required files exist at startup. - ---- - -## 5. Domain Layer - -### `domain/intent.go` - -Define the intent contract the LLM must produce: - -```go -package domain - -type Intent struct { - Name string `json:"intent"` // e.g. "turn_on_light", "turn_off_light", "none" - Entity string `json:"entity"` // e.g. "living_room" (friendly name or entity_id) - Params map[string]string `json:"params"` // optional, e.g. {"brightness":"80"} - Reply string `json:"reply"` // what to say back to the user -} - -const ( - IntentNone = "none" - IntentTurnOnLight = "turn_on_light" - IntentTurnOffLight = "turn_off_light" - IntentListEntities = "list_entities" -) -``` - -### `domain/prompt.go` - -Build the Ollama prompt. The system prompt MUST instruct the model to return **only** a single JSON object matching the `Intent` schema. No markdown fences, no prose. - -```go -package domain - -import "fmt" - -const systemPrompt = `You are a home automation assistant. Given a user request, respond with a single JSON object and nothing else — no markdown, no code fences, no explanation. - -Schema: -{ - "intent": "turn_on_light" | "turn_off_light" | "list_entities" | "none", - "entity": "", - "params": { "": "" }, - "reply": "" -} - -Rules: -- If the request is not actionable, use intent="none" and put the conversational answer in "reply". -- Always include all four fields. Use "" or {} for empty values. -- Do not wrap the JSON in backticks.` - -func BuildPrompt(userText string) string { - return fmt.Sprintf("%s\n\nUser: %s", systemPrompt, userText) -} -``` - -### `domain/service.go` - -The orchestrator. Depends on two ports (interfaces) defined here: - -```go -package domain - -import "context" - -type LLMClient interface { - Generate(ctx context.Context, prompt string) (string, error) -} - -type HAClient interface { - TurnOnLight(ctx context.Context, entity string, params map[string]string) error - TurnOffLight(ctx context.Context, entity string) error - ListEntities(ctx context.Context) ([]string, error) -} - -type Service struct { - llm LLMClient - ha HAClient - log *slog.Logger -} - -func NewService(llm LLMClient, ha HAClient, log *slog.Logger) *Service { /* ... */ } - -type QueryResult struct { - Reply string - Intent string - ActionTaken bool -} - -func (s *Service) Query(ctx context.Context, text string) (QueryResult, error) { - // 1. BuildPrompt(text) - // 2. s.llm.Generate(ctx, prompt) - // 3. json.Unmarshal into Intent - // - On unmarshal error: log at warn, return reply = "I didn't understand that." - // 4. switch intent.Name: - // turn_on_light -> s.ha.TurnOnLight(...) - // turn_off_light -> s.ha.TurnOffLight(...) - // list_entities -> s.ha.ListEntities(...); format into reply - // none / default -> reply = intent.Reply - // 5. Return QueryResult -} -``` - -**Error handling:** -- LLM call failure → return error; inbound adapter maps to gRPC `Unavailable`. -- JSON parse failure → do NOT error; return a friendly "I didn't understand" reply and log the raw LLM output at `warn` with the original text (not error). -- HA dispatch failure → log at `error`, return reply "I couldn't reach Home Assistant right now."; `ActionTaken=false`. - ---- - -## 6. Outbound Adapters - -### `adapters/outbound/ollama/client.go` - -- Plain `net/http.Client` with configured timeout. -- POST to `{OLLAMA_URL}/api/generate` with body: - ```json - { "model": "", "prompt": "", "stream": false } - ``` -- Decode JSON response, return the `response` field as a string. -- Implement `domain.LLMClient`. -- Wrap the HTTP client with OTel instrumentation (`otelhttp.NewTransport`). - -### `adapters/outbound/hagateway/client.go` - -This is the mTLS-critical piece. - -- Construct a `*grpc.ClientConn` to `HA_GATEWAY_ADDR` with TLS credentials built from the three cert files: - ```go - func loadTLSCredentials(caFile, certFile, keyFile, serverName string) (credentials.TransportCredentials, error) { - caPEM, err := os.ReadFile(caFile) - if err != nil { return nil, fmt.Errorf("read ca: %w", err) } - cp := x509.NewCertPool() - if !cp.AppendCertsFromPEM(caPEM) { - return nil, errors.New("failed to append CA cert") - } - clientCert, err := tls.LoadX509KeyPair(certFile, keyFile) - if err != nil { return nil, fmt.Errorf("load client keypair: %w", err) } - return credentials.NewTLS(&tls.Config{ - Certificates: []tls.Certificate{clientCert}, - RootCAs: cp, - ServerName: serverName, - MinVersion: tls.VersionTLS13, - }), nil - } - ``` -- Use `grpc.NewClient(addr, grpc.WithTransportCredentials(creds), grpc.WithStatsHandler(otelgrpc.NewClientHandler()))`. -- Wrap the generated ha-gateway clients (`LightServiceClient`, `EntityServiceClient`) to satisfy `domain.HAClient`. -- Expose a `Close()` method for graceful shutdown. - -**Cert source:** the cert files will be projected into the pod via a Kubernetes `Secret` mounted at `/etc/ai-gateway/tls/`. See deployment manifest below. Issuing the cert is covered in §10. - ---- - -## 7. Inbound Adapter - -### `adapters/inbound/grpc/server.go` - -- Implements `aiv1.AIServiceServer`. -- `Query(ctx, req)` → calls `domain.Service.Query(ctx, req.Text)` → maps `QueryResult` to `QueryResponse`. -- Attach OTel interceptor: `grpc.StatsHandler(otelgrpc.NewServerHandler())`. -- Attach a slog unary interceptor that logs method, duration, caller `source`, and error code. -- Register reflection service only if `LOG_LEVEL=debug` (convenience for `grpcurl`). - ---- - -## 8. Observability (`internal/observability/`) - -Copy the pattern from `ha-gateway`: - -### `logging.go` -- `NewLogger(level, format string) *slog.Logger` returning either `slog.NewJSONHandler` or `slog.NewTextHandler` wrapping `os.Stdout`. - -### `otel.go` -- `InitOTel(ctx, endpoint, serviceName) (shutdown func(context.Context) error, err error)`. -- Uses `otlptracegrpc` + `otlpmetricgrpc` exporters, insecure credentials (in-cluster). -- Sets global `TracerProvider` and `MeterProvider`. -- Resource attributes: `service.name`, `service.namespace=home-services`. - ---- - -## 9. Entry Point (`cmd/ai-gateway/main.go`) - -Standard startup sequence: - -1. Load config. -2. Build logger. -3. Init OTel; defer shutdown. -4. Build Ollama client. -5. Build ha-gateway client (mTLS); defer `Close()`. -6. Build domain service. -7. Build gRPC server with interceptors, register `AIService`. -8. Listen on `GRPC_LISTEN_ADDR`. -9. Handle `SIGINT`/`SIGTERM` for graceful shutdown: `server.GracefulStop()` with a 10s timeout, then OTel shutdown. - ---- - -## 10. TLS / mTLS Plumbing - -`ha-gateway` requires mTLS. `ai-gateway` needs a client certificate signed by the same CA that ha-gateway trusts. - -### Approach: cert-manager + internal-ca-issuer - -Create a `Certificate` resource for `ai-gateway` (file: `manifests/home-services/ai-gateway-client-cert.yaml`): - -```yaml -apiVersion: cert-manager.io/v1 -kind: Certificate -metadata: - name: ai-gateway-client - namespace: home-services -spec: - secretName: ai-gateway-client-tls - duration: 2160h # 90d - renewBefore: 360h # 15d - subject: - organizations: [home-services] - commonName: ai-gateway - usages: - - client auth - issuerRef: - name: internal-ca-issuer - kind: ClusterIssuer - group: cert-manager.io -``` - -**Important:** use `internal-ca-issuer` (the CA issuer), **never** `internal-ca` (the bootstrap self-signed issuer). This matches the homelab convention. - -The resulting secret `ai-gateway-client-tls` contains `tls.crt`, `tls.key`, and `ca.crt` — mount all three. - -### Verify ha-gateway's CA trust -Confirm ha-gateway's server TLS config trusts `internal-ca-issuer`'s CA (it should, since both use the same cluster CA). If ha-gateway uses a separate client-auth CA, adjust the issuer accordingly. - ---- - -## 11. Kubernetes Manifest - -### File: `manifests/home-services/ai-gateway.yaml` - -Single file with `---` separators per repo convention. - -```yaml -apiVersion: v1 -kind: Service -metadata: - name: ai-gateway - namespace: home-services -spec: - selector: { app: ai-gateway } - ports: - - name: grpc - port: 50052 - targetPort: 50052 ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: ai-gateway - namespace: home-services -spec: - replicas: 1 - selector: { matchLabels: { app: ai-gateway } } - template: - metadata: - labels: { app: ai-gateway } - spec: - containers: - - name: ai-gateway - image: gitea.nik4nao.com/nik/ai-gateway:latest - imagePullPolicy: Always - ports: - - containerPort: 50052 - name: grpc - env: - - { name: GRPC_LISTEN_ADDR, value: ":50052" } - - { name: OLLAMA_URL, value: "http://192.168.7.96:11434" } - - { name: OLLAMA_MODEL, value: "llama3" } - - { name: HA_GATEWAY_ADDR, value: "ha-gateway.home-services.svc.cluster.local:50051" } - - { name: HA_GATEWAY_TLS_CA_FILE, value: "/etc/ai-gateway/tls/ca.crt" } - - { name: HA_GATEWAY_TLS_CERT_FILE, value: "/etc/ai-gateway/tls/tls.crt" } - - { name: HA_GATEWAY_TLS_KEY_FILE, value: "/etc/ai-gateway/tls/tls.key" } - - { name: HA_GATEWAY_SERVER_NAME, value: "ha-gateway.home-services.svc.cluster.local" } - - { name: LOG_LEVEL, value: "info" } - - { name: LOG_FORMAT, value: "json" } - - { name: OTEL_EXPORTER_OTLP_ENDPOINT, - value: "otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317" } - - { name: OTEL_SERVICE_NAME, value: "ai-gateway" } - volumeMounts: - - name: tls - mountPath: /etc/ai-gateway/tls - readOnly: true - readinessProbe: - tcpSocket: { port: 50052 } - initialDelaySeconds: 3 - periodSeconds: 10 - livenessProbe: - tcpSocket: { port: 50052 } - initialDelaySeconds: 10 - periodSeconds: 20 - volumes: - - name: tls - secret: - secretName: ai-gateway-client-tls - imagePullSecrets: - - name: gitea-registry -``` - -No resource `limits`/`requests` yet — matches current repo convention (memory limits not yet enforced on pods). - ---- - -## 12. discord-bot Changes - -### New: `services/discord-bot/adapters/outbound/aigateway/client.go` -- gRPC client to `ai-gateway.home-services.svc.cluster.local:50052`, **plaintext** (no auth on ai-gateway's inbound surface yet). -- Exposes `Query(ctx, text string) (reply string, err error)`. -- Inject into existing command handler. - -### Removed / simplified -- If `discord-bot` currently contains any direct Ollama calls, remove them. -- Slash command handler for free-form queries simply calls `aigateway.Query(ctx, msg.Content)` and posts the returned reply. -- Event-notification path (existing Discord → notify flow) is untouched. - -### Config additions to discord-bot -- `AI_GATEWAY_ADDR` (default `ai-gateway.home-services.svc.cluster.local:50052`). - ---- - -## 13. CI / Build - -### `services/ai-gateway/Dockerfile` -Multi-stage build matching existing services: - -```dockerfile -FROM golang:1.26 AS build -WORKDIR /src -COPY go.work go.work.sum ./ -COPY gen ./gen -COPY services/ai-gateway ./services/ai-gateway -WORKDIR /src/services/ai-gateway -RUN CGO_ENABLED=0 go build -o /out/ai-gateway ./cmd/ai-gateway - -FROM gcr.io/distroless/static-debian12:nonroot -COPY --from=build /out/ai-gateway /ai-gateway -USER nonroot:nonroot -ENTRYPOINT ["/ai-gateway"] -``` - -### Gitea Actions workflow -Mirror the existing `ha-gateway` workflow: -- Trigger on pushes touching `services/ai-gateway/**`, `gen/ai/**`, or `proto/ai/**`. -- `docker buildx` multiarch build (`linux/amd64,linux/arm64`). -- Push to `gitea.nik4nao.com/nik/ai-gateway:latest` and `:${{ github.sha }}`. -- Use the Gitea API token (`read:package` + `write:package`) as registry password — **not** the account password. -- Remember: buildkit CA must be injected each run (existing runner pattern). - ---- - -## 14. Workspace Wiring - -### `go.work` — add line: -``` -use ./services/ai-gateway -``` -Keep the existing `replace gitea.nik4nao.com/nik/home-services/gen => ../gen` in `services/ai-gateway/go.mod`. - -### `services/ai-gateway/go.mod` dependencies -- `google.golang.org/grpc` -- `google.golang.org/protobuf` -- `go.opentelemetry.io/otel` -- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` -- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` -- `go.opentelemetry.io/otel/sdk` -- `go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc` -- `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp` - ---- - -## 15. Testing - -### Unit tests (`services/ai-gateway/domain/service_test.go`) -- Fake `LLMClient` returning canned JSON strings for each intent. -- Fake `HAClient` recording calls. -- Assert: - - Valid `turn_on_light` JSON → `HAClient.TurnOnLight` called with correct entity; reply matches. - - Invalid JSON → graceful reply, no panic, no HA call. - - `intent="none"` → no HA call; reply passed through. - - HA call returning error → reply contains "couldn't reach Home Assistant"; `ActionTaken=false`. - -### Integration smoke test (manual, post-deploy) -```bash -# From inside the cluster: -grpcurl -plaintext -d '{"text":"turn on the living room light","source":"manual"}' \ - ai-gateway.home-services.svc.cluster.local:50052 ai.v1.AIService/Query -``` - -### mTLS verification -```bash -# Should succeed (using mounted cert): -kubectl exec -n home-services deploy/ai-gateway -- /ai-gateway --selftest # if implemented -# Or inspect via openssl from within the pod if distroless allows a debug sidecar. -``` - ---- - -## 16. Rollout Order - -Implement in this order. Each step should compile and tests should pass before the next. - -1. **Proto + gen** — add `proto/ai/v1/ai.proto`, run `buf generate`, commit `gen/ai/v1/`. -2. **Scaffold** — create `services/ai-gateway/` with `go.mod`, `main.go` (stub), update `go.work`. -3. **Domain** — `intent.go`, `prompt.go`, `service.go` + unit tests with fakes. -4. **Ollama adapter** — HTTP client, manual curl-based validation against `192.168.7.96:11434`. -5. **ha-gateway adapter** — mTLS dial, wrap generated clients, satisfy `domain.HAClient`. -6. **Inbound gRPC adapter** — server, interceptors. -7. **Observability** — logging + OTel init. -8. **Entry point** — wire everything in `cmd/ai-gateway/main.go`. -9. **Dockerfile + CI** — build and push image to Gitea registry. -10. **Cert-manager Certificate** — apply `ai-gateway-client-cert.yaml`; verify `ai-gateway-client-tls` secret is created. -11. **Deployment manifest** — apply `ai-gateway.yaml`; verify pod ready, logs clean, `grpcurl` smoke test passes. -12. **discord-bot update** — add `aigateway` outbound adapter, remove any direct Ollama usage, redeploy. -13. **End-to-end test** — issue a Discord slash command, observe: - - Discord → ai-gateway → Ollama → ai-gateway → ha-gateway (mTLS) → HA → reply back. - - Traces visible in Tempo, logs in Loki, metrics in Prometheus. - ---- - -## 17. Open Questions / Deferred - -- **Auth on ai-gateway's inbound surface:** currently none. Revisit when `alexa-bridge` lands — Alexa path is public-ingress, so ai-gateway may eventually need mTLS inbound too. -- **Intent schema evolution:** if the set of intents grows meaningfully, consider moving the schema into the proto (enum + oneof) rather than free-form JSON. For now, JSON keeps the LLM prompt simple. -- **Conversation memory:** out of scope. If needed later, add a per-`source` session store (Valkey in `home-services`). -- **Prompt templates per model:** `llama3` works with the current system prompt. If swapping to a smaller model, prompt may need tuning — keep `BuildPrompt` easy to override via config. - ---- - -## 18. Acceptance Criteria - -- [ ] `ai-gateway` pod runs ready in `home-services` namespace. -- [ ] `grpcurl` smoke test (§15) returns a structured `QueryResponse` for a light command. -- [ ] Light actually turns on/off in Home Assistant when tested end-to-end. -- [ ] ha-gateway logs show mTLS handshake succeeded with CN=`ai-gateway`. -- [ ] Traces for a full Discord query show three spans: `discord-bot` → `ai-gateway` → `ha-gateway`. -- [ ] `discord-bot` contains no direct references to `OLLAMA_URL` or Ollama HTTP client code. -- [ ] Unit tests pass in CI; Docker image builds multiarch. \ No newline at end of file