From 88c9a77a588bd1105c9ca7af0b6951ba7f248233 Mon Sep 17 00:00:00 2001
From: Nik Afiq <nik.afiq98@ymail.com>
Date: Tue, 21 Apr 2026 23:22:13 +0900
Subject: [PATCH] feat: update OLLAMA_TIMEOUT to 120s and implement async
 command handling in Discord bot

---
 ai-gateway/.env.example                       |   2 +-
 ai-gateway/internal/config/config.go          |   2 +-
 discord-bot/cmd/bot/main.go                   |   9 +-
 .../adapters/primary/discord/async.go         |  46 ++
 .../adapters/primary/discord/async_test.go    |  58 ++
 .../adapters/primary/discord/handler.go       |  66 +-
 plan.md                                       | 586 ------------------
 7 files changed, 160 insertions(+), 609 deletions(-)
 create mode 100644 discord-bot/internal/adapters/primary/discord/async.go
 create mode 100644 discord-bot/internal/adapters/primary/discord/async_test.go
 delete mode 100644 plan.md

diff --git a/ai-gateway/.env.example b/ai-gateway/.env.example
index b28856a..338a71a 100644
--- a/ai-gateway/.env.example
+++ b/ai-gateway/.env.example
@@ -1,7 +1,7 @@
 GRPC_PORT=50052
 OLLAMA_URL=http://192.168.7.96:11434
 OLLAMA_MODEL=llama3
-OLLAMA_TIMEOUT=30s
+OLLAMA_TIMEOUT=120s
 HA_GATEWAY_ADDR=localhost:50051
 HA_GATEWAY_SERVER_NAME=ha-gateway.home-services.svc.cluster.local
 TLS_DIR=
diff --git a/ai-gateway/internal/config/config.go b/ai-gateway/internal/config/config.go
index 226b737..4b6221a 100644
--- a/ai-gateway/internal/config/config.go
+++ b/ai-gateway/internal/config/config.go
@@ -25,7 +25,7 @@ type Config struct {
 
 // Load reads configuration from environment variables and applies defaults.
 func Load() (*Config, error) {
-	ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 30*time.Second)
+	ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 120*time.Second)
 	if err != nil {
 		return nil, err
 	}
diff --git a/discord-bot/cmd/bot/main.go b/discord-bot/cmd/bot/main.go
index 55b959f..acef256 100644
--- a/discord-bot/cmd/bot/main.go
+++ b/discord-bot/cmd/bot/main.go
@@ -89,6 +89,7 @@ func main() {
 	modelStore := modelstore.New()
 	validator := modelvalidator.New(aiClient, 30*time.Second)
 	commandApp := app.NewCommandApp(haClient, aiClient, modelStore, validator)
+	tracker := discordadapter.NewTracker(context.Background())
 
 	// Discord-specific wiring stays at the edge so the app layer remains transport-agnostic.
 	session, err := discordgo.New("Bot " + cfg.DiscordToken)
@@ -98,7 +99,7 @@ func main() {
 	}
 	session.Identify.Intents = discordgo.IntentsGuilds
 
-	handler := discordadapter.NewHandler(commandApp)
+	handler := discordadapter.NewHandler(commandApp, tracker)
 	handler.Register(session)
 
 	if err := session.Open(); err != nil {
@@ -124,7 +125,11 @@ func main() {
 	)
 
 	<-ctx.Done()
-	log.Info("shutdown signal received, closing session")
+	log.Info("shutdown signal received, waiting for in-flight ai work")
+
+	if clean := tracker.Shutdown(25 * time.Second); !clean {
+		log.Warn("shutdown grace exceeded; some ai requests cancelled")
+	}
 
 	if err := session.Close(); err != nil {
 		log.Error("close discord session failed", "err", err)
diff --git a/discord-bot/internal/adapters/primary/discord/async.go b/discord-bot/internal/adapters/primary/discord/async.go
new file mode 100644
index 0000000..e9a043e
--- /dev/null
+++ b/discord-bot/internal/adapters/primary/discord/async.go
@@ -0,0 +1,46 @@
+package discord
+
+import (
+	"context"
+	"sync"
+	"time"
+)
+
+// Tracker coordinates in-flight async handlers so shutdown can wait briefly.
+type Tracker struct {
+	wg     sync.WaitGroup
+	ctx    context.Context
+	cancel context.CancelFunc
+}
+
+// NewTracker constructs a tracker with a cancellable root context.
+func NewTracker(parent context.Context) *Tracker {
+	ctx, cancel := context.WithCancel(parent)
+	return &Tracker{ctx: ctx, cancel: cancel}
+}
+
+// Go runs fn in a new goroutine and tracks its lifetime.
+func (t *Tracker) Go(fn func(ctx context.Context)) {
+	t.wg.Add(1)
+	go func() {
+		defer t.wg.Done()
+		fn(t.ctx)
+	}()
+}
+
+// Shutdown waits up to grace for all goroutines to finish, then cancels any stragglers.
+func (t *Tracker) Shutdown(grace time.Duration) bool {
+	done := make(chan struct{})
+	go func() {
+		t.wg.Wait()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		return true
+	case <-time.After(grace):
+		t.cancel()
+		return false
+	}
+}
diff --git a/discord-bot/internal/adapters/primary/discord/async_test.go b/discord-bot/internal/adapters/primary/discord/async_test.go
new file mode 100644
index 0000000..4dd4ef3
--- /dev/null
+++ b/discord-bot/internal/adapters/primary/discord/async_test.go
@@ -0,0 +1,58 @@
+package discord
+
+import (
+	"context"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+func TestTrackerShutdownCompletes(t *testing.T) {
+	tracker := NewTracker(context.Background())
+	done := make(chan struct{})
+	tracker.Go(func(ctx context.Context) {
+		close(done)
+	})
+	if clean := tracker.Shutdown(time.Second); !clean {
+		t.Fatal("Shutdown() = false, want true")
+	}
+	select {
+	case <-done:
+	default:
+		t.Fatal("goroutine did not run")
+	}
+}
+
+func TestTrackerShutdownMultiple(t *testing.T) {
+	tracker := NewTracker(context.Background())
+	var count atomic.Int32
+	for range 2 {
+		tracker.Go(func(ctx context.Context) {
+			time.Sleep(100 * time.Millisecond)
+			count.Add(1)
+		})
+	}
+	if clean := tracker.Shutdown(time.Second); !clean {
+		t.Fatal("Shutdown() = false, want true")
+	}
+	if count.Load() != 2 {
+		t.Fatalf("count = %d, want 2", count.Load())
+	}
+}
+
+func TestTrackerShutdownTimeoutCancels(t *testing.T) {
+	tracker := NewTracker(context.Background())
+	done := make(chan struct{})
+	tracker.Go(func(ctx context.Context) {
+		defer close(done)
+		<-ctx.Done()
+	})
+	if clean := tracker.Shutdown(50 * time.Millisecond); clean {
+		t.Fatal("Shutdown() = true, want false")
+	}
+	select {
+	case <-done:
+	case <-time.After(time.Second):
+		t.Fatal("goroutine was not cancelled")
+	}
+}
diff --git a/discord-bot/internal/adapters/primary/discord/handler.go b/discord-bot/internal/adapters/primary/discord/handler.go
index 5d6bf9e..52096aa 100644
--- a/discord-bot/internal/adapters/primary/discord/handler.go
+++ b/discord-bot/internal/adapters/primary/discord/handler.go
@@ -2,6 +2,7 @@ package discord
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"log/slog"
 	"strings"
@@ -13,6 +14,8 @@ import (
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
 	"go.opentelemetry.io/otel/trace"
+	"google.golang.org/grpc/codes"
+	grpcstatus "google.golang.org/grpc/status"
 )
 
 var tracer = otel.Tracer("discord-bot/commands")
@@ -34,12 +37,13 @@ type commandHandler interface {
 
 // Handler adapts Discord interactions to the command application layer.
 type Handler struct {
-	app commandHandler
+	app     commandHandler
+	tracker *Tracker
 }
 
 // NewHandler constructs the Discord interaction adapter.
-func NewHandler(app commandHandler) *Handler {
-	return &Handler{app: app}
+func NewHandler(app commandHandler, tracker *Tracker) *Handler {
+	return &Handler{app: app, tracker: tracker}
 }
 
 // Register attaches the interaction handler to the Discord session.
@@ -163,18 +167,34 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses
 			)
 			return
 		}
-		msg, err := h.app.HandleAIQuery(ctx, requiredStringOption(target, "text"))
-		h.followup(ctx, s, i.Interaction, msg, true, start, err)
+		text := requiredStringOption(target, "text")
+		interaction := i.Interaction
+		reqLog := logger.FromContext(ctx)
+		h.tracker.Go(func(trackerCtx context.Context) {
+			asyncCtx := logger.WithLogger(trackerCtx, reqLog)
+			asyncCtx, cancel := context.WithTimeout(asyncCtx, 5*time.Minute)
+			defer cancel()
+
+			msg, err := h.app.HandleAIQuery(asyncCtx, text)
+			if err != nil {
+				reqLog.Error("ai command failed", "error", err.Error())
+				msg = mapAIError(err)
+			}
+			if _, followErr := s.FollowupMessageCreate(interaction, true, &discordgo.WebhookParams{
+				Content: msg,
+				Flags:   discordgo.MessageFlagsEphemeral,
+			}); followErr != nil {
+				reqLog.Error("discord response failed", "error", followErr.Error())
+			}
+		})
 	case "ai.model.set":
-		if err := h.deferResponse(s, i.Interaction, true); err != nil {
-			log.Error("discord response failed",
-				"duration_ms", time.Since(start).Milliseconds(),
-				"error", err.Error(),
-			)
+		msg, err := h.app.HandleAIModelSet(ctx, requiredStringOption(target, "name"))
+		if err != nil {
+			h.respondError(ctx, s, i.Interaction, true, start, err)
 			return
 		}
-		msg, err := h.app.HandleAIModelSet(ctx, requiredStringOption(target, "name"))
-		h.followup(ctx, s, i.Interaction, msg, true, start, err)
+		h.respondMessage(ctx, s, i.Interaction, msg, true)
+		log.Info("command handled", "duration_ms", time.Since(start).Milliseconds())
 	case "ai.model.get":
 		msg, err := h.app.HandleAIModelGet(ctx)
 		if err != nil {
@@ -184,15 +204,13 @@ func (h *Handler) handleApplicationCommand(ctx context.Context, s *discordgo.Ses
 		h.respondMessage(ctx, s, i.Interaction, msg, true)
 		log.Info("command handled", "duration_ms", time.Since(start).Milliseconds())
 	case "ai.model.list":
-		if err := h.deferResponse(s, i.Interaction, true); err != nil {
-			log.Error("discord response failed",
-				"duration_ms", time.Since(start).Milliseconds(),
-				"error", err.Error(),
-			)
+		msg, err := h.app.HandleAIModelList(ctx)
+		if err != nil {
+			h.respondError(ctx, s, i.Interaction, true, start, err)
 			return
 		}
-		msg, err := h.app.HandleAIModelList(ctx)
-		h.followup(ctx, s, i.Interaction, msg, true, start, err)
+		h.respondMessage(ctx, s, i.Interaction, msg, true)
+		log.Info("command handled", "duration_ms", time.Since(start).Milliseconds())
 	default:
 		h.respondError(ctx, s, i.Interaction, true, start, fmt.Errorf("unsupported command: %s", commandPath))
 	}
@@ -310,6 +328,16 @@ func (h *Handler) followup(ctx context.Context, s *discordgo.Session, interactio
 	}
 }
 
+func mapAIError(err error) string {
+	if errors.Is(err, context.DeadlineExceeded) {
+		return "The AI took too long to respond. Try again later."
+	}
+	if status, ok := grpcstatus.FromError(err); ok && status.Code() == codes.Unavailable {
+		return "The AI service is unreachable right now. Try again in a moment."
+	}
+	return "Sorry, something went wrong handling that AI request."
+}
+
 // interactionLogger adds stable interaction metadata without logging noisy option values.
 func interactionLogger(ctx context.Context, i *discordgo.InteractionCreate) *slog.Logger {
 	log := logger.FromContext(ctx)
diff --git a/plan.md b/plan.md
deleted file mode 100644
index 58bee62..0000000
--- a/plan.md
+++ /dev/null
@@ -1,586 +0,0 @@
-# ai-gateway — Implementation Plan
-
-This plan describes the implementation of a new Go microservice, `ai-gateway`, in the `home-services` monorepo (`gitea.nik4nao.com/nik/home-services`). It centralizes all AI/LLM logic behind a gRPC API so callers (`discord-bot`, `alexa-bridge`) remain thin transport adapters with zero AI knowledge.
-
----
-
-## 1. Goals & Non-Goals
-
-### Goals
-- New gRPC service `ai-gateway` listening on `:50052`.
-- Owns **all** AI logic: Ollama connection, prompt construction, LLM intent parsing, dispatch to `ha-gateway`.
-- Callers send raw user text via `QueryRequest`; receive a human-readable reply in `QueryResponse`.
-- mTLS client authentication when calling `ha-gateway` (ha-gateway requires mTLS).
-- Hexagonal architecture, matching the existing `ha-gateway` layout.
-- Structured logging via `slog`, OTel OTLP gRPC traces/metrics.
-- Deployed to the `home-services` namespace on K3s.
-
-### Non-Goals
-- No auth on `ai-gateway`'s own inbound gRPC surface in this iteration (in-cluster only; match current `ha-gateway` posture).
-- No streaming responses — unary only.
-- No conversation memory — each `Query` is stateless.
-- No new Home Assistant features beyond what `ha-gateway` already exposes (LightService + EntityService).
-
----
-
-## 2. Repository Layout
-
-All paths are relative to the `home-services` repo root.
-
-```
-proto/
-  ai/v1/ai.proto                          # NEW
-
-gen/
-  ai/v1/                                  # NEW (generated; committed)
-    ai.pb.go
-    ai_grpc.pb.go
-
-services/
-  ai-gateway/                             # NEW
-    go.mod
-    cmd/
-      ai-gateway/
-        main.go
-    config/
-      config.go
-    domain/
-      prompt.go
-      service.go
-      intent.go
-    adapters/
-      inbound/
-        grpc/
-          server.go
-      outbound/
-        ollama/
-          client.go
-        hagateway/
-          client.go
-    internal/
-      observability/
-        logging.go
-        otel.go
-    Dockerfile
-    .dockerignore
-  discord-bot/                            # MODIFIED
-    adapters/outbound/aigateway/client.go # NEW
-    (remove any direct Ollama code if present)
-```
-
-Also update:
-- `go.work` — add `./services/ai-gateway` and keep `replace` directive to `../gen`.
-- `buf.gen.yaml` / `buf.yaml` — include the new `ai/v1` proto package.
-
----
-
-## 3. Proto Definition
-
-### File: `proto/ai/v1/ai.proto`
-
-```proto
-syntax = "proto3";
-
-package ai.v1;
-
-option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1";
-
-// AIService accepts free-form natural language queries and returns a
-// human-readable reply. It encapsulates LLM prompting, intent parsing,
-// and dispatch to downstream services (e.g. ha-gateway).
-service AIService {
-  rpc Query(QueryRequest) returns (QueryResponse);
-}
-
-message QueryRequest {
-  // Raw user text, e.g. "turn on the living room light".
-  string text = 1;
-
-  // Optional caller identifier for logging/tracing (e.g. "discord-bot").
-  string source = 2;
-}
-
-message QueryResponse {
-  // Human-readable reply to show the user.
-  string reply = 1;
-
-  // Parsed intent name, if any. Empty if no actionable intent was detected.
-  string intent = 2;
-
-  // True if an action was dispatched to a downstream service.
-  bool action_taken = 3;
-}
-```
-
-### Generation
-- Run `buf generate` from repo root.
-- Commit `gen/ai/v1/*.pb.go` and `gen/ai/v1/*_grpc.pb.go` (per existing convention — `gen/` is committed to avoid CI codegen dependency).
-
----
-
-## 4. Configuration (`services/ai-gateway/config/config.go`)
-
-Load from environment. Use `os.Getenv` with defaults (matches existing ha-gateway style — no new dep).
-
-| Env Var                       | Default                                               | Purpose                                          |
-| ----------------------------- | ----------------------------------------------------- | ------------------------------------------------ |
-| `GRPC_LISTEN_ADDR`            | `:50052`                                              | Inbound gRPC bind address                        |
-| `OLLAMA_URL`                  | `http://192.168.7.96:11434`                           | Ollama HTTP API (direct LAN IP; no K8s Service)  |
-| `OLLAMA_MODEL`                | `llama3`                                              | Model name                                       |
-| `OLLAMA_TIMEOUT`              | `30s`                                                 | HTTP timeout for Ollama calls                    |
-| `HA_GATEWAY_ADDR`             | `ha-gateway.home-services.svc.cluster.local:50051`    | ha-gateway gRPC endpoint                         |
-| `HA_GATEWAY_TLS_CA_FILE`      | `/etc/ai-gateway/tls/ca.crt`                          | CA cert that signed ha-gateway's server cert     |
-| `HA_GATEWAY_TLS_CERT_FILE`    | `/etc/ai-gateway/tls/tls.crt`                         | ai-gateway's client cert (for mTLS)              |
-| `HA_GATEWAY_TLS_KEY_FILE`     | `/etc/ai-gateway/tls/tls.key`                         | ai-gateway's client key                          |
-| `HA_GATEWAY_SERVER_NAME`      | `ha-gateway.home-services.svc.cluster.local`          | SNI / cert verification name                     |
-| `LOG_LEVEL`                   | `info`                                                | `debug`/`info`/`warn`/`error`                    |
-| `LOG_FORMAT`                  | `json`                                                | `json` or `text`                                 |
-| `OTEL_EXPORTER_OTLP_ENDPOINT` | `otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317` | OTLP gRPC endpoint |
-| `OTEL_SERVICE_NAME`           | `ai-gateway`                                          | Service name for traces/metrics                  |
-
-Provide a `Config` struct with a `Load()` function returning `(Config, error)`. Validate required files exist at startup.
-
----
-
-## 5. Domain Layer
-
-### `domain/intent.go`
-
-Define the intent contract the LLM must produce:
-
-```go
-package domain
-
-type Intent struct {
-    Name    string            `json:"intent"`   // e.g. "turn_on_light", "turn_off_light", "none"
-    Entity  string            `json:"entity"`   // e.g. "living_room" (friendly name or entity_id)
-    Params  map[string]string `json:"params"`   // optional, e.g. {"brightness":"80"}
-    Reply   string            `json:"reply"`    // what to say back to the user
-}
-
-const (
-    IntentNone         = "none"
-    IntentTurnOnLight  = "turn_on_light"
-    IntentTurnOffLight = "turn_off_light"
-    IntentListEntities = "list_entities"
-)
-```
-
-### `domain/prompt.go`
-
-Build the Ollama prompt. The system prompt MUST instruct the model to return **only** a single JSON object matching the `Intent` schema. No markdown fences, no prose.
-
-```go
-package domain
-
-import "fmt"
-
-const systemPrompt = `You are a home automation assistant. Given a user request, respond with a single JSON object and nothing else — no markdown, no code fences, no explanation.
-
-Schema:
-{
-  "intent": "turn_on_light" | "turn_off_light" | "list_entities" | "none",
-  "entity": "<friendly_name_or_empty>",
-  "params": { "<key>": "<value>" },
-  "reply":  "<short human-readable reply>"
-}
-
-Rules:
-- If the request is not actionable, use intent="none" and put the conversational answer in "reply".
-- Always include all four fields. Use "" or {} for empty values.
-- Do not wrap the JSON in backticks.`
-
-func BuildPrompt(userText string) string {
-    return fmt.Sprintf("%s\n\nUser: %s", systemPrompt, userText)
-}
-```
-
-### `domain/service.go`
-
-The orchestrator. Depends on two ports (interfaces) defined here:
-
-```go
-package domain
-
-import "context"
-
-type LLMClient interface {
-    Generate(ctx context.Context, prompt string) (string, error)
-}
-
-type HAClient interface {
-    TurnOnLight(ctx context.Context, entity string, params map[string]string) error
-    TurnOffLight(ctx context.Context, entity string) error
-    ListEntities(ctx context.Context) ([]string, error)
-}
-
-type Service struct {
-    llm LLMClient
-    ha  HAClient
-    log *slog.Logger
-}
-
-func NewService(llm LLMClient, ha HAClient, log *slog.Logger) *Service { /* ... */ }
-
-type QueryResult struct {
-    Reply       string
-    Intent      string
-    ActionTaken bool
-}
-
-func (s *Service) Query(ctx context.Context, text string) (QueryResult, error) {
-    // 1. BuildPrompt(text)
-    // 2. s.llm.Generate(ctx, prompt)
-    // 3. json.Unmarshal into Intent
-    //    - On unmarshal error: log at warn, return reply = "I didn't understand that."
-    // 4. switch intent.Name:
-    //      turn_on_light  -> s.ha.TurnOnLight(...)
-    //      turn_off_light -> s.ha.TurnOffLight(...)
-    //      list_entities  -> s.ha.ListEntities(...); format into reply
-    //      none / default -> reply = intent.Reply
-    // 5. Return QueryResult
-}
-```
-
-**Error handling:**
-- LLM call failure → return error; inbound adapter maps to gRPC `Unavailable`.
-- JSON parse failure → do NOT error; return a friendly "I didn't understand" reply and log the raw LLM output at `warn` with the original text (not error).
-- HA dispatch failure → log at `error`, return reply "I couldn't reach Home Assistant right now."; `ActionTaken=false`.
-
----
-
-## 6. Outbound Adapters
-
-### `adapters/outbound/ollama/client.go`
-
-- Plain `net/http.Client` with configured timeout.
-- POST to `{OLLAMA_URL}/api/generate` with body:
-  ```json
-  { "model": "<OLLAMA_MODEL>", "prompt": "<prompt>", "stream": false }
-  ```
-- Decode JSON response, return the `response` field as a string.
-- Implement `domain.LLMClient`.
-- Wrap the HTTP client with OTel instrumentation (`otelhttp.NewTransport`).
-
-### `adapters/outbound/hagateway/client.go`
-
-This is the mTLS-critical piece.
-
-- Construct a `*grpc.ClientConn` to `HA_GATEWAY_ADDR` with TLS credentials built from the three cert files:
-  ```go
-  func loadTLSCredentials(caFile, certFile, keyFile, serverName string) (credentials.TransportCredentials, error) {
-      caPEM, err := os.ReadFile(caFile)
-      if err != nil { return nil, fmt.Errorf("read ca: %w", err) }
-      cp := x509.NewCertPool()
-      if !cp.AppendCertsFromPEM(caPEM) {
-          return nil, errors.New("failed to append CA cert")
-      }
-      clientCert, err := tls.LoadX509KeyPair(certFile, keyFile)
-      if err != nil { return nil, fmt.Errorf("load client keypair: %w", err) }
-      return credentials.NewTLS(&tls.Config{
-          Certificates: []tls.Certificate{clientCert},
-          RootCAs:      cp,
-          ServerName:   serverName,
-          MinVersion:   tls.VersionTLS13,
-      }), nil
-  }
-  ```
-- Use `grpc.NewClient(addr, grpc.WithTransportCredentials(creds), grpc.WithStatsHandler(otelgrpc.NewClientHandler()))`.
-- Wrap the generated ha-gateway clients (`LightServiceClient`, `EntityServiceClient`) to satisfy `domain.HAClient`.
-- Expose a `Close()` method for graceful shutdown.
-
-**Cert source:** the cert files will be projected into the pod via a Kubernetes `Secret` mounted at `/etc/ai-gateway/tls/`. See deployment manifest below. Issuing the cert is covered in §10.
-
----
-
-## 7. Inbound Adapter
-
-### `adapters/inbound/grpc/server.go`
-
-- Implements `aiv1.AIServiceServer`.
-- `Query(ctx, req)` → calls `domain.Service.Query(ctx, req.Text)` → maps `QueryResult` to `QueryResponse`.
-- Attach OTel interceptor: `grpc.StatsHandler(otelgrpc.NewServerHandler())`.
-- Attach a slog unary interceptor that logs method, duration, caller `source`, and error code.
-- Register reflection service only if `LOG_LEVEL=debug` (convenience for `grpcurl`).
-
----
-
-## 8. Observability (`internal/observability/`)
-
-Copy the pattern from `ha-gateway`:
-
-### `logging.go`
-- `NewLogger(level, format string) *slog.Logger` returning either `slog.NewJSONHandler` or `slog.NewTextHandler` wrapping `os.Stdout`.
-
-### `otel.go`
-- `InitOTel(ctx, endpoint, serviceName) (shutdown func(context.Context) error, err error)`.
-- Uses `otlptracegrpc` + `otlpmetricgrpc` exporters, insecure credentials (in-cluster).
-- Sets global `TracerProvider` and `MeterProvider`.
-- Resource attributes: `service.name`, `service.namespace=home-services`.
-
----
-
-## 9. Entry Point (`cmd/ai-gateway/main.go`)
-
-Standard startup sequence:
-
-1. Load config.
-2. Build logger.
-3. Init OTel; defer shutdown.
-4. Build Ollama client.
-5. Build ha-gateway client (mTLS); defer `Close()`.
-6. Build domain service.
-7. Build gRPC server with interceptors, register `AIService`.
-8. Listen on `GRPC_LISTEN_ADDR`.
-9. Handle `SIGINT`/`SIGTERM` for graceful shutdown: `server.GracefulStop()` with a 10s timeout, then OTel shutdown.
-
----
-
-## 10. TLS / mTLS Plumbing
-
-`ha-gateway` requires mTLS. `ai-gateway` needs a client certificate signed by the same CA that ha-gateway trusts.
-
-### Approach: cert-manager + internal-ca-issuer
-
-Create a `Certificate` resource for `ai-gateway` (file: `manifests/home-services/ai-gateway-client-cert.yaml`):
-
-```yaml
-apiVersion: cert-manager.io/v1
-kind: Certificate
-metadata:
-  name: ai-gateway-client
-  namespace: home-services
-spec:
-  secretName: ai-gateway-client-tls
-  duration: 2160h      # 90d
-  renewBefore: 360h    # 15d
-  subject:
-    organizations: [home-services]
-  commonName: ai-gateway
-  usages:
-    - client auth
-  issuerRef:
-    name: internal-ca-issuer
-    kind: ClusterIssuer
-    group: cert-manager.io
-```
-
-**Important:** use `internal-ca-issuer` (the CA issuer), **never** `internal-ca` (the bootstrap self-signed issuer). This matches the homelab convention.
-
-The resulting secret `ai-gateway-client-tls` contains `tls.crt`, `tls.key`, and `ca.crt` — mount all three.
-
-### Verify ha-gateway's CA trust
-Confirm ha-gateway's server TLS config trusts `internal-ca-issuer`'s CA (it should, since both use the same cluster CA). If ha-gateway uses a separate client-auth CA, adjust the issuer accordingly.
-
----
-
-## 11. Kubernetes Manifest
-
-### File: `manifests/home-services/ai-gateway.yaml`
-
-Single file with `---` separators per repo convention.
-
-```yaml
-apiVersion: v1
-kind: Service
-metadata:
-  name: ai-gateway
-  namespace: home-services
-spec:
-  selector: { app: ai-gateway }
-  ports:
-    - name: grpc
-      port: 50052
-      targetPort: 50052
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: ai-gateway
-  namespace: home-services
-spec:
-  replicas: 1
-  selector: { matchLabels: { app: ai-gateway } }
-  template:
-    metadata:
-      labels: { app: ai-gateway }
-    spec:
-      containers:
-        - name: ai-gateway
-          image: gitea.nik4nao.com/nik/ai-gateway:latest
-          imagePullPolicy: Always
-          ports:
-            - containerPort: 50052
-              name: grpc
-          env:
-            - { name: GRPC_LISTEN_ADDR, value: ":50052" }
-            - { name: OLLAMA_URL, value: "http://192.168.7.96:11434" }
-            - { name: OLLAMA_MODEL, value: "llama3" }
-            - { name: HA_GATEWAY_ADDR, value: "ha-gateway.home-services.svc.cluster.local:50051" }
-            - { name: HA_GATEWAY_TLS_CA_FILE,   value: "/etc/ai-gateway/tls/ca.crt" }
-            - { name: HA_GATEWAY_TLS_CERT_FILE, value: "/etc/ai-gateway/tls/tls.crt" }
-            - { name: HA_GATEWAY_TLS_KEY_FILE,  value: "/etc/ai-gateway/tls/tls.key" }
-            - { name: HA_GATEWAY_SERVER_NAME,   value: "ha-gateway.home-services.svc.cluster.local" }
-            - { name: LOG_LEVEL,  value: "info" }
-            - { name: LOG_FORMAT, value: "json" }
-            - { name: OTEL_EXPORTER_OTLP_ENDPOINT,
-                value: "otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317" }
-            - { name: OTEL_SERVICE_NAME, value: "ai-gateway" }
-          volumeMounts:
-            - name: tls
-              mountPath: /etc/ai-gateway/tls
-              readOnly: true
-          readinessProbe:
-            tcpSocket: { port: 50052 }
-            initialDelaySeconds: 3
-            periodSeconds: 10
-          livenessProbe:
-            tcpSocket: { port: 50052 }
-            initialDelaySeconds: 10
-            periodSeconds: 20
-      volumes:
-        - name: tls
-          secret:
-            secretName: ai-gateway-client-tls
-      imagePullSecrets:
-        - name: gitea-registry
-```
-
-No resource `limits`/`requests` yet — matches current repo convention (memory limits not yet enforced on pods).
-
----
-
-## 12. discord-bot Changes
-
-### New: `services/discord-bot/adapters/outbound/aigateway/client.go`
-- gRPC client to `ai-gateway.home-services.svc.cluster.local:50052`, **plaintext** (no auth on ai-gateway's inbound surface yet).
-- Exposes `Query(ctx, text string) (reply string, err error)`.
-- Inject into existing command handler.
-
-### Removed / simplified
-- If `discord-bot` currently contains any direct Ollama calls, remove them.
-- Slash command handler for free-form queries simply calls `aigateway.Query(ctx, msg.Content)` and posts the returned reply.
-- Event-notification path (existing Discord → notify flow) is untouched.
-
-### Config additions to discord-bot
-- `AI_GATEWAY_ADDR` (default `ai-gateway.home-services.svc.cluster.local:50052`).
-
----
-
-## 13. CI / Build
-
-### `services/ai-gateway/Dockerfile`
-Multi-stage build matching existing services:
-
-```dockerfile
-FROM golang:1.26 AS build
-WORKDIR /src
-COPY go.work go.work.sum ./
-COPY gen ./gen
-COPY services/ai-gateway ./services/ai-gateway
-WORKDIR /src/services/ai-gateway
-RUN CGO_ENABLED=0 go build -o /out/ai-gateway ./cmd/ai-gateway
-
-FROM gcr.io/distroless/static-debian12:nonroot
-COPY --from=build /out/ai-gateway /ai-gateway
-USER nonroot:nonroot
-ENTRYPOINT ["/ai-gateway"]
-```
-
-### Gitea Actions workflow
-Mirror the existing `ha-gateway` workflow:
-- Trigger on pushes touching `services/ai-gateway/**`, `gen/ai/**`, or `proto/ai/**`.
-- `docker buildx` multiarch build (`linux/amd64,linux/arm64`).
-- Push to `gitea.nik4nao.com/nik/ai-gateway:latest` and `:${{ github.sha }}`.
-- Use the Gitea API token (`read:package` + `write:package`) as registry password — **not** the account password.
-- Remember: buildkit CA must be injected each run (existing runner pattern).
-
----
-
-## 14. Workspace Wiring
-
-### `go.work` — add line:
-```
-use ./services/ai-gateway
-```
-Keep the existing `replace gitea.nik4nao.com/nik/home-services/gen => ../gen` in `services/ai-gateway/go.mod`.
-
-### `services/ai-gateway/go.mod` dependencies
-- `google.golang.org/grpc`
-- `google.golang.org/protobuf`
-- `go.opentelemetry.io/otel`
-- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`
-- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`
-- `go.opentelemetry.io/otel/sdk`
-- `go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc`
-- `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp`
-
----
-
-## 15. Testing
-
-### Unit tests (`services/ai-gateway/domain/service_test.go`)
-- Fake `LLMClient` returning canned JSON strings for each intent.
-- Fake `HAClient` recording calls.
-- Assert:
-  - Valid `turn_on_light` JSON → `HAClient.TurnOnLight` called with correct entity; reply matches.
-  - Invalid JSON → graceful reply, no panic, no HA call.
-  - `intent="none"` → no HA call; reply passed through.
-  - HA call returning error → reply contains "couldn't reach Home Assistant"; `ActionTaken=false`.
-
-### Integration smoke test (manual, post-deploy)
-```bash
-# From inside the cluster:
-grpcurl -plaintext -d '{"text":"turn on the living room light","source":"manual"}' \
-  ai-gateway.home-services.svc.cluster.local:50052 ai.v1.AIService/Query
-```
-
-### mTLS verification
-```bash
-# Should succeed (using mounted cert):
-kubectl exec -n home-services deploy/ai-gateway -- /ai-gateway --selftest  # if implemented
-# Or inspect via openssl from within the pod if distroless allows a debug sidecar.
-```
-
----
-
-## 16. Rollout Order
-
-Implement in this order. Each step should compile and tests should pass before the next.
-
-1. **Proto + gen** — add `proto/ai/v1/ai.proto`, run `buf generate`, commit `gen/ai/v1/`.
-2. **Scaffold** — create `services/ai-gateway/` with `go.mod`, `main.go` (stub), update `go.work`.
-3. **Domain** — `intent.go`, `prompt.go`, `service.go` + unit tests with fakes.
-4. **Ollama adapter** — HTTP client, manual curl-based validation against `192.168.7.96:11434`.
-5. **ha-gateway adapter** — mTLS dial, wrap generated clients, satisfy `domain.HAClient`.
-6. **Inbound gRPC adapter** — server, interceptors.
-7. **Observability** — logging + OTel init.
-8. **Entry point** — wire everything in `cmd/ai-gateway/main.go`.
-9. **Dockerfile + CI** — build and push image to Gitea registry.
-10. **Cert-manager Certificate** — apply `ai-gateway-client-cert.yaml`; verify `ai-gateway-client-tls` secret is created.
-11. **Deployment manifest** — apply `ai-gateway.yaml`; verify pod ready, logs clean, `grpcurl` smoke test passes.
-12. **discord-bot update** — add `aigateway` outbound adapter, remove any direct Ollama usage, redeploy.
-13. **End-to-end test** — issue a Discord slash command, observe:
-    - Discord → ai-gateway → Ollama → ai-gateway → ha-gateway (mTLS) → HA → reply back.
-    - Traces visible in Tempo, logs in Loki, metrics in Prometheus.
-
----
-
-## 17. Open Questions / Deferred
-
-- **Auth on ai-gateway's inbound surface:** currently none. Revisit when `alexa-bridge` lands — Alexa path is public-ingress, so ai-gateway may eventually need mTLS inbound too.
-- **Intent schema evolution:** if the set of intents grows meaningfully, consider moving the schema into the proto (enum + oneof) rather than free-form JSON. For now, JSON keeps the LLM prompt simple.
-- **Conversation memory:** out of scope. If needed later, add a per-`source` session store (Valkey in `home-services`).
-- **Prompt templates per model:** `llama3` works with the current system prompt. If swapping to a smaller model, prompt may need tuning — keep `BuildPrompt` easy to override via config.
-
----
-
-## 18. Acceptance Criteria
-
-- [ ] `ai-gateway` pod runs ready in `home-services` namespace.
-- [ ] `grpcurl` smoke test (§15) returns a structured `QueryResponse` for a light command.
-- [ ] Light actually turns on/off in Home Assistant when tested end-to-end.
-- [ ] ha-gateway logs show mTLS handshake succeeded with CN=`ai-gateway`.
-- [ ] Traces for a full Discord query show three spans: `discord-bot` → `ai-gateway` → `ha-gateway`.
-- [ ] `discord-bot` contains no direct references to `OLLAMA_URL` or Ollama HTTP client code.
-- [ ] Unit tests pass in CI; Docker image builds multiarch.
\ No newline at end of file