feat: add ai-gateway microservice with gRPC API for AI logic

- Implemented new gRPC service `AIService` in `proto/ai/v1/ai.proto` for handling natural language queries. - Generated Go code for the gRPC service and messages in `gen/ai/v1/`. - Created `services/ai-gateway/` directory structure with necessary files for the service. - Added configuration loading and structured logging. - Implemented domain logic for intent parsing and interaction with Home Assistant. - Established outbound adapters for Ollama and Home Assistant with mTLS support. - Updated `go.work` to include the new service and maintain existing dependencies. - Modified `discord-bot` to use the new `ai-gateway` for AI interactions. - Added deployment manifest for Kubernetes and CI/CD configuration for building and deploying the service.
2026-04-21 21:52:28 +09:00 · 2026-04-21 21:52:28 +09:00 · 520f5d1ffb
commit 520f5d1ffb
parent fb62076fbc
27 changed files with 2306 additions and 12 deletions
--- a/.gitea/workflows/ci.yaml
+++ b/.gitea/workflows/ci.yaml
@ -28,6 +28,7 @@ jobs:
          go-version-file: go.work
          cache-dependency-path: |
            go.work.sum
+            ai-gateway/go.sum
            ha-gateway/go.sum
            discord-bot/go.sum
            gen/go.sum
@ -35,15 +36,42 @@ jobs:
      - name: go vet
        run: |
          cd gen && go vet ./...
+          cd ../ai-gateway && go vet ./...
          cd ../ha-gateway && go vet ./...
          cd ../discord-bot && go vet ./...

      - name: go test
        run: |
          cd gen && go test ./...
+          cd ../ai-gateway && go test ./...
          cd ../ha-gateway && go test ./...
          cd ../discord-bot && go test ./...

+  build-ai-gateway:
+    needs: test
+    if: github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: docker/setup-buildx-action@v4
+
+      - uses: docker/login-action@v4
+        with:
+          registry: ${{ env.REGISTRY }}
+          username: ${{ secrets.REGISTRY_USER }}
+          password: ${{ secrets.REGISTRY_PASSWORD }}
+
+      - uses: docker/build-push-action@v7
+        with:
+          context: .
+          file: ai-gateway/Dockerfile
+          push: true
+          platforms: linux/amd64
+          tags: |
+            ${{ env.IMAGE_PREFIX }}/ai-gateway:${{ github.sha }}
+            ${{ env.IMAGE_PREFIX }}/ai-gateway:latest
+
  build-ha-gateway:
    needs: test
    if: github.ref == 'refs/heads/main'
--- a/ai-gateway/.dockerignore
+++ b/ai-gateway/.dockerignore
@ -0,0 +1,3 @@
+.env
+.git
+*.log
--- a/ai-gateway/.env.example
+++ b/ai-gateway/.env.example
@ -0,0 +1,11 @@
+GRPC_PORT=50052
+OLLAMA_URL=http://192.168.7.96:11434
+OLLAMA_MODEL=llama3
+OLLAMA_TIMEOUT=30s
+HA_GATEWAY_ADDR=localhost:50051
+HA_GATEWAY_SERVER_NAME=ha-gateway.home-services.svc.cluster.local
+TLS_DIR=
+OTEL_ENDPOINT=
+LOG_LEVEL=info
+LOG_FORMAT=text
+LIGHT_CACHE_TTL=60s
--- a/ai-gateway/Dockerfile
+++ b/ai-gateway/Dockerfile
@ -0,0 +1,21 @@
+FROM golang:1.26-alpine AS builder
+WORKDIR /workspace
+
+COPY go.work go.work.sum ./
+COPY gen/ ./gen/
+COPY ha-gateway/ ./ha-gateway/
+COPY discord-bot/ ./discord-bot/
+COPY ai-gateway/ ./ai-gateway/
+
+WORKDIR /workspace/ai-gateway
+RUN go mod download
+
+ARG VERSION=dev
+RUN CGO_ENABLED=0 GOOS=linux go build \
+  -ldflags="-s -w -X main.version=${VERSION}" \
+  -o /ai-gateway ./cmd/gateway
+
+FROM gcr.io/distroless/static:nonroot
+COPY --from=builder /ai-gateway /ai-gateway
+EXPOSE 50052
+ENTRYPOINT ["/ai-gateway"]
--- a/ai-gateway/cmd/gateway/main.go
+++ b/ai-gateway/cmd/gateway/main.go
@ -0,0 +1,160 @@
+package main
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"log/slog"
+	"net"
+	"net/http"
+	"os"
+	"os/signal"
+	"path/filepath"
+	"syscall"
+
+	"github.com/joho/godotenv"
+	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/health"
+	grpc_health_v1 "google.golang.org/grpc/health/grpc_health_v1"
+	"google.golang.org/grpc/reflection"
+
+	aigrpc "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/primary/grpc"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/secondary/hagateway"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/secondary/ollama"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/app"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/config"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/telemetry"
+	aiv1 "gitea.nik4nao.com/nik/home-services/gen/ai/v1"
+)
+
+// version is set at build time via -ldflags "-X main.version=<tag>".
+var version = "dev"
+
+func main() {
+	_ = godotenv.Load()
+
+	cfg, err := config.Load()
+	if err != nil {
+		os.Stderr.WriteString("config error: " + err.Error() + "\n")
+		os.Exit(1)
+	}
+
+	log := logger.New(cfg.LogFormat, cfg.LogLevel)
+	slog.SetDefault(log)
+	log.Info("starting ai-gateway",
+		"version", version,
+		"grpc_port", cfg.GRPCPort,
+		"ollama_url", cfg.OllamaURL,
+		"ollama_model", cfg.OllamaModel,
+		"ha_gateway_addr", cfg.HAGatewayAddr,
+		"tls_dir", cfg.TLSDir,
+		"otel_endpoint", cfg.OTELEndpoint,
+		"light_cache_ttl", cfg.LightCacheTTL.String(),
+	)
+
+	ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT)
+	defer stop()
+	ctx = logger.WithLogger(ctx, log)
+
+	shutdown, err := telemetry.Setup(ctx, "ai-gateway", version, cfg)
+	if err != nil {
+		log.Error("telemetry setup failed", "err", err)
+		os.Exit(1)
+	}
+
+	ollamaClient := ollama.New(cfg.OllamaURL, cfg.OllamaModel, &http.Client{Timeout: cfg.OllamaTimeout})
+	haClient, err := hagateway.New(ctx, cfg.HAGatewayAddr, cfg.TLSDir, cfg.HAGatewayServerName, log)
+	if err != nil {
+		log.Error("ha-gateway client setup failed", "err", err)
+		os.Exit(1)
+	}
+	defer func() {
+		if err := haClient.Close(); err != nil {
+			log.Error("ha-gateway client close failed", "err", err)
+		}
+	}()
+
+	lightCache := domain.NewLightCache(cfg.LightCacheTTL, haClient.ListLights)
+	queryApp := app.NewQueryApp(ollamaClient, haClient, lightCache, log)
+
+	serverOpts := []grpc.ServerOption{
+		grpc.StatsHandler(otelgrpc.NewServerHandler()),
+		grpc.ChainUnaryInterceptor(aigrpc.LoggingUnaryInterceptor(log)),
+	}
+	if cfg.TLSDir != "" {
+		creds, err := loadServerCredentials(cfg.TLSDir)
+		if err != nil {
+			log.Error("load mTLS credentials failed", "tls_dir", cfg.TLSDir, "err", err)
+			os.Exit(1)
+		}
+		serverOpts = append(serverOpts, grpc.Creds(creds))
+		log.Info("mTLS enabled", "tls_dir", cfg.TLSDir)
+	} else {
+		log.Info("mTLS disabled")
+	}
+
+	srv := grpc.NewServer(serverOpts...)
+	healthSrv := health.NewServer()
+	healthSrv.SetServingStatus("", grpc_health_v1.HealthCheckResponse_SERVING)
+
+	aiv1.RegisterAIServiceServer(srv, aigrpc.NewServer(queryApp))
+	grpc_health_v1.RegisterHealthServer(srv, healthSrv)
+	if cfg.LogLevel == "debug" {
+		reflection.Register(srv)
+	}
+
+	lis, err := net.Listen("tcp", ":"+cfg.GRPCPort)
+	if err != nil {
+		log.Error("listen failed", "err", err)
+		os.Exit(1)
+	}
+
+	go func() {
+		log.Info("ai-gateway listening", "addr", lis.Addr().String())
+		if err := srv.Serve(lis); err != nil {
+			log.Error("serve failed", "err", err)
+		}
+	}()
+
+	<-ctx.Done()
+	log.Info("shutdown signal received, draining")
+	healthSrv.SetServingStatus("", grpc_health_v1.HealthCheckResponse_NOT_SERVING)
+	srv.GracefulStop()
+	log.Info("shutdown complete")
+
+	if err := shutdown(context.Background()); err != nil {
+		log.Error("telemetry shutdown error", "err", err)
+	}
+}
+
+func loadServerCredentials(tlsDir string) (credentials.TransportCredentials, error) {
+	cert, err := tls.LoadX509KeyPair(
+		filepath.Join(tlsDir, "tls.crt"),
+		filepath.Join(tlsDir, "tls.key"),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("load server key pair: %w", err)
+	}
+
+	caPEM, err := os.ReadFile(filepath.Join(tlsDir, "ca.crt"))
+	if err != nil {
+		return nil, fmt.Errorf("read client CA: %w", err)
+	}
+
+	clientCAs := x509.NewCertPool()
+	if !clientCAs.AppendCertsFromPEM(caPEM) {
+		return nil, fmt.Errorf("append client CA: invalid PEM")
+	}
+
+	return credentials.NewTLS(&tls.Config{
+		Certificates: []tls.Certificate{cert},
+		ClientCAs:    clientCAs,
+		ClientAuth:   tls.RequireAndVerifyClientCert,
+		MinVersion:   tls.VersionTLS13,
+	}), nil
+}
--- a/ai-gateway/go.mod
+++ b/ai-gateway/go.mod
@ -0,0 +1,39 @@
+module gitea.nik4nao.com/nik/home-services/ai-gateway
+
+go 1.26
+
+require (
+	gitea.nik4nao.com/nik/home-services/gen v0.0.0
+	github.com/joho/godotenv v1.5.1
+	go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0
+	go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0
+	go.opentelemetry.io/otel v1.39.0
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0
+	go.opentelemetry.io/otel/metric v1.39.0
+	go.opentelemetry.io/otel/sdk v1.39.0
+	go.opentelemetry.io/otel/sdk/metric v1.39.0
+	go.opentelemetry.io/otel/trace v1.39.0
+	google.golang.org/grpc v1.79.3
+)
+
+require (
+	github.com/cenkalti/backoff/v4 v4.3.0 // indirect
+	github.com/cespare/xxhash/v2 v2.3.0 // indirect
+	github.com/felixge/httpsnoop v1.0.4 // indirect
+	github.com/go-logr/logr v1.4.3 // indirect
+	github.com/go-logr/stdr v1.2.2 // indirect
+	github.com/google/uuid v1.6.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect
+	go.opentelemetry.io/auto/sdk v1.2.1 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 // indirect
+	go.opentelemetry.io/proto/otlp v1.5.0 // indirect
+	golang.org/x/net v0.48.0 // indirect
+	golang.org/x/sys v0.39.0 // indirect
+	golang.org/x/text v0.32.0 // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
+	google.golang.org/protobuf v1.36.11 // indirect
+)
+
+replace gitea.nik4nao.com/nik/home-services/gen => ../gen
--- a/ai-gateway/go.sum
+++ b/ai-gateway/go.sum
@ -0,0 +1,71 @@
+github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
+github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
+github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
+github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
+github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
+github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
+github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
+github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
+github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
+github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
+github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
+github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
+github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
+github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
+github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
+github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
+github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
+github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M=
+github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0=
+github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
+github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
+github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
+github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
+github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
+go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
+go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw=
+go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
+go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48=
+go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 h1:QcFwRrZLc82r8wODjvyCbP7Ifp3UANaBSmhDSFjnqSc=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0/go.mod h1:CXIWhUomyWBG/oY2/r/kLp6K/cmx9e/7DLpBuuGdLCA=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 h1:1fTNlAIJZGWLP5FVu0fikVry1IsiUnXjf7QFvoNN3Xw=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0/go.mod h1:zjPK58DtkqQFn+YUMbx0M2XV3QgKU0gS9LeGohREyK4=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 h1:m639+BofXTvcY1q8CGs4ItwQarYtJPOWmVobfM1HpVI=
+go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0/go.mod h1:LjReUci/F4BUyv+y4dwnq3h/26iNOeC3wAIqgvTIZVo=
+go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0=
+go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs=
+go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18=
+go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE=
+go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8=
+go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew=
+go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI=
+go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA=
+go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4=
+go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
+golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU=
+golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY=
+golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk=
+golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
+golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU=
+golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY=
+gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
+gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
+google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls=
+google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk=
+google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
+google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
+google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
+google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
--- a/ai-gateway/internal/adapters/primary/grpc/interceptor.go
+++ b/ai-gateway/internal/adapters/primary/grpc/interceptor.go
@ -0,0 +1,61 @@
+package grpc
+
+import (
+	"context"
+	"log/slog"
+	"time"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/peer"
+	"google.golang.org/grpc/status"
+)
+
+// LoggingUnaryInterceptor logs one completion record for each unary gRPC call.
+func LoggingUnaryInterceptor(log *slog.Logger) grpc.UnaryServerInterceptor {
+	return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) {
+		method, ok := grpc.Method(ctx)
+		if !ok {
+			method = info.FullMethod
+		}
+		reqLog := requestLogger(ctx, log, method, req)
+		ctx = logger.WithLogger(ctx, reqLog)
+
+		start := time.Now()
+		resp, err := handler(ctx, req)
+		logCompletion(reqLog, "grpc call completed", status.Code(err), time.Since(start), err)
+		return resp, err
+	}
+}
+
+func requestLogger(ctx context.Context, log *slog.Logger, method string, req any) *slog.Logger {
+	peerAddr := ""
+	if p, ok := peer.FromContext(ctx); ok && p.Addr != nil {
+		peerAddr = p.Addr.String()
+	}
+	source := ""
+	if r, ok := req.(interface{ GetSource() string }); ok {
+		source = r.GetSource()
+	}
+	return log.With("grpc.method", method, "grpc.peer", peerAddr, "source", source)
+}
+
+func logCompletion(log *slog.Logger, msg string, code codes.Code, duration time.Duration, err error) {
+	attrs := []any{
+		"duration_ms", duration.Milliseconds(),
+		"grpc.code", code.String(),
+	}
+	if err != nil {
+		attrs = append(attrs, "error", err.Error())
+	}
+
+	switch code {
+	case codes.OK:
+		log.Info(msg, attrs...)
+	case codes.InvalidArgument, codes.NotFound, codes.Unimplemented:
+		log.Warn(msg, attrs...)
+	default:
+		log.Error(msg, attrs...)
+	}
+}
--- a/ai-gateway/internal/adapters/primary/grpc/server.go
+++ b/ai-gateway/internal/adapters/primary/grpc/server.go
@ -0,0 +1,45 @@
+package grpc
+
+import (
+	"context"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/app"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger"
+	aiv1 "gitea.nik4nao.com/nik/home-services/gen/ai/v1"
+	"google.golang.org/grpc/codes"
+	"google.golang.org/grpc/status"
+)
+
+// Server adapts the AI query app to the gRPC transport.
+type Server struct {
+	aiv1.UnimplementedAIServiceServer
+	app *app.QueryApp
+}
+
+// NewServer constructs the AIService gRPC adapter.
+func NewServer(app *app.QueryApp) *Server {
+	return &Server{app: app}
+}
+
+// Query handles one AI query request.
+func (s *Server) Query(ctx context.Context, req *aiv1.QueryRequest) (*aiv1.QueryResponse, error) {
+	if req.GetText() == "" {
+		return nil, status.Error(codes.InvalidArgument, "text is required")
+	}
+
+	log := logger.FromContext(ctx)
+	if req.GetSource() != "" {
+		log = log.With("source", req.GetSource())
+		ctx = logger.WithLogger(ctx, log)
+	}
+
+	result, err := s.app.Query(ctx, req.GetText())
+	if err != nil {
+		return nil, status.Errorf(codes.Unavailable, "query failed: %v", err)
+	}
+	return &aiv1.QueryResponse{
+		Reply:       result.Reply,
+		Intent:      result.Intent,
+		ActionTaken: result.ActionTaken,
+	}, nil
+}
--- a/ai-gateway/internal/adapters/secondary/hagateway/client.go
+++ b/ai-gateway/internal/adapters/secondary/hagateway/client.go
@ -0,0 +1,162 @@
+package hagateway
+
+import (
+	"context"
+	"crypto/tls"
+	"crypto/x509"
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+	"strconv"
+	"time"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven"
+	hav1 "gitea.nik4nao.com/nik/home-services/gen/ha/v1"
+	"go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc"
+	"google.golang.org/grpc"
+	"google.golang.org/grpc/credentials"
+	"google.golang.org/grpc/credentials/insecure"
+)
+
+// Client implements the HA driven port over gRPC.
+type Client struct {
+	conn        *grpc.ClientConn
+	lightClient hav1.LightServiceClient
+	log         *slog.Logger
+}
+
+// New constructs a gRPC client for ha-gateway with optional mTLS.
+func New(ctx context.Context, addr, tlsDir, serverName string, log *slog.Logger) (*Client, error) {
+	transportCreds := insecure.NewCredentials()
+	if tlsDir != "" {
+		creds, err := loadTransportCredentials(tlsDir, serverName)
+		if err != nil {
+			return nil, fmt.Errorf("load mTLS credentials: %w", err)
+		}
+		transportCreds = creds
+	}
+
+	conn, err := grpc.NewClient(
+		addr,
+		grpc.WithTransportCredentials(transportCreds),
+		grpc.WithStatsHandler(otelgrpc.NewClientHandler()),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("dial ha-gateway: %w", err)
+	}
+
+	return &Client{
+		conn:        conn,
+		lightClient: hav1.NewLightServiceClient(conn),
+		log:         log,
+	}, nil
+}
+
+// Close closes the underlying gRPC connection.
+func (c *Client) Close() error {
+	if err := c.conn.Close(); err != nil {
+		return fmt.Errorf("close ha-gateway client: %w", err)
+	}
+	return nil
+}
+
+// ListLights loads the discovery list used by prompt-building and list replies.
+func (c *Client) ListLights(ctx context.Context) ([]driven.Light, error) {
+	start := time.Now()
+	resp, err := c.lightClient.ListLights(ctx, &hav1.ListLightsRequest{})
+	if err != nil {
+		return nil, fmt.Errorf("list lights: %w", err)
+	}
+	c.log.Debug("grpc call completed", "grpc.method", "LightService/ListLights", "duration_ms", time.Since(start).Milliseconds())
+
+	lights := make([]driven.Light, 0, len(resp.GetLights()))
+	for _, light := range resp.GetLights() {
+		lights = append(lights, driven.Light{
+			EntityID:     light.GetEntityId(),
+			FriendlyName: light.GetFriendlyName(),
+			State:        light.GetState(),
+		})
+	}
+	return lights, nil
+}
+
+// TurnOnLight forwards a turn-on request to ha-gateway.
+func (c *Client) TurnOnLight(ctx context.Context, entity string, params map[string]string) error {
+	start := time.Now()
+	req := &hav1.TurnOnRequest{EntityId: entity}
+	if v, ok := firstParam(params, "brightness", "brightness_pct"); ok {
+		brightness, err := parseUint32(v)
+		if err != nil {
+			return err
+		}
+		req.BrightnessPct = &brightness
+	}
+	if v, ok := firstParam(params, "color_temp", "color_temp_kelvin"); ok {
+		colorTemp, err := parseUint32(v)
+		if err != nil {
+			return err
+		}
+		req.ColorTempKelvin = &colorTemp
+	}
+
+	if _, err := c.lightClient.TurnOn(ctx, req); err != nil {
+		return fmt.Errorf("turn on light %s: %w", entity, err)
+	}
+	c.log.Debug("grpc call completed", "grpc.method", "LightService/TurnOn", "duration_ms", time.Since(start).Milliseconds())
+	return nil
+}
+
+// TurnOffLight forwards a turn-off request to ha-gateway.
+func (c *Client) TurnOffLight(ctx context.Context, entity string) error {
+	start := time.Now()
+	if _, err := c.lightClient.TurnOff(ctx, &hav1.TurnOffRequest{EntityId: entity}); err != nil {
+		return fmt.Errorf("turn off light %s: %w", entity, err)
+	}
+	c.log.Debug("grpc call completed", "grpc.method", "LightService/TurnOff", "duration_ms", time.Since(start).Milliseconds())
+	return nil
+}
+
+func loadTransportCredentials(tlsDir, serverName string) (credentials.TransportCredentials, error) {
+	cert, err := tls.LoadX509KeyPair(
+		filepath.Join(tlsDir, "tls.crt"),
+		filepath.Join(tlsDir, "tls.key"),
+	)
+	if err != nil {
+		return nil, fmt.Errorf("load client key pair: %w", err)
+	}
+
+	caPEM, err := os.ReadFile(filepath.Join(tlsDir, "ca.crt"))
+	if err != nil {
+		return nil, fmt.Errorf("read server CA: %w", err)
+	}
+
+	rootCAs := x509.NewCertPool()
+	if !rootCAs.AppendCertsFromPEM(caPEM) {
+		return nil, fmt.Errorf("append server CA: invalid PEM")
+	}
+
+	return credentials.NewTLS(&tls.Config{
+		Certificates: []tls.Certificate{cert},
+		RootCAs:      rootCAs,
+		ServerName:   serverName,
+		MinVersion:   tls.VersionTLS13,
+	}), nil
+}
+
+func firstParam(params map[string]string, keys ...string) (string, bool) {
+	for _, key := range keys {
+		if v, ok := params[key]; ok {
+			return v, true
+		}
+	}
+	return "", false
+}
+
+func parseUint32(v string) (uint32, error) {
+	n, err := strconv.ParseUint(v, 10, 32)
+	if err != nil {
+		return 0, fmt.Errorf("parse uint32 value %q: %w", v, err)
+	}
+	return uint32(n), nil
+}
--- a/ai-gateway/internal/adapters/secondary/ollama/client.go
+++ b/ai-gateway/internal/adapters/secondary/ollama/client.go
@ -0,0 +1,73 @@
+package ollama
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+
+	"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
+)
+
+type generateRequest struct {
+	Model  string `json:"model"`
+	Prompt string `json:"prompt"`
+	Stream bool   `json:"stream"`
+}
+
+type generateResponse struct {
+	Response string `json:"response"`
+}
+
+// Client implements the LLM driven port with the Ollama generate API.
+type Client struct {
+	baseURL string
+	model   string
+	http    *http.Client
+}
+
+// New constructs an Ollama client with OTel-instrumented transport.
+func New(baseURL, model string, httpClient *http.Client) *Client {
+	if httpClient == nil {
+		httpClient = &http.Client{Transport: otelhttp.NewTransport(http.DefaultTransport)}
+	}
+	if httpClient.Transport == nil {
+		httpClient.Transport = otelhttp.NewTransport(http.DefaultTransport)
+	}
+	return &Client{baseURL: baseURL, model: model, http: httpClient}
+}
+
+// Generate sends one non-streaming prompt to Ollama.
+func (c *Client) Generate(ctx context.Context, prompt string) (string, error) {
+	body, err := json.Marshal(generateRequest{
+		Model:  c.model,
+		Prompt: prompt,
+		Stream: false,
+	})
+	if err != nil {
+		return "", fmt.Errorf("marshal ollama request: %w", err)
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/api/generate", bytes.NewReader(body))
+	if err != nil {
+		return "", fmt.Errorf("build ollama request: %w", err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := c.http.Do(req)
+	if err != nil {
+		return "", fmt.Errorf("call ollama: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", fmt.Errorf("ollama returned status %s", resp.Status)
+	}
+
+	var out generateResponse
+	if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
+		return "", fmt.Errorf("decode ollama response: %w", err)
+	}
+	return out.Response, nil
+}
--- a/ai-gateway/internal/app/query.go
+++ b/ai-gateway/internal/app/query.go
@ -0,0 +1,204 @@
+package app
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"log/slog"
+	"slices"
+	"strconv"
+	"strings"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven"
+)
+
+// QueryResult is the app-layer response mapped onto the gRPC API.
+type QueryResult struct {
+	Reply       string
+	Intent      string
+	ActionTaken bool
+}
+
+// QueryApp orchestrates one AI query request.
+type QueryApp struct {
+	llm   driven.LLMClient
+	ha    driven.HAClient
+	cache *domain.LightCache
+	log   *slog.Logger
+}
+
+// NewQueryApp constructs the AI query application service.
+func NewQueryApp(llm driven.LLMClient, ha driven.HAClient, cache *domain.LightCache, log *slog.Logger) *QueryApp {
+	return &QueryApp{llm: llm, ha: ha, cache: cache, log: log}
+}
+
+// Query runs the full intent parsing and dispatch flow for one user request.
+func (a *QueryApp) Query(ctx context.Context, text string) (QueryResult, error) {
+	lights, err := a.cache.Get(ctx)
+	if err != nil {
+		a.log.Error("light cache refresh failed", "err", err)
+		return QueryResult{
+			Reply:       "I couldn't reach Home Assistant right now.",
+			ActionTaken: false,
+		}, nil
+	}
+
+	prompt := domain.BuildPrompt(text, promptLightLines(lights))
+	raw, err := a.llm.Generate(ctx, prompt)
+	if err != nil {
+		return QueryResult{}, err
+	}
+
+	var intent domain.Intent
+	if err := json.Unmarshal([]byte(raw), &intent); err != nil {
+		a.log.Warn("llm returned invalid json", "text", text, "raw_output", raw)
+		return QueryResult{
+			Reply:       "I didn't understand that.",
+			Intent:      domain.IntentNone,
+			ActionTaken: false,
+		}, nil
+	}
+
+	switch intent.Name {
+	case domain.IntentTurnOnLight:
+		entityID, ok := resolveLightEntity(intent.Entity, lights)
+		if !ok {
+			return QueryResult{Reply: "I couldn't find that light.", Intent: intent.Name}, nil
+		}
+		params, err := ParseLightParams(intent.Params)
+		if err != nil {
+			return QueryResult{
+				Reply:       "I couldn't understand the light settings.",
+				Intent:      intent.Name,
+				ActionTaken: false,
+			}, nil
+		}
+		if err := a.ha.TurnOnLight(ctx, entityID, params); err != nil {
+			a.log.Error("turn on light failed", "entity_id", entityID, "err", err)
+			return QueryResult{
+				Reply:       "I couldn't reach Home Assistant right now.",
+				Intent:      intent.Name,
+				ActionTaken: false,
+			}, nil
+		}
+		return QueryResult{
+			Reply:       fallbackReply(intent.Reply, fmt.Sprintf("Turned on `%s`.", displayLightName(entityID, lights))),
+			Intent:      intent.Name,
+			ActionTaken: true,
+		}, nil
+	case domain.IntentTurnOffLight:
+		entityID, ok := resolveLightEntity(intent.Entity, lights)
+		if !ok {
+			return QueryResult{Reply: "I couldn't find that light.", Intent: intent.Name}, nil
+		}
+		if err := a.ha.TurnOffLight(ctx, entityID); err != nil {
+			a.log.Error("turn off light failed", "entity_id", entityID, "err", err)
+			return QueryResult{
+				Reply:       "I couldn't reach Home Assistant right now.",
+				Intent:      intent.Name,
+				ActionTaken: false,
+			}, nil
+		}
+		return QueryResult{
+			Reply:       fallbackReply(intent.Reply, fmt.Sprintf("Turned off `%s`.", displayLightName(entityID, lights))),
+			Intent:      intent.Name,
+			ActionTaken: true,
+		}, nil
+	case domain.IntentListLights:
+		return QueryResult{
+			Reply:       formatLightListReply(lights),
+			Intent:      intent.Name,
+			ActionTaken: false,
+		}, nil
+	case domain.IntentNone:
+		fallthrough
+	default:
+		return QueryResult{
+			Reply:       fallbackReply(intent.Reply, "I didn't understand that."),
+			Intent:      intent.Name,
+			ActionTaken: false,
+		}, nil
+	}
+}
+
+func promptLightLines(lights []driven.Light) []string {
+	lines := make([]string, 0, len(lights))
+	for _, light := range lights {
+		label := light.FriendlyName
+		if label == "" {
+			label = light.EntityID
+		}
+		lines = append(lines, fmt.Sprintf("- %s (%s) state=%s", label, light.EntityID, light.State))
+	}
+	return lines
+}
+
+func resolveLightEntity(value string, lights []driven.Light) (string, bool) {
+	needle := strings.TrimSpace(strings.ToLower(value))
+	if needle == "" {
+		return "", false
+	}
+	idx := slices.IndexFunc(lights, func(light driven.Light) bool {
+		return strings.ToLower(light.EntityID) == needle || strings.ToLower(light.FriendlyName) == needle
+	})
+	if idx != -1 {
+		return lights[idx].EntityID, true
+	}
+	idx = slices.IndexFunc(lights, func(light driven.Light) bool {
+		return strings.Contains(strings.ToLower(light.FriendlyName), needle)
+	})
+	if idx != -1 {
+		return lights[idx].EntityID, true
+	}
+	return "", false
+}
+
+func displayLightName(entityID string, lights []driven.Light) string {
+	idx := slices.IndexFunc(lights, func(light driven.Light) bool { return light.EntityID == entityID })
+	if idx == -1 || lights[idx].FriendlyName == "" {
+		return entityID
+	}
+	return lights[idx].FriendlyName
+}
+
+func fallbackReply(reply, fallback string) string {
+	if strings.TrimSpace(reply) == "" {
+		return fallback
+	}
+	return reply
+}
+
+func formatLightListReply(lights []driven.Light) string {
+	if len(lights) == 0 {
+		return "No lights found."
+	}
+	lines := make([]string, 0, len(lights)+1)
+	lines = append(lines, "Known lights:")
+	for _, light := range lights {
+		label := light.FriendlyName
+		if label == "" {
+			label = light.EntityID
+		}
+		lines = append(lines, fmt.Sprintf("- %s (%s) [%s]", label, light.EntityID, light.State))
+	}
+	return strings.Join(lines, "\n")
+}
+
+// ParseLightParams converts string params into the protobuf-compatible values the HA adapter expects.
+func ParseLightParams(params map[string]string) (map[string]string, error) {
+	if len(params) == 0 {
+		return map[string]string{}, nil
+	}
+	normalized := make(map[string]string, len(params))
+	for key, value := range params {
+		switch key {
+		case "brightness", "brightness_pct", "color_temp", "color_temp_kelvin":
+			if _, err := strconv.ParseUint(value, 10, 32); err != nil {
+				return nil, fmt.Errorf("invalid %s value %q: %w", key, value, err)
+			}
+		}
+		normalized[key] = value
+	}
+	return normalized, nil
+}
--- a/ai-gateway/internal/app/query_test.go
+++ b/ai-gateway/internal/app/query_test.go
@ -0,0 +1,166 @@
+package app
+
+import (
+	"context"
+	"errors"
+	"log/slog"
+	"reflect"
+	"testing"
+	"time"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven"
+)
+
+type fakeLLM struct {
+	generate func(context.Context, string) (string, error)
+}
+
+func (f *fakeLLM) Generate(ctx context.Context, prompt string) (string, error) {
+	return f.generate(ctx, prompt)
+}
+
+type fakeHA struct {
+	lights         []driven.Light
+	listErr        error
+	turnOnErr      error
+	turnOffErr     error
+	lastTurnOnID   string
+	lastTurnOffID  string
+	lastTurnParams map[string]string
+	listCalls      int
+}
+
+func (f *fakeHA) TurnOnLight(ctx context.Context, entity string, params map[string]string) error {
+	f.lastTurnOnID = entity
+	f.lastTurnParams = params
+	return f.turnOnErr
+}
+
+func (f *fakeHA) TurnOffLight(ctx context.Context, entity string) error {
+	f.lastTurnOffID = entity
+	return f.turnOffErr
+}
+
+func (f *fakeHA) ListLights(ctx context.Context) ([]driven.Light, error) {
+	f.listCalls++
+	if f.listErr != nil {
+		return nil, f.listErr
+	}
+	return append([]driven.Light(nil), f.lights...), nil
+}
+
+func TestQueryAppTurnOnLight(t *testing.T) {
+	ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}}
+	cache := domain.NewLightCache(time.Hour, ha.ListLights)
+	app := NewQueryApp(&fakeLLM{
+		generate: func(ctx context.Context, prompt string) (string, error) {
+			return `{"intent":"turn_on_light","entity":"Kitchen","params":{"brightness":"80"},"reply":"Turning on Kitchen."}`, nil
+		},
+	}, ha, cache, slog.Default())
+
+	got, err := app.Query(context.Background(), "turn on kitchen")
+	if err != nil {
+		t.Fatalf("Query() error = %v", err)
+	}
+	if got.Intent != domain.IntentTurnOnLight || !got.ActionTaken || got.Reply != "Turning on Kitchen." {
+		t.Fatalf("Query() = %+v", got)
+	}
+	if ha.lastTurnOnID != "light.kitchen" {
+		t.Fatalf("TurnOnLight entity = %q", ha.lastTurnOnID)
+	}
+	if !reflect.DeepEqual(ha.lastTurnParams, map[string]string{"brightness": "80"}) {
+		t.Fatalf("TurnOnLight params = %#v", ha.lastTurnParams)
+	}
+}
+
+func TestQueryAppInvalidJSON(t *testing.T) {
+	ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}}
+	app := NewQueryApp(&fakeLLM{
+		generate: func(ctx context.Context, prompt string) (string, error) {
+			return `not-json`, nil
+		},
+	}, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default())
+
+	got, err := app.Query(context.Background(), "turn on kitchen")
+	if err != nil {
+		t.Fatalf("Query() error = %v", err)
+	}
+	if got.Reply != "I didn't understand that." || got.ActionTaken {
+		t.Fatalf("Query() = %+v", got)
+	}
+	if ha.lastTurnOnID != "" {
+		t.Fatalf("expected no HA call, got %q", ha.lastTurnOnID)
+	}
+}
+
+func TestQueryAppIntentNone(t *testing.T) {
+	ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}}
+	app := NewQueryApp(&fakeLLM{
+		generate: func(ctx context.Context, prompt string) (string, error) {
+			return `{"intent":"none","entity":"","params":{},"reply":"Hello there."}`, nil
+		},
+	}, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default())
+
+	got, err := app.Query(context.Background(), "hello")
+	if err != nil {
+		t.Fatalf("Query() error = %v", err)
+	}
+	if got.Reply != "Hello there." || got.ActionTaken {
+		t.Fatalf("Query() = %+v", got)
+	}
+}
+
+func TestQueryAppHAFailure(t *testing.T) {
+	ha := &fakeHA{
+		lights:    []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}},
+		turnOnErr: errors.New("boom"),
+	}
+	app := NewQueryApp(&fakeLLM{
+		generate: func(ctx context.Context, prompt string) (string, error) {
+			return `{"intent":"turn_on_light","entity":"light.kitchen","params":{},"reply":"Turning on Kitchen."}`, nil
+		},
+	}, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default())
+
+	got, err := app.Query(context.Background(), "turn on kitchen")
+	if err != nil {
+		t.Fatalf("Query() error = %v", err)
+	}
+	if got.Reply != "I couldn't reach Home Assistant right now." || got.ActionTaken {
+		t.Fatalf("Query() = %+v", got)
+	}
+}
+
+func TestQueryAppListLights(t *testing.T) {
+	ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "on"}}}
+	app := NewQueryApp(&fakeLLM{
+		generate: func(ctx context.Context, prompt string) (string, error) {
+			return `{"intent":"list_lights","entity":"","params":{},"reply":""}`, nil
+		},
+	}, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default())
+
+	got, err := app.Query(context.Background(), "what lights exist")
+	if err != nil {
+		t.Fatalf("Query() error = %v", err)
+	}
+	want := "Known lights:\n- Kitchen (light.kitchen) [on]"
+	if got.Reply != want || got.ActionTaken {
+		t.Fatalf("Query() = %+v", got)
+	}
+}
+
+func TestLightCacheRefreshAfterTTL(t *testing.T) {
+	ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}}
+	cache := domain.NewLightCache(10*time.Millisecond, ha.ListLights)
+
+	if _, err := cache.Get(context.Background()); err != nil {
+		t.Fatalf("Get() error = %v", err)
+	}
+	time.Sleep(20 * time.Millisecond)
+	if _, err := cache.Get(context.Background()); err != nil {
+		t.Fatalf("Get() error = %v", err)
+	}
+	if ha.listCalls < 2 {
+		t.Fatalf("ListLights calls = %d, want at least 2", ha.listCalls)
+	}
+}
--- a/ai-gateway/internal/config/config.go
+++ b/ai-gateway/internal/config/config.go
@ -0,0 +1,89 @@
+package config
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"path/filepath"
+	"time"
+)
+
+// Config holds runtime configuration for the AI gRPC gateway.
+type Config struct {
+	GRPCPort            string
+	OllamaURL           string
+	OllamaModel         string
+	OllamaTimeout       time.Duration
+	HAGatewayAddr       string
+	HAGatewayServerName string
+	TLSDir              string
+	OTELEndpoint        string
+	LogLevel            string
+	LogFormat           string
+	LightCacheTTL       time.Duration
+}
+
+// Load reads configuration from environment variables and applies defaults.
+func Load() (*Config, error) {
+	ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 30*time.Second)
+	if err != nil {
+		return nil, err
+	}
+	cacheTTL, err := parseDurationEnv("LIGHT_CACHE_TTL", 60*time.Second)
+	if err != nil {
+		return nil, err
+	}
+
+	cfg := &Config{
+		GRPCPort:            getenvDefault("GRPC_PORT", "50052"),
+		OllamaURL:           getenvDefault("OLLAMA_URL", "http://192.168.7.96:11434"),
+		OllamaModel:         getenvDefault("OLLAMA_MODEL", "llama3"),
+		OllamaTimeout:       ollamaTimeout,
+		HAGatewayAddr:       getenvDefault("HA_GATEWAY_ADDR", "ha-gateway.home-services.svc.cluster.local:50051"),
+		HAGatewayServerName: getenvDefault("HA_GATEWAY_SERVER_NAME", "ha-gateway.home-services.svc.cluster.local"),
+		TLSDir:              os.Getenv("TLS_DIR"),
+		OTELEndpoint:        os.Getenv("OTEL_ENDPOINT"),
+		LogLevel:            getenvDefault("LOG_LEVEL", "info"),
+		LogFormat:           getenvDefault("LOG_FORMAT", "json"),
+		LightCacheTTL:       cacheTTL,
+	}
+	if cfg.TLSDir != "" {
+		if err := validateTLSDir(cfg.TLSDir); err != nil {
+			return nil, err
+		}
+	}
+	return cfg, nil
+}
+
+func getenvDefault(key, fallback string) string {
+	if v := os.Getenv(key); v != "" {
+		return v
+	}
+	return fallback
+}
+
+func parseDurationEnv(key string, fallback time.Duration) (time.Duration, error) {
+	if v := os.Getenv(key); v != "" {
+		d, err := time.ParseDuration(v)
+		if err != nil {
+			return 0, fmt.Errorf("parse %s: %w", key, err)
+		}
+		return d, nil
+	}
+	return fallback, nil
+}
+
+func validateTLSDir(dir string) error {
+	required := []string{"tls.crt", "tls.key", "ca.crt"}
+	for _, name := range required {
+		path := filepath.Join(dir, name)
+		info, err := os.Stat(path)
+		if err != nil {
+			return fmt.Errorf("tls dir validation failed for %s: %w", path, err)
+		}
+		if info.IsDir() {
+			return fmt.Errorf("tls dir validation failed for %s: %w", path, errors.New("expected file"))
+		}
+	}
+	return nil
+}
--- a/ai-gateway/internal/core/domain/intent.go
+++ b/ai-gateway/internal/core/domain/intent.go
@ -0,0 +1,16 @@
+package domain
+
+// Intent describes the JSON contract expected from the LLM.
+type Intent struct {
+	Name   string            `json:"intent"`
+	Entity string            `json:"entity"`
+	Params map[string]string `json:"params"`
+	Reply  string            `json:"reply"`
+}
+
+const (
+	IntentNone         = "none"
+	IntentTurnOnLight  = "turn_on_light"
+	IntentTurnOffLight = "turn_off_light"
+	IntentListLights   = "list_lights"
+)
--- a/ai-gateway/internal/core/domain/light_cache.go
+++ b/ai-gateway/internal/core/domain/light_cache.go
@ -0,0 +1,49 @@
+package domain
+
+import (
+	"context"
+	"sync"
+	"time"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven"
+)
+
+// LightCache keeps recently-fetched light discovery data in memory.
+type LightCache struct {
+	ttl      time.Duration
+	loadFunc func(context.Context) ([]driven.Light, error)
+
+	mu       sync.RWMutex
+	lights   []driven.Light
+	loadedAt time.Time
+}
+
+// NewLightCache constructs a TTL-based in-memory light cache.
+func NewLightCache(ttl time.Duration, loadFunc func(context.Context) ([]driven.Light, error)) *LightCache {
+	return &LightCache{ttl: ttl, loadFunc: loadFunc}
+}
+
+// Get returns the cached lights, refreshing lazily when empty or stale.
+func (c *LightCache) Get(ctx context.Context) ([]driven.Light, error) {
+	c.mu.RLock()
+	if len(c.lights) > 0 && time.Since(c.loadedAt) < c.ttl {
+		lights := append([]driven.Light(nil), c.lights...)
+		c.mu.RUnlock()
+		return lights, nil
+	}
+	c.mu.RUnlock()
+
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	if len(c.lights) > 0 && time.Since(c.loadedAt) < c.ttl {
+		return append([]driven.Light(nil), c.lights...), nil
+	}
+
+	lights, err := c.loadFunc(ctx)
+	if err != nil {
+		return nil, err
+	}
+	c.lights = append([]driven.Light(nil), lights...)
+	c.loadedAt = time.Now()
+	return append([]driven.Light(nil), c.lights...), nil
+}
--- a/ai-gateway/internal/core/domain/prompt.go
+++ b/ai-gateway/internal/core/domain/prompt.go
@ -0,0 +1,33 @@
+package domain
+
+import (
+	"fmt"
+	"strings"
+)
+
+const systemPrompt = `You are a home automation assistant. Respond with a single JSON object and nothing else.
+
+Schema:
+{
+  "intent": "turn_on_light" | "turn_off_light" | "list_lights" | "none",
+  "entity": "<entity_id_or_friendly_name_or_empty>",
+  "params": { "<key>": "<value>" },
+  "reply": "<short human-readable reply>"
+}
+
+Rules:
+- Always include all four fields.
+- Return only valid JSON. No markdown, no code fences, no extra prose.
+- Use "none" for non-actionable requests.
+- If the user asks what lights exist, use "list_lights".
+- Prefer returning the exact entity_id when it is obvious from the known light list.
+- TODO: switch discovery is intentionally omitted until ha-gateway switch support is ready.`
+
+// BuildPrompt combines the system instructions, known lights, and the user request.
+func BuildPrompt(userText string, knownLights []string) string {
+	lightLines := "- none known"
+	if len(knownLights) > 0 {
+		lightLines = strings.Join(knownLights, "\n")
+	}
+	return fmt.Sprintf("%s\n\nKnown lights:\n%s\n\nUser request: %s", systemPrompt, lightLines, userText)
+}
--- a/ai-gateway/internal/core/ports/driven/ha.go
+++ b/ai-gateway/internal/core/ports/driven/ha.go
@ -0,0 +1,17 @@
+package driven
+
+import "context"
+
+// HAClient exposes the Home Assistant functions ai-gateway needs.
+type HAClient interface {
+	TurnOnLight(ctx context.Context, entity string, params map[string]string) error
+	TurnOffLight(ctx context.Context, entity string) error
+	ListLights(ctx context.Context) ([]Light, error)
+}
+
+// Light is the discovery view ai-gateway uses to build prompts and replies.
+type Light struct {
+	EntityID     string
+	FriendlyName string
+	State        string
+}
--- a/ai-gateway/internal/core/ports/driven/llm.go
+++ b/ai-gateway/internal/core/ports/driven/llm.go
@ -0,0 +1,8 @@
+package driven
+
+import "context"
+
+// LLMClient generates one model response for a prompt.
+type LLMClient interface {
+	Generate(ctx context.Context, prompt string) (string, error)
+}
--- a/ai-gateway/internal/logger/logger.go
+++ b/ai-gateway/internal/logger/logger.go
@ -0,0 +1,38 @@
+package logger
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"os"
+)
+
+type contextKey struct{}
+
+// New constructs a root logger for the configured format and level.
+func New(format, level string) *slog.Logger {
+	var parsed slog.Level
+	if err := parsed.UnmarshalText([]byte(level)); err != nil {
+		_, _ = fmt.Fprintf(os.Stderr, "invalid log level %q, falling back to info\n", level)
+		parsed = slog.LevelInfo
+	}
+
+	opts := &slog.HandlerOptions{Level: parsed}
+	if format == "json" {
+		return slog.New(slog.NewJSONHandler(os.Stdout, opts))
+	}
+	return slog.New(slog.NewTextHandler(os.Stdout, opts))
+}
+
+// WithLogger attaches a logger to the provided context.
+func WithLogger(ctx context.Context, l *slog.Logger) context.Context {
+	return context.WithValue(ctx, contextKey{}, l)
+}
+
+// FromContext retrieves a logger from context and falls back to slog.Default().
+func FromContext(ctx context.Context) *slog.Logger {
+	if l, ok := ctx.Value(contextKey{}).(*slog.Logger); ok && l != nil {
+		return l
+	}
+	return slog.Default()
+}
--- a/ai-gateway/internal/telemetry/telemetry.go
+++ b/ai-gateway/internal/telemetry/telemetry.go
@ -0,0 +1,82 @@
+package telemetry
+
+import (
+	"context"
+	"errors"
+	"time"
+
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/config"
+	"gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger"
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
+	"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
+	"go.opentelemetry.io/otel/metric/noop"
+	"go.opentelemetry.io/otel/propagation"
+	sdkmetric "go.opentelemetry.io/otel/sdk/metric"
+	"go.opentelemetry.io/otel/sdk/resource"
+	sdktrace "go.opentelemetry.io/otel/sdk/trace"
+	semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
+	tracenoop "go.opentelemetry.io/otel/trace/noop"
+)
+
+// Setup initialises OTel trace and metric providers for one service.
+func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) {
+	if cfg.OTELEndpoint == "" {
+		otel.SetTracerProvider(tracenoop.NewTracerProvider())
+		otel.SetMeterProvider(noop.NewMeterProvider())
+		logger.FromContext(ctx).Debug("otel disabled", "reason", "OTEL_ENDPOINT not set")
+		return func(context.Context) error { return nil }, nil
+	}
+
+	res := resource.NewWithAttributes(
+		semconv.SchemaURL,
+		semconv.ServiceNameKey.String(serviceName),
+		semconv.ServiceVersionKey.String(version),
+	)
+
+	traceExp, err := otlptracegrpc.New(ctx,
+		otlptracegrpc.WithEndpoint(cfg.OTELEndpoint),
+		otlptracegrpc.WithInsecure(),
+	)
+	if err != nil {
+		return nil, err
+	}
+	tp := sdktrace.NewTracerProvider(
+		sdktrace.WithBatcher(traceExp),
+		sdktrace.WithResource(res),
+		sdktrace.WithSampler(sdktrace.AlwaysSample()),
+	)
+	otel.SetTracerProvider(tp)
+	otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator(
+		propagation.TraceContext{},
+		propagation.Baggage{},
+	))
+
+	metricExp, err := otlpmetricgrpc.New(ctx,
+		otlpmetricgrpc.WithEndpoint(cfg.OTELEndpoint),
+		otlpmetricgrpc.WithInsecure(),
+	)
+	if err != nil {
+		_ = tp.Shutdown(ctx)
+		return nil, err
+	}
+	mp := sdkmetric.NewMeterProvider(
+		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp,
+			sdkmetric.WithInterval(30*time.Second))),
+		sdkmetric.WithResource(res),
+	)
+	otel.SetMeterProvider(mp)
+
+	return func(ctx context.Context) error {
+		shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
+		defer cancel()
+		var shutdownErr error
+		if err := tp.Shutdown(shutdownCtx); err != nil {
+			shutdownErr = errors.Join(shutdownErr, err)
+		}
+		if err := mp.Shutdown(shutdownCtx); err != nil {
+			shutdownErr = errors.Join(shutdownErr, err)
+		}
+		return shutdownErr
+	}, nil
+}
--- a/gen/ai/v1/ai.pb.go
+++ b/gen/ai/v1/ai.pb.go
@ -0,0 +1,200 @@
+// Code generated by protoc-gen-go. DO NOT EDIT.
+// versions:
+// 	protoc-gen-go v1.36.11
+// 	protoc        (unknown)
+// source: ai/v1/ai.proto
+
+package aiv1
+
+import (
+	protoreflect "google.golang.org/protobuf/reflect/protoreflect"
+	protoimpl "google.golang.org/protobuf/runtime/protoimpl"
+	reflect "reflect"
+	sync "sync"
+	unsafe "unsafe"
+)
+
+const (
+	// Verify that this generated code is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
+	// Verify that runtime/protoimpl is sufficiently up-to-date.
+	_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
+)
+
+type QueryRequest struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Text          string                 `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"`
+	Source        string                 `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *QueryRequest) Reset() {
+	*x = QueryRequest{}
+	mi := &file_ai_v1_ai_proto_msgTypes[0]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *QueryRequest) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*QueryRequest) ProtoMessage() {}
+
+func (x *QueryRequest) ProtoReflect() protoreflect.Message {
+	mi := &file_ai_v1_ai_proto_msgTypes[0]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use QueryRequest.ProtoReflect.Descriptor instead.
+func (*QueryRequest) Descriptor() ([]byte, []int) {
+	return file_ai_v1_ai_proto_rawDescGZIP(), []int{0}
+}
+
+func (x *QueryRequest) GetText() string {
+	if x != nil {
+		return x.Text
+	}
+	return ""
+}
+
+func (x *QueryRequest) GetSource() string {
+	if x != nil {
+		return x.Source
+	}
+	return ""
+}
+
+type QueryResponse struct {
+	state         protoimpl.MessageState `protogen:"open.v1"`
+	Reply         string                 `protobuf:"bytes,1,opt,name=reply,proto3" json:"reply,omitempty"`
+	Intent        string                 `protobuf:"bytes,2,opt,name=intent,proto3" json:"intent,omitempty"`
+	ActionTaken   bool                   `protobuf:"varint,3,opt,name=action_taken,json=actionTaken,proto3" json:"action_taken,omitempty"`
+	unknownFields protoimpl.UnknownFields
+	sizeCache     protoimpl.SizeCache
+}
+
+func (x *QueryResponse) Reset() {
+	*x = QueryResponse{}
+	mi := &file_ai_v1_ai_proto_msgTypes[1]
+	ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+	ms.StoreMessageInfo(mi)
+}
+
+func (x *QueryResponse) String() string {
+	return protoimpl.X.MessageStringOf(x)
+}
+
+func (*QueryResponse) ProtoMessage() {}
+
+func (x *QueryResponse) ProtoReflect() protoreflect.Message {
+	mi := &file_ai_v1_ai_proto_msgTypes[1]
+	if x != nil {
+		ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
+		if ms.LoadMessageInfo() == nil {
+			ms.StoreMessageInfo(mi)
+		}
+		return ms
+	}
+	return mi.MessageOf(x)
+}
+
+// Deprecated: Use QueryResponse.ProtoReflect.Descriptor instead.
+func (*QueryResponse) Descriptor() ([]byte, []int) {
+	return file_ai_v1_ai_proto_rawDescGZIP(), []int{1}
+}
+
+func (x *QueryResponse) GetReply() string {
+	if x != nil {
+		return x.Reply
+	}
+	return ""
+}
+
+func (x *QueryResponse) GetIntent() string {
+	if x != nil {
+		return x.Intent
+	}
+	return ""
+}
+
+func (x *QueryResponse) GetActionTaken() bool {
+	if x != nil {
+		return x.ActionTaken
+	}
+	return false
+}
+
+var File_ai_v1_ai_proto protoreflect.FileDescriptor
+
+const file_ai_v1_ai_proto_rawDesc = "" +
+	"\n" +
+	"\x0eai/v1/ai.proto\x12\x05ai.v1\":\n" +
+	"\fQueryRequest\x12\x12\n" +
+	"\x04text\x18\x01 \x01(\tR\x04text\x12\x16\n" +
+	"\x06source\x18\x02 \x01(\tR\x06source\"`\n" +
+	"\rQueryResponse\x12\x14\n" +
+	"\x05reply\x18\x01 \x01(\tR\x05reply\x12\x16\n" +
+	"\x06intent\x18\x02 \x01(\tR\x06intent\x12!\n" +
+	"\faction_taken\x18\x03 \x01(\bR\vactionTaken2?\n" +
+	"\tAIService\x122\n" +
+	"\x05Query\x12\x13.ai.v1.QueryRequest\x1a\x14.ai.v1.QueryResponseB4Z2gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1b\x06proto3"
+
+var (
+	file_ai_v1_ai_proto_rawDescOnce sync.Once
+	file_ai_v1_ai_proto_rawDescData []byte
+)
+
+func file_ai_v1_ai_proto_rawDescGZIP() []byte {
+	file_ai_v1_ai_proto_rawDescOnce.Do(func() {
+		file_ai_v1_ai_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_ai_v1_ai_proto_rawDesc), len(file_ai_v1_ai_proto_rawDesc)))
+	})
+	return file_ai_v1_ai_proto_rawDescData
+}
+
+var file_ai_v1_ai_proto_msgTypes = make([]protoimpl.MessageInfo, 2)
+var file_ai_v1_ai_proto_goTypes = []any{
+	(*QueryRequest)(nil),  // 0: ai.v1.QueryRequest
+	(*QueryResponse)(nil), // 1: ai.v1.QueryResponse
+}
+var file_ai_v1_ai_proto_depIdxs = []int32{
+	0, // 0: ai.v1.AIService.Query:input_type -> ai.v1.QueryRequest
+	1, // 1: ai.v1.AIService.Query:output_type -> ai.v1.QueryResponse
+	1, // [1:2] is the sub-list for method output_type
+	0, // [0:1] is the sub-list for method input_type
+	0, // [0:0] is the sub-list for extension type_name
+	0, // [0:0] is the sub-list for extension extendee
+	0, // [0:0] is the sub-list for field type_name
+}
+
+func init() { file_ai_v1_ai_proto_init() }
+func file_ai_v1_ai_proto_init() {
+	if File_ai_v1_ai_proto != nil {
+		return
+	}
+	type x struct{}
+	out := protoimpl.TypeBuilder{
+		File: protoimpl.DescBuilder{
+			GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
+			RawDescriptor: unsafe.Slice(unsafe.StringData(file_ai_v1_ai_proto_rawDesc), len(file_ai_v1_ai_proto_rawDesc)),
+			NumEnums:      0,
+			NumMessages:   2,
+			NumExtensions: 0,
+			NumServices:   1,
+		},
+		GoTypes:           file_ai_v1_ai_proto_goTypes,
+		DependencyIndexes: file_ai_v1_ai_proto_depIdxs,
+		MessageInfos:      file_ai_v1_ai_proto_msgTypes,
+	}.Build()
+	File_ai_v1_ai_proto = out.File
+	file_ai_v1_ai_proto_goTypes = nil
+	file_ai_v1_ai_proto_depIdxs = nil
+}
--- a/gen/ai/v1/ai_grpc.pb.go
+++ b/gen/ai/v1/ai_grpc.pb.go
@ -0,0 +1,121 @@
+// Code generated by protoc-gen-go-grpc. DO NOT EDIT.
+// versions:
+// - protoc-gen-go-grpc v1.6.1
+// - protoc             (unknown)
+// source: ai/v1/ai.proto
+
+package aiv1
+
+import (
+	context "context"
+	grpc "google.golang.org/grpc"
+	codes "google.golang.org/grpc/codes"
+	status "google.golang.org/grpc/status"
+)
+
+// This is a compile-time assertion to ensure that this generated file
+// is compatible with the grpc package it is being compiled against.
+// Requires gRPC-Go v1.64.0 or later.
+const _ = grpc.SupportPackageIsVersion9
+
+const (
+	AIService_Query_FullMethodName = "/ai.v1.AIService/Query"
+)
+
+// AIServiceClient is the client API for AIService service.
+//
+// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream.
+type AIServiceClient interface {
+	Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*QueryResponse, error)
+}
+
+type aIServiceClient struct {
+	cc grpc.ClientConnInterface
+}
+
+func NewAIServiceClient(cc grpc.ClientConnInterface) AIServiceClient {
+	return &aIServiceClient{cc}
+}
+
+func (c *aIServiceClient) Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*QueryResponse, error) {
+	cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
+	out := new(QueryResponse)
+	err := c.cc.Invoke(ctx, AIService_Query_FullMethodName, in, out, cOpts...)
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// AIServiceServer is the server API for AIService service.
+// All implementations must embed UnimplementedAIServiceServer
+// for forward compatibility.
+type AIServiceServer interface {
+	Query(context.Context, *QueryRequest) (*QueryResponse, error)
+	mustEmbedUnimplementedAIServiceServer()
+}
+
+// UnimplementedAIServiceServer must be embedded to have
+// forward compatible implementations.
+//
+// NOTE: this should be embedded by value instead of pointer to avoid a nil
+// pointer dereference when methods are called.
+type UnimplementedAIServiceServer struct{}
+
+func (UnimplementedAIServiceServer) Query(context.Context, *QueryRequest) (*QueryResponse, error) {
+	return nil, status.Error(codes.Unimplemented, "method Query not implemented")
+}
+func (UnimplementedAIServiceServer) mustEmbedUnimplementedAIServiceServer() {}
+func (UnimplementedAIServiceServer) testEmbeddedByValue()                   {}
+
+// UnsafeAIServiceServer may be embedded to opt out of forward compatibility for this service.
+// Use of this interface is not recommended, as added methods to AIServiceServer will
+// result in compilation errors.
+type UnsafeAIServiceServer interface {
+	mustEmbedUnimplementedAIServiceServer()
+}
+
+func RegisterAIServiceServer(s grpc.ServiceRegistrar, srv AIServiceServer) {
+	// If the following call panics, it indicates UnimplementedAIServiceServer was
+	// embedded by pointer and is nil.  This will cause panics if an
+	// unimplemented method is ever invoked, so we test this at initialization
+	// time to prevent it from happening at runtime later due to I/O.
+	if t, ok := srv.(interface{ testEmbeddedByValue() }); ok {
+		t.testEmbeddedByValue()
+	}
+	s.RegisterService(&AIService_ServiceDesc, srv)
+}
+
+func _AIService_Query_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
+	in := new(QueryRequest)
+	if err := dec(in); err != nil {
+		return nil, err
+	}
+	if interceptor == nil {
+		return srv.(AIServiceServer).Query(ctx, in)
+	}
+	info := &grpc.UnaryServerInfo{
+		Server:     srv,
+		FullMethod: AIService_Query_FullMethodName,
+	}
+	handler := func(ctx context.Context, req interface{}) (interface{}, error) {
+		return srv.(AIServiceServer).Query(ctx, req.(*QueryRequest))
+	}
+	return interceptor(ctx, in, info, handler)
+}
+
+// AIService_ServiceDesc is the grpc.ServiceDesc for AIService service.
+// It's only intended for direct use with grpc.RegisterService,
+// and not to be introspected or modified (even as a copy)
+var AIService_ServiceDesc = grpc.ServiceDesc{
+	ServiceName: "ai.v1.AIService",
+	HandlerType: (*AIServiceServer)(nil),
+	Methods: []grpc.MethodDesc{
+		{
+			MethodName: "Query",
+			Handler:    _AIService_Query_Handler,
+		},
+	},
+	Streams:  []grpc.StreamDesc{},
+	Metadata: "ai/v1/ai.proto",
+}
--- a/go.work
+++ b/go.work
@ -1,6 +1,7 @@
 go 1.26

 use (
+	./ai-gateway
 	./discord-bot
 	./gen
 	./ha-gateway
--- a/go.work.sum
+++ b/go.work.sum
@ -5,8 +5,6 @@ cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCB
 github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0/go.mod h1:obipzmGjfSjam60XLwGfqUkJsfiheAl+TUjG+4yzyPM=
 github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0=
 github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
-github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno=
-github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY=
 github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8=
 github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI=
 github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
@ -18,6 +16,7 @@ github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9O
 github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4=
 github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU=
 github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA=
+github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
 github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08=
 github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
 github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
@ -25,8 +24,6 @@ github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwm
 github.com/golang/glog v1.2.5/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w=
 github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
 github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
-github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc=
-github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
 github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
 github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8=
@ -39,6 +36,7 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS
 go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
 go.opentelemetry.io/contrib/detectors/gcp v1.34.0/go.mod h1:cV4BMFcscUR/ckqLkbfQmF0PRsq8w/lMGzdbCSveBHo=
 go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk=
+go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ=
 go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ=
 go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y=
 go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M=
@ -51,26 +49,18 @@ go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt
 go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc=
 go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
 go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
-golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
-golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU=
-golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0=
 golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
-golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
 golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8=
 golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk=
 golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA=
 golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
-golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
 golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
 golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
-golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg=
-golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
 golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM=
 golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY=
-golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
 golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a h1:nwKuGPlUAt+aR+pcrkfFRrTU1BVrSmYyYMxYbUIVHr0=
--- a/plan.md
+++ b/plan.md
@ -0,0 +1,586 @@
+# ai-gateway — Implementation Plan
+
+This plan describes the implementation of a new Go microservice, `ai-gateway`, in the `home-services` monorepo (`gitea.nik4nao.com/nik/home-services`). It centralizes all AI/LLM logic behind a gRPC API so callers (`discord-bot`, `alexa-bridge`) remain thin transport adapters with zero AI knowledge.
+
+---
+
+## 1. Goals & Non-Goals
+
+### Goals
+- New gRPC service `ai-gateway` listening on `:50052`.
+- Owns **all** AI logic: Ollama connection, prompt construction, LLM intent parsing, dispatch to `ha-gateway`.
+- Callers send raw user text via `QueryRequest`; receive a human-readable reply in `QueryResponse`.
+- mTLS client authentication when calling `ha-gateway` (ha-gateway requires mTLS).
+- Hexagonal architecture, matching the existing `ha-gateway` layout.
+- Structured logging via `slog`, OTel OTLP gRPC traces/metrics.
+- Deployed to the `home-services` namespace on K3s.
+
+### Non-Goals
+- No auth on `ai-gateway`'s own inbound gRPC surface in this iteration (in-cluster only; match current `ha-gateway` posture).
+- No streaming responses — unary only.
+- No conversation memory — each `Query` is stateless.
+- No new Home Assistant features beyond what `ha-gateway` already exposes (LightService + EntityService).
+
+---
+
+## 2. Repository Layout
+
+All paths are relative to the `home-services` repo root.
+
+```
+proto/
+  ai/v1/ai.proto                          # NEW
+
+gen/
+  ai/v1/                                  # NEW (generated; committed)
+    ai.pb.go
+    ai_grpc.pb.go
+
+services/
+  ai-gateway/                             # NEW
+    go.mod
+    cmd/
+      ai-gateway/
+        main.go
+    config/
+      config.go
+    domain/
+      prompt.go
+      service.go
+      intent.go
+    adapters/
+      inbound/
+        grpc/
+          server.go
+      outbound/
+        ollama/
+          client.go
+        hagateway/
+          client.go
+    internal/
+      observability/
+        logging.go
+        otel.go
+    Dockerfile
+    .dockerignore
+  discord-bot/                            # MODIFIED
+    adapters/outbound/aigateway/client.go # NEW
+    (remove any direct Ollama code if present)
+```
+
+Also update:
+- `go.work` — add `./services/ai-gateway` and keep `replace` directive to `../gen`.
+- `buf.gen.yaml` / `buf.yaml` — include the new `ai/v1` proto package.
+
+---
+
+## 3. Proto Definition
+
+### File: `proto/ai/v1/ai.proto`
+
+```proto
+syntax = "proto3";
+
+package ai.v1;
+
+option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1";
+
+// AIService accepts free-form natural language queries and returns a
+// human-readable reply. It encapsulates LLM prompting, intent parsing,
+// and dispatch to downstream services (e.g. ha-gateway).
+service AIService {
+  rpc Query(QueryRequest) returns (QueryResponse);
+}
+
+message QueryRequest {
+  // Raw user text, e.g. "turn on the living room light".
+  string text = 1;
+
+  // Optional caller identifier for logging/tracing (e.g. "discord-bot").
+  string source = 2;
+}
+
+message QueryResponse {
+  // Human-readable reply to show the user.
+  string reply = 1;
+
+  // Parsed intent name, if any. Empty if no actionable intent was detected.
+  string intent = 2;
+
+  // True if an action was dispatched to a downstream service.
+  bool action_taken = 3;
+}
+```
+
+### Generation
+- Run `buf generate` from repo root.
+- Commit `gen/ai/v1/*.pb.go` and `gen/ai/v1/*_grpc.pb.go` (per existing convention — `gen/` is committed to avoid CI codegen dependency).
+
+---
+
+## 4. Configuration (`services/ai-gateway/config/config.go`)
+
+Load from environment. Use `os.Getenv` with defaults (matches existing ha-gateway style — no new dep).
+
+| Env Var                       | Default                                               | Purpose                                          |
+| ----------------------------- | ----------------------------------------------------- | ------------------------------------------------ |
+| `GRPC_LISTEN_ADDR`            | `:50052`                                              | Inbound gRPC bind address                        |
+| `OLLAMA_URL`                  | `http://192.168.7.96:11434`                           | Ollama HTTP API (direct LAN IP; no K8s Service)  |
+| `OLLAMA_MODEL`                | `llama3`                                              | Model name                                       |
+| `OLLAMA_TIMEOUT`              | `30s`                                                 | HTTP timeout for Ollama calls                    |
+| `HA_GATEWAY_ADDR`             | `ha-gateway.home-services.svc.cluster.local:50051`    | ha-gateway gRPC endpoint                         |
+| `HA_GATEWAY_TLS_CA_FILE`      | `/etc/ai-gateway/tls/ca.crt`                          | CA cert that signed ha-gateway's server cert     |
+| `HA_GATEWAY_TLS_CERT_FILE`    | `/etc/ai-gateway/tls/tls.crt`                         | ai-gateway's client cert (for mTLS)              |
+| `HA_GATEWAY_TLS_KEY_FILE`     | `/etc/ai-gateway/tls/tls.key`                         | ai-gateway's client key                          |
+| `HA_GATEWAY_SERVER_NAME`      | `ha-gateway.home-services.svc.cluster.local`          | SNI / cert verification name                     |
+| `LOG_LEVEL`                   | `info`                                                | `debug`/`info`/`warn`/`error`                    |
+| `LOG_FORMAT`                  | `json`                                                | `json` or `text`                                 |
+| `OTEL_EXPORTER_OTLP_ENDPOINT` | `otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317` | OTLP gRPC endpoint |
+| `OTEL_SERVICE_NAME`           | `ai-gateway`                                          | Service name for traces/metrics                  |
+
+Provide a `Config` struct with a `Load()` function returning `(Config, error)`. Validate required files exist at startup.
+
+---
+
+## 5. Domain Layer
+
+### `domain/intent.go`
+
+Define the intent contract the LLM must produce:
+
+```go
+package domain
+
+type Intent struct {
+    Name    string            `json:"intent"`   // e.g. "turn_on_light", "turn_off_light", "none"
+    Entity  string            `json:"entity"`   // e.g. "living_room" (friendly name or entity_id)
+    Params  map[string]string `json:"params"`   // optional, e.g. {"brightness":"80"}
+    Reply   string            `json:"reply"`    // what to say back to the user
+}
+
+const (
+    IntentNone         = "none"
+    IntentTurnOnLight  = "turn_on_light"
+    IntentTurnOffLight = "turn_off_light"
+    IntentListEntities = "list_entities"
+)
+```
+
+### `domain/prompt.go`
+
+Build the Ollama prompt. The system prompt MUST instruct the model to return **only** a single JSON object matching the `Intent` schema. No markdown fences, no prose.
+
+```go
+package domain
+
+import "fmt"
+
+const systemPrompt = `You are a home automation assistant. Given a user request, respond with a single JSON object and nothing else — no markdown, no code fences, no explanation.
+
+Schema:
+{
+  "intent": "turn_on_light" | "turn_off_light" | "list_entities" | "none",
+  "entity": "<friendly_name_or_empty>",
+  "params": { "<key>": "<value>" },
+  "reply":  "<short human-readable reply>"
+}
+
+Rules:
+- If the request is not actionable, use intent="none" and put the conversational answer in "reply".
+- Always include all four fields. Use "" or {} for empty values.
+- Do not wrap the JSON in backticks.`
+
+func BuildPrompt(userText string) string {
+    return fmt.Sprintf("%s\n\nUser: %s", systemPrompt, userText)
+}
+```
+
+### `domain/service.go`
+
+The orchestrator. Depends on two ports (interfaces) defined here:
+
+```go
+package domain
+
+import "context"
+
+type LLMClient interface {
+    Generate(ctx context.Context, prompt string) (string, error)
+}
+
+type HAClient interface {
+    TurnOnLight(ctx context.Context, entity string, params map[string]string) error
+    TurnOffLight(ctx context.Context, entity string) error
+    ListEntities(ctx context.Context) ([]string, error)
+}
+
+type Service struct {
+    llm LLMClient
+    ha  HAClient
+    log *slog.Logger
+}
+
+func NewService(llm LLMClient, ha HAClient, log *slog.Logger) *Service { /* ... */ }
+
+type QueryResult struct {
+    Reply       string
+    Intent      string
+    ActionTaken bool
+}
+
+func (s *Service) Query(ctx context.Context, text string) (QueryResult, error) {
+    // 1. BuildPrompt(text)
+    // 2. s.llm.Generate(ctx, prompt)
+    // 3. json.Unmarshal into Intent
+    //    - On unmarshal error: log at warn, return reply = "I didn't understand that."
+    // 4. switch intent.Name:
+    //      turn_on_light  -> s.ha.TurnOnLight(...)
+    //      turn_off_light -> s.ha.TurnOffLight(...)
+    //      list_entities  -> s.ha.ListEntities(...); format into reply
+    //      none / default -> reply = intent.Reply
+    // 5. Return QueryResult
+}
+```
+
+**Error handling:**
+- LLM call failure → return error; inbound adapter maps to gRPC `Unavailable`.
+- JSON parse failure → do NOT error; return a friendly "I didn't understand" reply and log the raw LLM output at `warn` with the original text (not error).
+- HA dispatch failure → log at `error`, return reply "I couldn't reach Home Assistant right now."; `ActionTaken=false`.
+
+---
+
+## 6. Outbound Adapters
+
+### `adapters/outbound/ollama/client.go`
+
+- Plain `net/http.Client` with configured timeout.
+- POST to `{OLLAMA_URL}/api/generate` with body:
+  ```json
+  { "model": "<OLLAMA_MODEL>", "prompt": "<prompt>", "stream": false }
+  ```
+- Decode JSON response, return the `response` field as a string.
+- Implement `domain.LLMClient`.
+- Wrap the HTTP client with OTel instrumentation (`otelhttp.NewTransport`).
+
+### `adapters/outbound/hagateway/client.go`
+
+This is the mTLS-critical piece.
+
+- Construct a `*grpc.ClientConn` to `HA_GATEWAY_ADDR` with TLS credentials built from the three cert files:
+  ```go
+  func loadTLSCredentials(caFile, certFile, keyFile, serverName string) (credentials.TransportCredentials, error) {
+      caPEM, err := os.ReadFile(caFile)
+      if err != nil { return nil, fmt.Errorf("read ca: %w", err) }
+      cp := x509.NewCertPool()
+      if !cp.AppendCertsFromPEM(caPEM) {
+          return nil, errors.New("failed to append CA cert")
+      }
+      clientCert, err := tls.LoadX509KeyPair(certFile, keyFile)
+      if err != nil { return nil, fmt.Errorf("load client keypair: %w", err) }
+      return credentials.NewTLS(&tls.Config{
+          Certificates: []tls.Certificate{clientCert},
+          RootCAs:      cp,
+          ServerName:   serverName,
+          MinVersion:   tls.VersionTLS13,
+      }), nil
+  }
+  ```
+- Use `grpc.NewClient(addr, grpc.WithTransportCredentials(creds), grpc.WithStatsHandler(otelgrpc.NewClientHandler()))`.
+- Wrap the generated ha-gateway clients (`LightServiceClient`, `EntityServiceClient`) to satisfy `domain.HAClient`.
+- Expose a `Close()` method for graceful shutdown.
+
+**Cert source:** the cert files will be projected into the pod via a Kubernetes `Secret` mounted at `/etc/ai-gateway/tls/`. See deployment manifest below. Issuing the cert is covered in §10.
+
+---
+
+## 7. Inbound Adapter
+
+### `adapters/inbound/grpc/server.go`
+
+- Implements `aiv1.AIServiceServer`.
+- `Query(ctx, req)` → calls `domain.Service.Query(ctx, req.Text)` → maps `QueryResult` to `QueryResponse`.
+- Attach OTel interceptor: `grpc.StatsHandler(otelgrpc.NewServerHandler())`.
+- Attach a slog unary interceptor that logs method, duration, caller `source`, and error code.
+- Register reflection service only if `LOG_LEVEL=debug` (convenience for `grpcurl`).
+
+---
+
+## 8. Observability (`internal/observability/`)
+
+Copy the pattern from `ha-gateway`:
+
+### `logging.go`
+- `NewLogger(level, format string) *slog.Logger` returning either `slog.NewJSONHandler` or `slog.NewTextHandler` wrapping `os.Stdout`.
+
+### `otel.go`
+- `InitOTel(ctx, endpoint, serviceName) (shutdown func(context.Context) error, err error)`.
+- Uses `otlptracegrpc` + `otlpmetricgrpc` exporters, insecure credentials (in-cluster).
+- Sets global `TracerProvider` and `MeterProvider`.
+- Resource attributes: `service.name`, `service.namespace=home-services`.
+
+---
+
+## 9. Entry Point (`cmd/ai-gateway/main.go`)
+
+Standard startup sequence:
+
+1. Load config.
+2. Build logger.
+3. Init OTel; defer shutdown.
+4. Build Ollama client.
+5. Build ha-gateway client (mTLS); defer `Close()`.
+6. Build domain service.
+7. Build gRPC server with interceptors, register `AIService`.
+8. Listen on `GRPC_LISTEN_ADDR`.
+9. Handle `SIGINT`/`SIGTERM` for graceful shutdown: `server.GracefulStop()` with a 10s timeout, then OTel shutdown.
+
+---
+
+## 10. TLS / mTLS Plumbing
+
+`ha-gateway` requires mTLS. `ai-gateway` needs a client certificate signed by the same CA that ha-gateway trusts.
+
+### Approach: cert-manager + internal-ca-issuer
+
+Create a `Certificate` resource for `ai-gateway` (file: `manifests/home-services/ai-gateway-client-cert.yaml`):
+
+```yaml
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: ai-gateway-client
+  namespace: home-services
+spec:
+  secretName: ai-gateway-client-tls
+  duration: 2160h      # 90d
+  renewBefore: 360h    # 15d
+  subject:
+    organizations: [home-services]
+  commonName: ai-gateway
+  usages:
+    - client auth
+  issuerRef:
+    name: internal-ca-issuer
+    kind: ClusterIssuer
+    group: cert-manager.io
+```
+
+**Important:** use `internal-ca-issuer` (the CA issuer), **never** `internal-ca` (the bootstrap self-signed issuer). This matches the homelab convention.
+
+The resulting secret `ai-gateway-client-tls` contains `tls.crt`, `tls.key`, and `ca.crt` — mount all three.
+
+### Verify ha-gateway's CA trust
+Confirm ha-gateway's server TLS config trusts `internal-ca-issuer`'s CA (it should, since both use the same cluster CA). If ha-gateway uses a separate client-auth CA, adjust the issuer accordingly.
+
+---
+
+## 11. Kubernetes Manifest
+
+### File: `manifests/home-services/ai-gateway.yaml`
+
+Single file with `---` separators per repo convention.
+
+```yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: ai-gateway
+  namespace: home-services
+spec:
+  selector: { app: ai-gateway }
+  ports:
+    - name: grpc
+      port: 50052
+      targetPort: 50052
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: ai-gateway
+  namespace: home-services
+spec:
+  replicas: 1
+  selector: { matchLabels: { app: ai-gateway } }
+  template:
+    metadata:
+      labels: { app: ai-gateway }
+    spec:
+      containers:
+        - name: ai-gateway
+          image: gitea.nik4nao.com/nik/ai-gateway:latest
+          imagePullPolicy: Always
+          ports:
+            - containerPort: 50052
+              name: grpc
+          env:
+            - { name: GRPC_LISTEN_ADDR, value: ":50052" }
+            - { name: OLLAMA_URL, value: "http://192.168.7.96:11434" }
+            - { name: OLLAMA_MODEL, value: "llama3" }
+            - { name: HA_GATEWAY_ADDR, value: "ha-gateway.home-services.svc.cluster.local:50051" }
+            - { name: HA_GATEWAY_TLS_CA_FILE,   value: "/etc/ai-gateway/tls/ca.crt" }
+            - { name: HA_GATEWAY_TLS_CERT_FILE, value: "/etc/ai-gateway/tls/tls.crt" }
+            - { name: HA_GATEWAY_TLS_KEY_FILE,  value: "/etc/ai-gateway/tls/tls.key" }
+            - { name: HA_GATEWAY_SERVER_NAME,   value: "ha-gateway.home-services.svc.cluster.local" }
+            - { name: LOG_LEVEL,  value: "info" }
+            - { name: LOG_FORMAT, value: "json" }
+            - { name: OTEL_EXPORTER_OTLP_ENDPOINT,
+                value: "otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317" }
+            - { name: OTEL_SERVICE_NAME, value: "ai-gateway" }
+          volumeMounts:
+            - name: tls
+              mountPath: /etc/ai-gateway/tls
+              readOnly: true
+          readinessProbe:
+            tcpSocket: { port: 50052 }
+            initialDelaySeconds: 3
+            periodSeconds: 10
+          livenessProbe:
+            tcpSocket: { port: 50052 }
+            initialDelaySeconds: 10
+            periodSeconds: 20
+      volumes:
+        - name: tls
+          secret:
+            secretName: ai-gateway-client-tls
+      imagePullSecrets:
+        - name: gitea-registry
+```
+
+No resource `limits`/`requests` yet — matches current repo convention (memory limits not yet enforced on pods).
+
+---
+
+## 12. discord-bot Changes
+
+### New: `services/discord-bot/adapters/outbound/aigateway/client.go`
+- gRPC client to `ai-gateway.home-services.svc.cluster.local:50052`, **plaintext** (no auth on ai-gateway's inbound surface yet).
+- Exposes `Query(ctx, text string) (reply string, err error)`.
+- Inject into existing command handler.
+
+### Removed / simplified
+- If `discord-bot` currently contains any direct Ollama calls, remove them.
+- Slash command handler for free-form queries simply calls `aigateway.Query(ctx, msg.Content)` and posts the returned reply.
+- Event-notification path (existing Discord → notify flow) is untouched.
+
+### Config additions to discord-bot
+- `AI_GATEWAY_ADDR` (default `ai-gateway.home-services.svc.cluster.local:50052`).
+
+---
+
+## 13. CI / Build
+
+### `services/ai-gateway/Dockerfile`
+Multi-stage build matching existing services:
+
+```dockerfile
+FROM golang:1.26 AS build
+WORKDIR /src
+COPY go.work go.work.sum ./
+COPY gen ./gen
+COPY services/ai-gateway ./services/ai-gateway
+WORKDIR /src/services/ai-gateway
+RUN CGO_ENABLED=0 go build -o /out/ai-gateway ./cmd/ai-gateway
+
+FROM gcr.io/distroless/static-debian12:nonroot
+COPY --from=build /out/ai-gateway /ai-gateway
+USER nonroot:nonroot
+ENTRYPOINT ["/ai-gateway"]
+```
+
+### Gitea Actions workflow
+Mirror the existing `ha-gateway` workflow:
+- Trigger on pushes touching `services/ai-gateway/**`, `gen/ai/**`, or `proto/ai/**`.
+- `docker buildx` multiarch build (`linux/amd64,linux/arm64`).
+- Push to `gitea.nik4nao.com/nik/ai-gateway:latest` and `:${{ github.sha }}`.
+- Use the Gitea API token (`read:package` + `write:package`) as registry password — **not** the account password.
+- Remember: buildkit CA must be injected each run (existing runner pattern).
+
+---
+
+## 14. Workspace Wiring
+
+### `go.work` — add line:
+```
+use ./services/ai-gateway
+```
+Keep the existing `replace gitea.nik4nao.com/nik/home-services/gen => ../gen` in `services/ai-gateway/go.mod`.
+
+### `services/ai-gateway/go.mod` dependencies
+- `google.golang.org/grpc`
+- `google.golang.org/protobuf`
+- `go.opentelemetry.io/otel`
+- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`
+- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`
+- `go.opentelemetry.io/otel/sdk`
+- `go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc`
+- `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp`
+
+---
+
+## 15. Testing
+
+### Unit tests (`services/ai-gateway/domain/service_test.go`)
+- Fake `LLMClient` returning canned JSON strings for each intent.
+- Fake `HAClient` recording calls.
+- Assert:
+  - Valid `turn_on_light` JSON → `HAClient.TurnOnLight` called with correct entity; reply matches.
+  - Invalid JSON → graceful reply, no panic, no HA call.
+  - `intent="none"` → no HA call; reply passed through.
+  - HA call returning error → reply contains "couldn't reach Home Assistant"; `ActionTaken=false`.
+
+### Integration smoke test (manual, post-deploy)
+```bash
+# From inside the cluster:
+grpcurl -plaintext -d '{"text":"turn on the living room light","source":"manual"}' \
+  ai-gateway.home-services.svc.cluster.local:50052 ai.v1.AIService/Query
+```
+
+### mTLS verification
+```bash
+# Should succeed (using mounted cert):
+kubectl exec -n home-services deploy/ai-gateway -- /ai-gateway --selftest  # if implemented
+# Or inspect via openssl from within the pod if distroless allows a debug sidecar.
+```
+
+---
+
+## 16. Rollout Order
+
+Implement in this order. Each step should compile and tests should pass before the next.
+
+1. **Proto + gen** — add `proto/ai/v1/ai.proto`, run `buf generate`, commit `gen/ai/v1/`.
+2. **Scaffold** — create `services/ai-gateway/` with `go.mod`, `main.go` (stub), update `go.work`.
+3. **Domain** — `intent.go`, `prompt.go`, `service.go` + unit tests with fakes.
+4. **Ollama adapter** — HTTP client, manual curl-based validation against `192.168.7.96:11434`.
+5. **ha-gateway adapter** — mTLS dial, wrap generated clients, satisfy `domain.HAClient`.
+6. **Inbound gRPC adapter** — server, interceptors.
+7. **Observability** — logging + OTel init.
+8. **Entry point** — wire everything in `cmd/ai-gateway/main.go`.
+9. **Dockerfile + CI** — build and push image to Gitea registry.
+10. **Cert-manager Certificate** — apply `ai-gateway-client-cert.yaml`; verify `ai-gateway-client-tls` secret is created.
+11. **Deployment manifest** — apply `ai-gateway.yaml`; verify pod ready, logs clean, `grpcurl` smoke test passes.
+12. **discord-bot update** — add `aigateway` outbound adapter, remove any direct Ollama usage, redeploy.
+13. **End-to-end test** — issue a Discord slash command, observe:
+    - Discord → ai-gateway → Ollama → ai-gateway → ha-gateway (mTLS) → HA → reply back.
+    - Traces visible in Tempo, logs in Loki, metrics in Prometheus.
+
+---
+
+## 17. Open Questions / Deferred
+
+- **Auth on ai-gateway's inbound surface:** currently none. Revisit when `alexa-bridge` lands — Alexa path is public-ingress, so ai-gateway may eventually need mTLS inbound too.
+- **Intent schema evolution:** if the set of intents grows meaningfully, consider moving the schema into the proto (enum + oneof) rather than free-form JSON. For now, JSON keeps the LLM prompt simple.
+- **Conversation memory:** out of scope. If needed later, add a per-`source` session store (Valkey in `home-services`).
+- **Prompt templates per model:** `llama3` works with the current system prompt. If swapping to a smaller model, prompt may need tuning — keep `BuildPrompt` easy to override via config.
+
+---
+
+## 18. Acceptance Criteria
+
+- [ ] `ai-gateway` pod runs ready in `home-services` namespace.
+- [ ] `grpcurl` smoke test (§15) returns a structured `QueryResponse` for a light command.
+- [ ] Light actually turns on/off in Home Assistant when tested end-to-end.
+- [ ] ha-gateway logs show mTLS handshake succeeded with CN=`ai-gateway`.
+- [ ] Traces for a full Discord query show three spans: `discord-bot` → `ai-gateway` → `ha-gateway`.
+- [ ] `discord-bot` contains no direct references to `OLLAMA_URL` or Ollama HTTP client code.
+- [ ] Unit tests pass in CI; Docker image builds multiarch.
--- a/proto/ai/v1/ai.proto
+++ b/proto/ai/v1/ai.proto
@ -0,0 +1,20 @@
+syntax = "proto3";
+
+package ai.v1;
+
+option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1";
+
+service AIService {
+  rpc Query(QueryRequest) returns (QueryResponse);
+}
+
+message QueryRequest {
+  string text = 1;
+  string source = 2;
+}
+
+message QueryResponse {
+  string reply = 1;
+  string intent = 2;
+  bool action_taken = 3;
+}