diff --git a/.gitea/workflows/ci.yaml b/.gitea/workflows/ci.yaml index ecf2271..85f5fe4 100644 --- a/.gitea/workflows/ci.yaml +++ b/.gitea/workflows/ci.yaml @@ -28,6 +28,7 @@ jobs: go-version-file: go.work cache-dependency-path: | go.work.sum + ai-gateway/go.sum ha-gateway/go.sum discord-bot/go.sum gen/go.sum @@ -35,15 +36,42 @@ jobs: - name: go vet run: | cd gen && go vet ./... + cd ../ai-gateway && go vet ./... cd ../ha-gateway && go vet ./... cd ../discord-bot && go vet ./... - name: go test run: | cd gen && go test ./... + cd ../ai-gateway && go test ./... cd ../ha-gateway && go test ./... cd ../discord-bot && go test ./... + build-ai-gateway: + needs: test + if: github.ref == 'refs/heads/main' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v6 + + - uses: docker/setup-buildx-action@v4 + + - uses: docker/login-action@v4 + with: + registry: ${{ env.REGISTRY }} + username: ${{ secrets.REGISTRY_USER }} + password: ${{ secrets.REGISTRY_PASSWORD }} + + - uses: docker/build-push-action@v7 + with: + context: . + file: ai-gateway/Dockerfile + push: true + platforms: linux/amd64 + tags: | + ${{ env.IMAGE_PREFIX }}/ai-gateway:${{ github.sha }} + ${{ env.IMAGE_PREFIX }}/ai-gateway:latest + build-ha-gateway: needs: test if: github.ref == 'refs/heads/main' diff --git a/ai-gateway/.dockerignore b/ai-gateway/.dockerignore new file mode 100644 index 0000000..cd5f46f --- /dev/null +++ b/ai-gateway/.dockerignore @@ -0,0 +1,3 @@ +.env +.git +*.log diff --git a/ai-gateway/.env.example b/ai-gateway/.env.example new file mode 100644 index 0000000..b28856a --- /dev/null +++ b/ai-gateway/.env.example @@ -0,0 +1,11 @@ +GRPC_PORT=50052 +OLLAMA_URL=http://192.168.7.96:11434 +OLLAMA_MODEL=llama3 +OLLAMA_TIMEOUT=30s +HA_GATEWAY_ADDR=localhost:50051 +HA_GATEWAY_SERVER_NAME=ha-gateway.home-services.svc.cluster.local +TLS_DIR= +OTEL_ENDPOINT= +LOG_LEVEL=info +LOG_FORMAT=text +LIGHT_CACHE_TTL=60s diff --git a/ai-gateway/Dockerfile b/ai-gateway/Dockerfile new file mode 100644 index 0000000..e8a7bc5 --- /dev/null +++ b/ai-gateway/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.26-alpine AS builder +WORKDIR /workspace + +COPY go.work go.work.sum ./ +COPY gen/ ./gen/ +COPY ha-gateway/ ./ha-gateway/ +COPY discord-bot/ ./discord-bot/ +COPY ai-gateway/ ./ai-gateway/ + +WORKDIR /workspace/ai-gateway +RUN go mod download + +ARG VERSION=dev +RUN CGO_ENABLED=0 GOOS=linux go build \ + -ldflags="-s -w -X main.version=${VERSION}" \ + -o /ai-gateway ./cmd/gateway + +FROM gcr.io/distroless/static:nonroot +COPY --from=builder /ai-gateway /ai-gateway +EXPOSE 50052 +ENTRYPOINT ["/ai-gateway"] diff --git a/ai-gateway/cmd/gateway/main.go b/ai-gateway/cmd/gateway/main.go new file mode 100644 index 0000000..357c964 --- /dev/null +++ b/ai-gateway/cmd/gateway/main.go @@ -0,0 +1,160 @@ +package main + +import ( + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "log/slog" + "net" + "net/http" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/joho/godotenv" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/health" + grpc_health_v1 "google.golang.org/grpc/health/grpc_health_v1" + "google.golang.org/grpc/reflection" + + aigrpc "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/primary/grpc" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/secondary/hagateway" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/adapters/secondary/ollama" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/app" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/config" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/telemetry" + aiv1 "gitea.nik4nao.com/nik/home-services/gen/ai/v1" +) + +// version is set at build time via -ldflags "-X main.version=". +var version = "dev" + +func main() { + _ = godotenv.Load() + + cfg, err := config.Load() + if err != nil { + os.Stderr.WriteString("config error: " + err.Error() + "\n") + os.Exit(1) + } + + log := logger.New(cfg.LogFormat, cfg.LogLevel) + slog.SetDefault(log) + log.Info("starting ai-gateway", + "version", version, + "grpc_port", cfg.GRPCPort, + "ollama_url", cfg.OllamaURL, + "ollama_model", cfg.OllamaModel, + "ha_gateway_addr", cfg.HAGatewayAddr, + "tls_dir", cfg.TLSDir, + "otel_endpoint", cfg.OTELEndpoint, + "light_cache_ttl", cfg.LightCacheTTL.String(), + ) + + ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGTERM, syscall.SIGINT) + defer stop() + ctx = logger.WithLogger(ctx, log) + + shutdown, err := telemetry.Setup(ctx, "ai-gateway", version, cfg) + if err != nil { + log.Error("telemetry setup failed", "err", err) + os.Exit(1) + } + + ollamaClient := ollama.New(cfg.OllamaURL, cfg.OllamaModel, &http.Client{Timeout: cfg.OllamaTimeout}) + haClient, err := hagateway.New(ctx, cfg.HAGatewayAddr, cfg.TLSDir, cfg.HAGatewayServerName, log) + if err != nil { + log.Error("ha-gateway client setup failed", "err", err) + os.Exit(1) + } + defer func() { + if err := haClient.Close(); err != nil { + log.Error("ha-gateway client close failed", "err", err) + } + }() + + lightCache := domain.NewLightCache(cfg.LightCacheTTL, haClient.ListLights) + queryApp := app.NewQueryApp(ollamaClient, haClient, lightCache, log) + + serverOpts := []grpc.ServerOption{ + grpc.StatsHandler(otelgrpc.NewServerHandler()), + grpc.ChainUnaryInterceptor(aigrpc.LoggingUnaryInterceptor(log)), + } + if cfg.TLSDir != "" { + creds, err := loadServerCredentials(cfg.TLSDir) + if err != nil { + log.Error("load mTLS credentials failed", "tls_dir", cfg.TLSDir, "err", err) + os.Exit(1) + } + serverOpts = append(serverOpts, grpc.Creds(creds)) + log.Info("mTLS enabled", "tls_dir", cfg.TLSDir) + } else { + log.Info("mTLS disabled") + } + + srv := grpc.NewServer(serverOpts...) + healthSrv := health.NewServer() + healthSrv.SetServingStatus("", grpc_health_v1.HealthCheckResponse_SERVING) + + aiv1.RegisterAIServiceServer(srv, aigrpc.NewServer(queryApp)) + grpc_health_v1.RegisterHealthServer(srv, healthSrv) + if cfg.LogLevel == "debug" { + reflection.Register(srv) + } + + lis, err := net.Listen("tcp", ":"+cfg.GRPCPort) + if err != nil { + log.Error("listen failed", "err", err) + os.Exit(1) + } + + go func() { + log.Info("ai-gateway listening", "addr", lis.Addr().String()) + if err := srv.Serve(lis); err != nil { + log.Error("serve failed", "err", err) + } + }() + + <-ctx.Done() + log.Info("shutdown signal received, draining") + healthSrv.SetServingStatus("", grpc_health_v1.HealthCheckResponse_NOT_SERVING) + srv.GracefulStop() + log.Info("shutdown complete") + + if err := shutdown(context.Background()); err != nil { + log.Error("telemetry shutdown error", "err", err) + } +} + +func loadServerCredentials(tlsDir string) (credentials.TransportCredentials, error) { + cert, err := tls.LoadX509KeyPair( + filepath.Join(tlsDir, "tls.crt"), + filepath.Join(tlsDir, "tls.key"), + ) + if err != nil { + return nil, fmt.Errorf("load server key pair: %w", err) + } + + caPEM, err := os.ReadFile(filepath.Join(tlsDir, "ca.crt")) + if err != nil { + return nil, fmt.Errorf("read client CA: %w", err) + } + + clientCAs := x509.NewCertPool() + if !clientCAs.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("append client CA: invalid PEM") + } + + return credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{cert}, + ClientCAs: clientCAs, + ClientAuth: tls.RequireAndVerifyClientCert, + MinVersion: tls.VersionTLS13, + }), nil +} diff --git a/ai-gateway/go.mod b/ai-gateway/go.mod new file mode 100644 index 0000000..c744dcc --- /dev/null +++ b/ai-gateway/go.mod @@ -0,0 +1,39 @@ +module gitea.nik4nao.com/nik/home-services/ai-gateway + +go 1.26 + +require ( + gitea.nik4nao.com/nik/home-services/gen v0.0.0 + github.com/joho/godotenv v1.5.1 + go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 + go.opentelemetry.io/otel v1.39.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 + go.opentelemetry.io/otel/metric v1.39.0 + go.opentelemetry.io/otel/sdk v1.39.0 + go.opentelemetry.io/otel/sdk/metric v1.39.0 + go.opentelemetry.io/otel/trace v1.39.0 + google.golang.org/grpc v1.79.3 +) + +require ( + github.com/cenkalti/backoff/v4 v4.3.0 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/go-logr/logr v1.4.3 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 // indirect + go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 // indirect + go.opentelemetry.io/proto/otlp v1.5.0 // indirect + golang.org/x/net v0.48.0 // indirect + golang.org/x/sys v0.39.0 // indirect + golang.org/x/text v0.32.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect + google.golang.org/protobuf v1.36.11 // indirect +) + +replace gitea.nik4nao.com/nik/home-services/gen => ../gen diff --git a/ai-gateway/go.sum b/ai-gateway/go.sum new file mode 100644 index 0000000..568f716 --- /dev/null +++ b/ai-gateway/go.sum @@ -0,0 +1,71 @@ +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= +github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1 h1:e9Rjr40Z98/clHv5Yg79Is0NtosR5LXRvdr7o/6NwbA= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.1/go.mod h1:tIxuGz/9mpox++sgp9fJjHO0+q1X9/UOWd798aAm22M= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= +go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0 h1:x7wzEgXfnzJcHDwStJT+mxOz4etr2EcexjqhBvmoakw= +go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.60.0/go.mod h1:rg+RlpR5dKwaS95IyyZqj5Wd4E13lk/msnTS0Xl9lJM= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0 h1:sbiXRNDSWJOTobXh5HyQKjq6wUC5tNybqjIqDpAY4CU= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= +go.opentelemetry.io/otel v1.39.0 h1:8yPrr/S0ND9QEfTfdP9V+SiwT4E0G7Y5MO7p85nis48= +go.opentelemetry.io/otel v1.39.0/go.mod h1:kLlFTywNWrFyEdH0oj2xK0bFYZtHRYUdv1NklR/tgc8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0 h1:QcFwRrZLc82r8wODjvyCbP7Ifp3UANaBSmhDSFjnqSc= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.35.0/go.mod h1:CXIWhUomyWBG/oY2/r/kLp6K/cmx9e/7DLpBuuGdLCA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0 h1:1fTNlAIJZGWLP5FVu0fikVry1IsiUnXjf7QFvoNN3Xw= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.35.0/go.mod h1:zjPK58DtkqQFn+YUMbx0M2XV3QgKU0gS9LeGohREyK4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0 h1:m639+BofXTvcY1q8CGs4ItwQarYtJPOWmVobfM1HpVI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.35.0/go.mod h1:LjReUci/F4BUyv+y4dwnq3h/26iNOeC3wAIqgvTIZVo= +go.opentelemetry.io/otel/metric v1.39.0 h1:d1UzonvEZriVfpNKEVmHXbdf909uGTOQjA0HF0Ls5Q0= +go.opentelemetry.io/otel/metric v1.39.0/go.mod h1:jrZSWL33sD7bBxg1xjrqyDjnuzTUB0x1nBERXd7Ftcs= +go.opentelemetry.io/otel/sdk v1.39.0 h1:nMLYcjVsvdui1B/4FRkwjzoRVsMK8uL/cj0OyhKzt18= +go.opentelemetry.io/otel/sdk v1.39.0/go.mod h1:vDojkC4/jsTJsE+kh+LXYQlbL8CgrEcwmt1ENZszdJE= +go.opentelemetry.io/otel/sdk/metric v1.39.0 h1:cXMVVFVgsIf2YL6QkRF4Urbr/aMInf+2WKg+sEJTtB8= +go.opentelemetry.io/otel/sdk/metric v1.39.0/go.mod h1:xq9HEVH7qeX69/JnwEfp6fVq5wosJsY1mt4lLfYdVew= +go.opentelemetry.io/otel/trace v1.39.0 h1:2d2vfpEDmCJ5zVYz7ijaJdOF59xLomrvj7bjt6/qCJI= +go.opentelemetry.io/otel/trace v1.39.0/go.mod h1:88w4/PnZSazkGzz/w84VHpQafiU4EtqqlVdxWy+rNOA= +go.opentelemetry.io/proto/otlp v1.5.0 h1:xJvq7gMzB31/d406fB8U5CBdyQGw4P399D1aQWU/3i4= +go.opentelemetry.io/proto/otlp v1.5.0/go.mod h1:keN8WnHxOy8PG0rQZjJJ5A2ebUoafqWp0eVQ4yIXvJ4= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +golang.org/x/net v0.48.0 h1:zyQRTTrjc33Lhh0fBgT/H3oZq9WuvRR5gPC70xpDiQU= +golang.org/x/net v0.48.0/go.mod h1:+ndRgGjkh8FGtu1w1FGbEC31if4VrNVMuKTgcAAnQRY= +golang.org/x/sys v0.39.0 h1:CvCKL8MeisomCi6qNZ+wbb0DN9E5AATixKsvNtMoMFk= +golang.org/x/sys v0.39.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.32.0 h1:ZD01bjUt1FQ9WJ0ClOL5vxgxOI/sVCNgX1YtKwcY0mU= +golang.org/x/text v0.32.0/go.mod h1:o/rUWzghvpD5TXrTIBuJU77MTaN0ljMWE47kxGJQ7jY= +gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= +gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217 h1:fCvbg86sFXwdrl5LgVcTEvNC+2txB5mgROGmRL5mrls= +google.golang.org/genproto/googleapis/api v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:+rXWjjaukWZun3mLfjmVnQi18E1AsFbDN9QdJ5YXLto= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 h1:gRkg/vSppuSQoDjxyiGfN4Upv/h/DQmIR10ZU8dh4Ww= +google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217/go.mod h1:7i2o+ce6H/6BluujYR+kqX3GKH+dChPTQU19wjRPiGk= +google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE= +google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ= +google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE= +google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/ai-gateway/internal/adapters/primary/grpc/interceptor.go b/ai-gateway/internal/adapters/primary/grpc/interceptor.go new file mode 100644 index 0000000..f2523ab --- /dev/null +++ b/ai-gateway/internal/adapters/primary/grpc/interceptor.go @@ -0,0 +1,61 @@ +package grpc + +import ( + "context" + "log/slog" + "time" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/peer" + "google.golang.org/grpc/status" +) + +// LoggingUnaryInterceptor logs one completion record for each unary gRPC call. +func LoggingUnaryInterceptor(log *slog.Logger) grpc.UnaryServerInterceptor { + return func(ctx context.Context, req any, info *grpc.UnaryServerInfo, handler grpc.UnaryHandler) (any, error) { + method, ok := grpc.Method(ctx) + if !ok { + method = info.FullMethod + } + reqLog := requestLogger(ctx, log, method, req) + ctx = logger.WithLogger(ctx, reqLog) + + start := time.Now() + resp, err := handler(ctx, req) + logCompletion(reqLog, "grpc call completed", status.Code(err), time.Since(start), err) + return resp, err + } +} + +func requestLogger(ctx context.Context, log *slog.Logger, method string, req any) *slog.Logger { + peerAddr := "" + if p, ok := peer.FromContext(ctx); ok && p.Addr != nil { + peerAddr = p.Addr.String() + } + source := "" + if r, ok := req.(interface{ GetSource() string }); ok { + source = r.GetSource() + } + return log.With("grpc.method", method, "grpc.peer", peerAddr, "source", source) +} + +func logCompletion(log *slog.Logger, msg string, code codes.Code, duration time.Duration, err error) { + attrs := []any{ + "duration_ms", duration.Milliseconds(), + "grpc.code", code.String(), + } + if err != nil { + attrs = append(attrs, "error", err.Error()) + } + + switch code { + case codes.OK: + log.Info(msg, attrs...) + case codes.InvalidArgument, codes.NotFound, codes.Unimplemented: + log.Warn(msg, attrs...) + default: + log.Error(msg, attrs...) + } +} diff --git a/ai-gateway/internal/adapters/primary/grpc/server.go b/ai-gateway/internal/adapters/primary/grpc/server.go new file mode 100644 index 0000000..147ce36 --- /dev/null +++ b/ai-gateway/internal/adapters/primary/grpc/server.go @@ -0,0 +1,45 @@ +package grpc + +import ( + "context" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/app" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger" + aiv1 "gitea.nik4nao.com/nik/home-services/gen/ai/v1" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" +) + +// Server adapts the AI query app to the gRPC transport. +type Server struct { + aiv1.UnimplementedAIServiceServer + app *app.QueryApp +} + +// NewServer constructs the AIService gRPC adapter. +func NewServer(app *app.QueryApp) *Server { + return &Server{app: app} +} + +// Query handles one AI query request. +func (s *Server) Query(ctx context.Context, req *aiv1.QueryRequest) (*aiv1.QueryResponse, error) { + if req.GetText() == "" { + return nil, status.Error(codes.InvalidArgument, "text is required") + } + + log := logger.FromContext(ctx) + if req.GetSource() != "" { + log = log.With("source", req.GetSource()) + ctx = logger.WithLogger(ctx, log) + } + + result, err := s.app.Query(ctx, req.GetText()) + if err != nil { + return nil, status.Errorf(codes.Unavailable, "query failed: %v", err) + } + return &aiv1.QueryResponse{ + Reply: result.Reply, + Intent: result.Intent, + ActionTaken: result.ActionTaken, + }, nil +} diff --git a/ai-gateway/internal/adapters/secondary/hagateway/client.go b/ai-gateway/internal/adapters/secondary/hagateway/client.go new file mode 100644 index 0000000..c008cd6 --- /dev/null +++ b/ai-gateway/internal/adapters/secondary/hagateway/client.go @@ -0,0 +1,162 @@ +package hagateway + +import ( + "context" + "crypto/tls" + "crypto/x509" + "fmt" + "log/slog" + "os" + "path/filepath" + "strconv" + "time" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven" + hav1 "gitea.nik4nao.com/nik/home-services/gen/ha/v1" + "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" + "google.golang.org/grpc" + "google.golang.org/grpc/credentials" + "google.golang.org/grpc/credentials/insecure" +) + +// Client implements the HA driven port over gRPC. +type Client struct { + conn *grpc.ClientConn + lightClient hav1.LightServiceClient + log *slog.Logger +} + +// New constructs a gRPC client for ha-gateway with optional mTLS. +func New(ctx context.Context, addr, tlsDir, serverName string, log *slog.Logger) (*Client, error) { + transportCreds := insecure.NewCredentials() + if tlsDir != "" { + creds, err := loadTransportCredentials(tlsDir, serverName) + if err != nil { + return nil, fmt.Errorf("load mTLS credentials: %w", err) + } + transportCreds = creds + } + + conn, err := grpc.NewClient( + addr, + grpc.WithTransportCredentials(transportCreds), + grpc.WithStatsHandler(otelgrpc.NewClientHandler()), + ) + if err != nil { + return nil, fmt.Errorf("dial ha-gateway: %w", err) + } + + return &Client{ + conn: conn, + lightClient: hav1.NewLightServiceClient(conn), + log: log, + }, nil +} + +// Close closes the underlying gRPC connection. +func (c *Client) Close() error { + if err := c.conn.Close(); err != nil { + return fmt.Errorf("close ha-gateway client: %w", err) + } + return nil +} + +// ListLights loads the discovery list used by prompt-building and list replies. +func (c *Client) ListLights(ctx context.Context) ([]driven.Light, error) { + start := time.Now() + resp, err := c.lightClient.ListLights(ctx, &hav1.ListLightsRequest{}) + if err != nil { + return nil, fmt.Errorf("list lights: %w", err) + } + c.log.Debug("grpc call completed", "grpc.method", "LightService/ListLights", "duration_ms", time.Since(start).Milliseconds()) + + lights := make([]driven.Light, 0, len(resp.GetLights())) + for _, light := range resp.GetLights() { + lights = append(lights, driven.Light{ + EntityID: light.GetEntityId(), + FriendlyName: light.GetFriendlyName(), + State: light.GetState(), + }) + } + return lights, nil +} + +// TurnOnLight forwards a turn-on request to ha-gateway. +func (c *Client) TurnOnLight(ctx context.Context, entity string, params map[string]string) error { + start := time.Now() + req := &hav1.TurnOnRequest{EntityId: entity} + if v, ok := firstParam(params, "brightness", "brightness_pct"); ok { + brightness, err := parseUint32(v) + if err != nil { + return err + } + req.BrightnessPct = &brightness + } + if v, ok := firstParam(params, "color_temp", "color_temp_kelvin"); ok { + colorTemp, err := parseUint32(v) + if err != nil { + return err + } + req.ColorTempKelvin = &colorTemp + } + + if _, err := c.lightClient.TurnOn(ctx, req); err != nil { + return fmt.Errorf("turn on light %s: %w", entity, err) + } + c.log.Debug("grpc call completed", "grpc.method", "LightService/TurnOn", "duration_ms", time.Since(start).Milliseconds()) + return nil +} + +// TurnOffLight forwards a turn-off request to ha-gateway. +func (c *Client) TurnOffLight(ctx context.Context, entity string) error { + start := time.Now() + if _, err := c.lightClient.TurnOff(ctx, &hav1.TurnOffRequest{EntityId: entity}); err != nil { + return fmt.Errorf("turn off light %s: %w", entity, err) + } + c.log.Debug("grpc call completed", "grpc.method", "LightService/TurnOff", "duration_ms", time.Since(start).Milliseconds()) + return nil +} + +func loadTransportCredentials(tlsDir, serverName string) (credentials.TransportCredentials, error) { + cert, err := tls.LoadX509KeyPair( + filepath.Join(tlsDir, "tls.crt"), + filepath.Join(tlsDir, "tls.key"), + ) + if err != nil { + return nil, fmt.Errorf("load client key pair: %w", err) + } + + caPEM, err := os.ReadFile(filepath.Join(tlsDir, "ca.crt")) + if err != nil { + return nil, fmt.Errorf("read server CA: %w", err) + } + + rootCAs := x509.NewCertPool() + if !rootCAs.AppendCertsFromPEM(caPEM) { + return nil, fmt.Errorf("append server CA: invalid PEM") + } + + return credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{cert}, + RootCAs: rootCAs, + ServerName: serverName, + MinVersion: tls.VersionTLS13, + }), nil +} + +func firstParam(params map[string]string, keys ...string) (string, bool) { + for _, key := range keys { + if v, ok := params[key]; ok { + return v, true + } + } + return "", false +} + +func parseUint32(v string) (uint32, error) { + n, err := strconv.ParseUint(v, 10, 32) + if err != nil { + return 0, fmt.Errorf("parse uint32 value %q: %w", v, err) + } + return uint32(n), nil +} diff --git a/ai-gateway/internal/adapters/secondary/ollama/client.go b/ai-gateway/internal/adapters/secondary/ollama/client.go new file mode 100644 index 0000000..35a9b08 --- /dev/null +++ b/ai-gateway/internal/adapters/secondary/ollama/client.go @@ -0,0 +1,73 @@ +package ollama + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "net/http" + + "go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp" +) + +type generateRequest struct { + Model string `json:"model"` + Prompt string `json:"prompt"` + Stream bool `json:"stream"` +} + +type generateResponse struct { + Response string `json:"response"` +} + +// Client implements the LLM driven port with the Ollama generate API. +type Client struct { + baseURL string + model string + http *http.Client +} + +// New constructs an Ollama client with OTel-instrumented transport. +func New(baseURL, model string, httpClient *http.Client) *Client { + if httpClient == nil { + httpClient = &http.Client{Transport: otelhttp.NewTransport(http.DefaultTransport)} + } + if httpClient.Transport == nil { + httpClient.Transport = otelhttp.NewTransport(http.DefaultTransport) + } + return &Client{baseURL: baseURL, model: model, http: httpClient} +} + +// Generate sends one non-streaming prompt to Ollama. +func (c *Client) Generate(ctx context.Context, prompt string) (string, error) { + body, err := json.Marshal(generateRequest{ + Model: c.model, + Prompt: prompt, + Stream: false, + }) + if err != nil { + return "", fmt.Errorf("marshal ollama request: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, http.MethodPost, c.baseURL+"/api/generate", bytes.NewReader(body)) + if err != nil { + return "", fmt.Errorf("build ollama request: %w", err) + } + req.Header.Set("Content-Type", "application/json") + + resp, err := c.http.Do(req) + if err != nil { + return "", fmt.Errorf("call ollama: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", fmt.Errorf("ollama returned status %s", resp.Status) + } + + var out generateResponse + if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { + return "", fmt.Errorf("decode ollama response: %w", err) + } + return out.Response, nil +} diff --git a/ai-gateway/internal/app/query.go b/ai-gateway/internal/app/query.go new file mode 100644 index 0000000..c5ae649 --- /dev/null +++ b/ai-gateway/internal/app/query.go @@ -0,0 +1,204 @@ +package app + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "slices" + "strconv" + "strings" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven" +) + +// QueryResult is the app-layer response mapped onto the gRPC API. +type QueryResult struct { + Reply string + Intent string + ActionTaken bool +} + +// QueryApp orchestrates one AI query request. +type QueryApp struct { + llm driven.LLMClient + ha driven.HAClient + cache *domain.LightCache + log *slog.Logger +} + +// NewQueryApp constructs the AI query application service. +func NewQueryApp(llm driven.LLMClient, ha driven.HAClient, cache *domain.LightCache, log *slog.Logger) *QueryApp { + return &QueryApp{llm: llm, ha: ha, cache: cache, log: log} +} + +// Query runs the full intent parsing and dispatch flow for one user request. +func (a *QueryApp) Query(ctx context.Context, text string) (QueryResult, error) { + lights, err := a.cache.Get(ctx) + if err != nil { + a.log.Error("light cache refresh failed", "err", err) + return QueryResult{ + Reply: "I couldn't reach Home Assistant right now.", + ActionTaken: false, + }, nil + } + + prompt := domain.BuildPrompt(text, promptLightLines(lights)) + raw, err := a.llm.Generate(ctx, prompt) + if err != nil { + return QueryResult{}, err + } + + var intent domain.Intent + if err := json.Unmarshal([]byte(raw), &intent); err != nil { + a.log.Warn("llm returned invalid json", "text", text, "raw_output", raw) + return QueryResult{ + Reply: "I didn't understand that.", + Intent: domain.IntentNone, + ActionTaken: false, + }, nil + } + + switch intent.Name { + case domain.IntentTurnOnLight: + entityID, ok := resolveLightEntity(intent.Entity, lights) + if !ok { + return QueryResult{Reply: "I couldn't find that light.", Intent: intent.Name}, nil + } + params, err := ParseLightParams(intent.Params) + if err != nil { + return QueryResult{ + Reply: "I couldn't understand the light settings.", + Intent: intent.Name, + ActionTaken: false, + }, nil + } + if err := a.ha.TurnOnLight(ctx, entityID, params); err != nil { + a.log.Error("turn on light failed", "entity_id", entityID, "err", err) + return QueryResult{ + Reply: "I couldn't reach Home Assistant right now.", + Intent: intent.Name, + ActionTaken: false, + }, nil + } + return QueryResult{ + Reply: fallbackReply(intent.Reply, fmt.Sprintf("Turned on `%s`.", displayLightName(entityID, lights))), + Intent: intent.Name, + ActionTaken: true, + }, nil + case domain.IntentTurnOffLight: + entityID, ok := resolveLightEntity(intent.Entity, lights) + if !ok { + return QueryResult{Reply: "I couldn't find that light.", Intent: intent.Name}, nil + } + if err := a.ha.TurnOffLight(ctx, entityID); err != nil { + a.log.Error("turn off light failed", "entity_id", entityID, "err", err) + return QueryResult{ + Reply: "I couldn't reach Home Assistant right now.", + Intent: intent.Name, + ActionTaken: false, + }, nil + } + return QueryResult{ + Reply: fallbackReply(intent.Reply, fmt.Sprintf("Turned off `%s`.", displayLightName(entityID, lights))), + Intent: intent.Name, + ActionTaken: true, + }, nil + case domain.IntentListLights: + return QueryResult{ + Reply: formatLightListReply(lights), + Intent: intent.Name, + ActionTaken: false, + }, nil + case domain.IntentNone: + fallthrough + default: + return QueryResult{ + Reply: fallbackReply(intent.Reply, "I didn't understand that."), + Intent: intent.Name, + ActionTaken: false, + }, nil + } +} + +func promptLightLines(lights []driven.Light) []string { + lines := make([]string, 0, len(lights)) + for _, light := range lights { + label := light.FriendlyName + if label == "" { + label = light.EntityID + } + lines = append(lines, fmt.Sprintf("- %s (%s) state=%s", label, light.EntityID, light.State)) + } + return lines +} + +func resolveLightEntity(value string, lights []driven.Light) (string, bool) { + needle := strings.TrimSpace(strings.ToLower(value)) + if needle == "" { + return "", false + } + idx := slices.IndexFunc(lights, func(light driven.Light) bool { + return strings.ToLower(light.EntityID) == needle || strings.ToLower(light.FriendlyName) == needle + }) + if idx != -1 { + return lights[idx].EntityID, true + } + idx = slices.IndexFunc(lights, func(light driven.Light) bool { + return strings.Contains(strings.ToLower(light.FriendlyName), needle) + }) + if idx != -1 { + return lights[idx].EntityID, true + } + return "", false +} + +func displayLightName(entityID string, lights []driven.Light) string { + idx := slices.IndexFunc(lights, func(light driven.Light) bool { return light.EntityID == entityID }) + if idx == -1 || lights[idx].FriendlyName == "" { + return entityID + } + return lights[idx].FriendlyName +} + +func fallbackReply(reply, fallback string) string { + if strings.TrimSpace(reply) == "" { + return fallback + } + return reply +} + +func formatLightListReply(lights []driven.Light) string { + if len(lights) == 0 { + return "No lights found." + } + lines := make([]string, 0, len(lights)+1) + lines = append(lines, "Known lights:") + for _, light := range lights { + label := light.FriendlyName + if label == "" { + label = light.EntityID + } + lines = append(lines, fmt.Sprintf("- %s (%s) [%s]", label, light.EntityID, light.State)) + } + return strings.Join(lines, "\n") +} + +// ParseLightParams converts string params into the protobuf-compatible values the HA adapter expects. +func ParseLightParams(params map[string]string) (map[string]string, error) { + if len(params) == 0 { + return map[string]string{}, nil + } + normalized := make(map[string]string, len(params)) + for key, value := range params { + switch key { + case "brightness", "brightness_pct", "color_temp", "color_temp_kelvin": + if _, err := strconv.ParseUint(value, 10, 32); err != nil { + return nil, fmt.Errorf("invalid %s value %q: %w", key, value, err) + } + } + normalized[key] = value + } + return normalized, nil +} diff --git a/ai-gateway/internal/app/query_test.go b/ai-gateway/internal/app/query_test.go new file mode 100644 index 0000000..f4312dc --- /dev/null +++ b/ai-gateway/internal/app/query_test.go @@ -0,0 +1,166 @@ +package app + +import ( + "context" + "errors" + "log/slog" + "reflect" + "testing" + "time" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/domain" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven" +) + +type fakeLLM struct { + generate func(context.Context, string) (string, error) +} + +func (f *fakeLLM) Generate(ctx context.Context, prompt string) (string, error) { + return f.generate(ctx, prompt) +} + +type fakeHA struct { + lights []driven.Light + listErr error + turnOnErr error + turnOffErr error + lastTurnOnID string + lastTurnOffID string + lastTurnParams map[string]string + listCalls int +} + +func (f *fakeHA) TurnOnLight(ctx context.Context, entity string, params map[string]string) error { + f.lastTurnOnID = entity + f.lastTurnParams = params + return f.turnOnErr +} + +func (f *fakeHA) TurnOffLight(ctx context.Context, entity string) error { + f.lastTurnOffID = entity + return f.turnOffErr +} + +func (f *fakeHA) ListLights(ctx context.Context) ([]driven.Light, error) { + f.listCalls++ + if f.listErr != nil { + return nil, f.listErr + } + return append([]driven.Light(nil), f.lights...), nil +} + +func TestQueryAppTurnOnLight(t *testing.T) { + ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}} + cache := domain.NewLightCache(time.Hour, ha.ListLights) + app := NewQueryApp(&fakeLLM{ + generate: func(ctx context.Context, prompt string) (string, error) { + return `{"intent":"turn_on_light","entity":"Kitchen","params":{"brightness":"80"},"reply":"Turning on Kitchen."}`, nil + }, + }, ha, cache, slog.Default()) + + got, err := app.Query(context.Background(), "turn on kitchen") + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if got.Intent != domain.IntentTurnOnLight || !got.ActionTaken || got.Reply != "Turning on Kitchen." { + t.Fatalf("Query() = %+v", got) + } + if ha.lastTurnOnID != "light.kitchen" { + t.Fatalf("TurnOnLight entity = %q", ha.lastTurnOnID) + } + if !reflect.DeepEqual(ha.lastTurnParams, map[string]string{"brightness": "80"}) { + t.Fatalf("TurnOnLight params = %#v", ha.lastTurnParams) + } +} + +func TestQueryAppInvalidJSON(t *testing.T) { + ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}} + app := NewQueryApp(&fakeLLM{ + generate: func(ctx context.Context, prompt string) (string, error) { + return `not-json`, nil + }, + }, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default()) + + got, err := app.Query(context.Background(), "turn on kitchen") + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if got.Reply != "I didn't understand that." || got.ActionTaken { + t.Fatalf("Query() = %+v", got) + } + if ha.lastTurnOnID != "" { + t.Fatalf("expected no HA call, got %q", ha.lastTurnOnID) + } +} + +func TestQueryAppIntentNone(t *testing.T) { + ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}} + app := NewQueryApp(&fakeLLM{ + generate: func(ctx context.Context, prompt string) (string, error) { + return `{"intent":"none","entity":"","params":{},"reply":"Hello there."}`, nil + }, + }, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default()) + + got, err := app.Query(context.Background(), "hello") + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if got.Reply != "Hello there." || got.ActionTaken { + t.Fatalf("Query() = %+v", got) + } +} + +func TestQueryAppHAFailure(t *testing.T) { + ha := &fakeHA{ + lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}, + turnOnErr: errors.New("boom"), + } + app := NewQueryApp(&fakeLLM{ + generate: func(ctx context.Context, prompt string) (string, error) { + return `{"intent":"turn_on_light","entity":"light.kitchen","params":{},"reply":"Turning on Kitchen."}`, nil + }, + }, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default()) + + got, err := app.Query(context.Background(), "turn on kitchen") + if err != nil { + t.Fatalf("Query() error = %v", err) + } + if got.Reply != "I couldn't reach Home Assistant right now." || got.ActionTaken { + t.Fatalf("Query() = %+v", got) + } +} + +func TestQueryAppListLights(t *testing.T) { + ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "on"}}} + app := NewQueryApp(&fakeLLM{ + generate: func(ctx context.Context, prompt string) (string, error) { + return `{"intent":"list_lights","entity":"","params":{},"reply":""}`, nil + }, + }, ha, domain.NewLightCache(time.Hour, ha.ListLights), slog.Default()) + + got, err := app.Query(context.Background(), "what lights exist") + if err != nil { + t.Fatalf("Query() error = %v", err) + } + want := "Known lights:\n- Kitchen (light.kitchen) [on]" + if got.Reply != want || got.ActionTaken { + t.Fatalf("Query() = %+v", got) + } +} + +func TestLightCacheRefreshAfterTTL(t *testing.T) { + ha := &fakeHA{lights: []driven.Light{{EntityID: "light.kitchen", FriendlyName: "Kitchen", State: "off"}}} + cache := domain.NewLightCache(10*time.Millisecond, ha.ListLights) + + if _, err := cache.Get(context.Background()); err != nil { + t.Fatalf("Get() error = %v", err) + } + time.Sleep(20 * time.Millisecond) + if _, err := cache.Get(context.Background()); err != nil { + t.Fatalf("Get() error = %v", err) + } + if ha.listCalls < 2 { + t.Fatalf("ListLights calls = %d, want at least 2", ha.listCalls) + } +} diff --git a/ai-gateway/internal/config/config.go b/ai-gateway/internal/config/config.go new file mode 100644 index 0000000..226b737 --- /dev/null +++ b/ai-gateway/internal/config/config.go @@ -0,0 +1,89 @@ +package config + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "time" +) + +// Config holds runtime configuration for the AI gRPC gateway. +type Config struct { + GRPCPort string + OllamaURL string + OllamaModel string + OllamaTimeout time.Duration + HAGatewayAddr string + HAGatewayServerName string + TLSDir string + OTELEndpoint string + LogLevel string + LogFormat string + LightCacheTTL time.Duration +} + +// Load reads configuration from environment variables and applies defaults. +func Load() (*Config, error) { + ollamaTimeout, err := parseDurationEnv("OLLAMA_TIMEOUT", 30*time.Second) + if err != nil { + return nil, err + } + cacheTTL, err := parseDurationEnv("LIGHT_CACHE_TTL", 60*time.Second) + if err != nil { + return nil, err + } + + cfg := &Config{ + GRPCPort: getenvDefault("GRPC_PORT", "50052"), + OllamaURL: getenvDefault("OLLAMA_URL", "http://192.168.7.96:11434"), + OllamaModel: getenvDefault("OLLAMA_MODEL", "llama3"), + OllamaTimeout: ollamaTimeout, + HAGatewayAddr: getenvDefault("HA_GATEWAY_ADDR", "ha-gateway.home-services.svc.cluster.local:50051"), + HAGatewayServerName: getenvDefault("HA_GATEWAY_SERVER_NAME", "ha-gateway.home-services.svc.cluster.local"), + TLSDir: os.Getenv("TLS_DIR"), + OTELEndpoint: os.Getenv("OTEL_ENDPOINT"), + LogLevel: getenvDefault("LOG_LEVEL", "info"), + LogFormat: getenvDefault("LOG_FORMAT", "json"), + LightCacheTTL: cacheTTL, + } + if cfg.TLSDir != "" { + if err := validateTLSDir(cfg.TLSDir); err != nil { + return nil, err + } + } + return cfg, nil +} + +func getenvDefault(key, fallback string) string { + if v := os.Getenv(key); v != "" { + return v + } + return fallback +} + +func parseDurationEnv(key string, fallback time.Duration) (time.Duration, error) { + if v := os.Getenv(key); v != "" { + d, err := time.ParseDuration(v) + if err != nil { + return 0, fmt.Errorf("parse %s: %w", key, err) + } + return d, nil + } + return fallback, nil +} + +func validateTLSDir(dir string) error { + required := []string{"tls.crt", "tls.key", "ca.crt"} + for _, name := range required { + path := filepath.Join(dir, name) + info, err := os.Stat(path) + if err != nil { + return fmt.Errorf("tls dir validation failed for %s: %w", path, err) + } + if info.IsDir() { + return fmt.Errorf("tls dir validation failed for %s: %w", path, errors.New("expected file")) + } + } + return nil +} diff --git a/ai-gateway/internal/core/domain/intent.go b/ai-gateway/internal/core/domain/intent.go new file mode 100644 index 0000000..660b2ba --- /dev/null +++ b/ai-gateway/internal/core/domain/intent.go @@ -0,0 +1,16 @@ +package domain + +// Intent describes the JSON contract expected from the LLM. +type Intent struct { + Name string `json:"intent"` + Entity string `json:"entity"` + Params map[string]string `json:"params"` + Reply string `json:"reply"` +} + +const ( + IntentNone = "none" + IntentTurnOnLight = "turn_on_light" + IntentTurnOffLight = "turn_off_light" + IntentListLights = "list_lights" +) diff --git a/ai-gateway/internal/core/domain/light_cache.go b/ai-gateway/internal/core/domain/light_cache.go new file mode 100644 index 0000000..0478418 --- /dev/null +++ b/ai-gateway/internal/core/domain/light_cache.go @@ -0,0 +1,49 @@ +package domain + +import ( + "context" + "sync" + "time" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/core/ports/driven" +) + +// LightCache keeps recently-fetched light discovery data in memory. +type LightCache struct { + ttl time.Duration + loadFunc func(context.Context) ([]driven.Light, error) + + mu sync.RWMutex + lights []driven.Light + loadedAt time.Time +} + +// NewLightCache constructs a TTL-based in-memory light cache. +func NewLightCache(ttl time.Duration, loadFunc func(context.Context) ([]driven.Light, error)) *LightCache { + return &LightCache{ttl: ttl, loadFunc: loadFunc} +} + +// Get returns the cached lights, refreshing lazily when empty or stale. +func (c *LightCache) Get(ctx context.Context) ([]driven.Light, error) { + c.mu.RLock() + if len(c.lights) > 0 && time.Since(c.loadedAt) < c.ttl { + lights := append([]driven.Light(nil), c.lights...) + c.mu.RUnlock() + return lights, nil + } + c.mu.RUnlock() + + c.mu.Lock() + defer c.mu.Unlock() + if len(c.lights) > 0 && time.Since(c.loadedAt) < c.ttl { + return append([]driven.Light(nil), c.lights...), nil + } + + lights, err := c.loadFunc(ctx) + if err != nil { + return nil, err + } + c.lights = append([]driven.Light(nil), lights...) + c.loadedAt = time.Now() + return append([]driven.Light(nil), c.lights...), nil +} diff --git a/ai-gateway/internal/core/domain/prompt.go b/ai-gateway/internal/core/domain/prompt.go new file mode 100644 index 0000000..9123d34 --- /dev/null +++ b/ai-gateway/internal/core/domain/prompt.go @@ -0,0 +1,33 @@ +package domain + +import ( + "fmt" + "strings" +) + +const systemPrompt = `You are a home automation assistant. Respond with a single JSON object and nothing else. + +Schema: +{ + "intent": "turn_on_light" | "turn_off_light" | "list_lights" | "none", + "entity": "", + "params": { "": "" }, + "reply": "" +} + +Rules: +- Always include all four fields. +- Return only valid JSON. No markdown, no code fences, no extra prose. +- Use "none" for non-actionable requests. +- If the user asks what lights exist, use "list_lights". +- Prefer returning the exact entity_id when it is obvious from the known light list. +- TODO: switch discovery is intentionally omitted until ha-gateway switch support is ready.` + +// BuildPrompt combines the system instructions, known lights, and the user request. +func BuildPrompt(userText string, knownLights []string) string { + lightLines := "- none known" + if len(knownLights) > 0 { + lightLines = strings.Join(knownLights, "\n") + } + return fmt.Sprintf("%s\n\nKnown lights:\n%s\n\nUser request: %s", systemPrompt, lightLines, userText) +} diff --git a/ai-gateway/internal/core/ports/driven/ha.go b/ai-gateway/internal/core/ports/driven/ha.go new file mode 100644 index 0000000..0d2501f --- /dev/null +++ b/ai-gateway/internal/core/ports/driven/ha.go @@ -0,0 +1,17 @@ +package driven + +import "context" + +// HAClient exposes the Home Assistant functions ai-gateway needs. +type HAClient interface { + TurnOnLight(ctx context.Context, entity string, params map[string]string) error + TurnOffLight(ctx context.Context, entity string) error + ListLights(ctx context.Context) ([]Light, error) +} + +// Light is the discovery view ai-gateway uses to build prompts and replies. +type Light struct { + EntityID string + FriendlyName string + State string +} diff --git a/ai-gateway/internal/core/ports/driven/llm.go b/ai-gateway/internal/core/ports/driven/llm.go new file mode 100644 index 0000000..4a5ad06 --- /dev/null +++ b/ai-gateway/internal/core/ports/driven/llm.go @@ -0,0 +1,8 @@ +package driven + +import "context" + +// LLMClient generates one model response for a prompt. +type LLMClient interface { + Generate(ctx context.Context, prompt string) (string, error) +} diff --git a/ai-gateway/internal/logger/logger.go b/ai-gateway/internal/logger/logger.go new file mode 100644 index 0000000..498bf86 --- /dev/null +++ b/ai-gateway/internal/logger/logger.go @@ -0,0 +1,38 @@ +package logger + +import ( + "context" + "fmt" + "log/slog" + "os" +) + +type contextKey struct{} + +// New constructs a root logger for the configured format and level. +func New(format, level string) *slog.Logger { + var parsed slog.Level + if err := parsed.UnmarshalText([]byte(level)); err != nil { + _, _ = fmt.Fprintf(os.Stderr, "invalid log level %q, falling back to info\n", level) + parsed = slog.LevelInfo + } + + opts := &slog.HandlerOptions{Level: parsed} + if format == "json" { + return slog.New(slog.NewJSONHandler(os.Stdout, opts)) + } + return slog.New(slog.NewTextHandler(os.Stdout, opts)) +} + +// WithLogger attaches a logger to the provided context. +func WithLogger(ctx context.Context, l *slog.Logger) context.Context { + return context.WithValue(ctx, contextKey{}, l) +} + +// FromContext retrieves a logger from context and falls back to slog.Default(). +func FromContext(ctx context.Context) *slog.Logger { + if l, ok := ctx.Value(contextKey{}).(*slog.Logger); ok && l != nil { + return l + } + return slog.Default() +} diff --git a/ai-gateway/internal/telemetry/telemetry.go b/ai-gateway/internal/telemetry/telemetry.go new file mode 100644 index 0000000..7a64ed2 --- /dev/null +++ b/ai-gateway/internal/telemetry/telemetry.go @@ -0,0 +1,82 @@ +package telemetry + +import ( + "context" + "errors" + "time" + + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/config" + "gitea.nik4nao.com/nik/home-services/ai-gateway/internal/logger" + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" + "go.opentelemetry.io/otel/metric/noop" + "go.opentelemetry.io/otel/propagation" + sdkmetric "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/sdk/resource" + sdktrace "go.opentelemetry.io/otel/sdk/trace" + semconv "go.opentelemetry.io/otel/semconv/v1.26.0" + tracenoop "go.opentelemetry.io/otel/trace/noop" +) + +// Setup initialises OTel trace and metric providers for one service. +func Setup(ctx context.Context, serviceName, version string, cfg *config.Config) (shutdown func(context.Context) error, err error) { + if cfg.OTELEndpoint == "" { + otel.SetTracerProvider(tracenoop.NewTracerProvider()) + otel.SetMeterProvider(noop.NewMeterProvider()) + logger.FromContext(ctx).Debug("otel disabled", "reason", "OTEL_ENDPOINT not set") + return func(context.Context) error { return nil }, nil + } + + res := resource.NewWithAttributes( + semconv.SchemaURL, + semconv.ServiceNameKey.String(serviceName), + semconv.ServiceVersionKey.String(version), + ) + + traceExp, err := otlptracegrpc.New(ctx, + otlptracegrpc.WithEndpoint(cfg.OTELEndpoint), + otlptracegrpc.WithInsecure(), + ) + if err != nil { + return nil, err + } + tp := sdktrace.NewTracerProvider( + sdktrace.WithBatcher(traceExp), + sdktrace.WithResource(res), + sdktrace.WithSampler(sdktrace.AlwaysSample()), + ) + otel.SetTracerProvider(tp) + otel.SetTextMapPropagator(propagation.NewCompositeTextMapPropagator( + propagation.TraceContext{}, + propagation.Baggage{}, + )) + + metricExp, err := otlpmetricgrpc.New(ctx, + otlpmetricgrpc.WithEndpoint(cfg.OTELEndpoint), + otlpmetricgrpc.WithInsecure(), + ) + if err != nil { + _ = tp.Shutdown(ctx) + return nil, err + } + mp := sdkmetric.NewMeterProvider( + sdkmetric.WithReader(sdkmetric.NewPeriodicReader(metricExp, + sdkmetric.WithInterval(30*time.Second))), + sdkmetric.WithResource(res), + ) + otel.SetMeterProvider(mp) + + return func(ctx context.Context) error { + shutdownCtx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + var shutdownErr error + if err := tp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) + } + if err := mp.Shutdown(shutdownCtx); err != nil { + shutdownErr = errors.Join(shutdownErr, err) + } + return shutdownErr + }, nil +} diff --git a/gen/ai/v1/ai.pb.go b/gen/ai/v1/ai.pb.go new file mode 100644 index 0000000..7fdadf4 --- /dev/null +++ b/gen/ai/v1/ai.pb.go @@ -0,0 +1,200 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.36.11 +// protoc (unknown) +// source: ai/v1/ai.proto + +package aiv1 + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" + unsafe "unsafe" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type QueryRequest struct { + state protoimpl.MessageState `protogen:"open.v1"` + Text string `protobuf:"bytes,1,opt,name=text,proto3" json:"text,omitempty"` + Source string `protobuf:"bytes,2,opt,name=source,proto3" json:"source,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *QueryRequest) Reset() { + *x = QueryRequest{} + mi := &file_ai_v1_ai_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *QueryRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*QueryRequest) ProtoMessage() {} + +func (x *QueryRequest) ProtoReflect() protoreflect.Message { + mi := &file_ai_v1_ai_proto_msgTypes[0] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use QueryRequest.ProtoReflect.Descriptor instead. +func (*QueryRequest) Descriptor() ([]byte, []int) { + return file_ai_v1_ai_proto_rawDescGZIP(), []int{0} +} + +func (x *QueryRequest) GetText() string { + if x != nil { + return x.Text + } + return "" +} + +func (x *QueryRequest) GetSource() string { + if x != nil { + return x.Source + } + return "" +} + +type QueryResponse struct { + state protoimpl.MessageState `protogen:"open.v1"` + Reply string `protobuf:"bytes,1,opt,name=reply,proto3" json:"reply,omitempty"` + Intent string `protobuf:"bytes,2,opt,name=intent,proto3" json:"intent,omitempty"` + ActionTaken bool `protobuf:"varint,3,opt,name=action_taken,json=actionTaken,proto3" json:"action_taken,omitempty"` + unknownFields protoimpl.UnknownFields + sizeCache protoimpl.SizeCache +} + +func (x *QueryResponse) Reset() { + *x = QueryResponse{} + mi := &file_ai_v1_ai_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) +} + +func (x *QueryResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*QueryResponse) ProtoMessage() {} + +func (x *QueryResponse) ProtoReflect() protoreflect.Message { + mi := &file_ai_v1_ai_proto_msgTypes[1] + if x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use QueryResponse.ProtoReflect.Descriptor instead. +func (*QueryResponse) Descriptor() ([]byte, []int) { + return file_ai_v1_ai_proto_rawDescGZIP(), []int{1} +} + +func (x *QueryResponse) GetReply() string { + if x != nil { + return x.Reply + } + return "" +} + +func (x *QueryResponse) GetIntent() string { + if x != nil { + return x.Intent + } + return "" +} + +func (x *QueryResponse) GetActionTaken() bool { + if x != nil { + return x.ActionTaken + } + return false +} + +var File_ai_v1_ai_proto protoreflect.FileDescriptor + +const file_ai_v1_ai_proto_rawDesc = "" + + "\n" + + "\x0eai/v1/ai.proto\x12\x05ai.v1\":\n" + + "\fQueryRequest\x12\x12\n" + + "\x04text\x18\x01 \x01(\tR\x04text\x12\x16\n" + + "\x06source\x18\x02 \x01(\tR\x06source\"`\n" + + "\rQueryResponse\x12\x14\n" + + "\x05reply\x18\x01 \x01(\tR\x05reply\x12\x16\n" + + "\x06intent\x18\x02 \x01(\tR\x06intent\x12!\n" + + "\faction_taken\x18\x03 \x01(\bR\vactionTaken2?\n" + + "\tAIService\x122\n" + + "\x05Query\x12\x13.ai.v1.QueryRequest\x1a\x14.ai.v1.QueryResponseB4Z2gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1b\x06proto3" + +var ( + file_ai_v1_ai_proto_rawDescOnce sync.Once + file_ai_v1_ai_proto_rawDescData []byte +) + +func file_ai_v1_ai_proto_rawDescGZIP() []byte { + file_ai_v1_ai_proto_rawDescOnce.Do(func() { + file_ai_v1_ai_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_ai_v1_ai_proto_rawDesc), len(file_ai_v1_ai_proto_rawDesc))) + }) + return file_ai_v1_ai_proto_rawDescData +} + +var file_ai_v1_ai_proto_msgTypes = make([]protoimpl.MessageInfo, 2) +var file_ai_v1_ai_proto_goTypes = []any{ + (*QueryRequest)(nil), // 0: ai.v1.QueryRequest + (*QueryResponse)(nil), // 1: ai.v1.QueryResponse +} +var file_ai_v1_ai_proto_depIdxs = []int32{ + 0, // 0: ai.v1.AIService.Query:input_type -> ai.v1.QueryRequest + 1, // 1: ai.v1.AIService.Query:output_type -> ai.v1.QueryResponse + 1, // [1:2] is the sub-list for method output_type + 0, // [0:1] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_ai_v1_ai_proto_init() } +func file_ai_v1_ai_proto_init() { + if File_ai_v1_ai_proto != nil { + return + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: unsafe.Slice(unsafe.StringData(file_ai_v1_ai_proto_rawDesc), len(file_ai_v1_ai_proto_rawDesc)), + NumEnums: 0, + NumMessages: 2, + NumExtensions: 0, + NumServices: 1, + }, + GoTypes: file_ai_v1_ai_proto_goTypes, + DependencyIndexes: file_ai_v1_ai_proto_depIdxs, + MessageInfos: file_ai_v1_ai_proto_msgTypes, + }.Build() + File_ai_v1_ai_proto = out.File + file_ai_v1_ai_proto_goTypes = nil + file_ai_v1_ai_proto_depIdxs = nil +} diff --git a/gen/ai/v1/ai_grpc.pb.go b/gen/ai/v1/ai_grpc.pb.go new file mode 100644 index 0000000..0ca5f82 --- /dev/null +++ b/gen/ai/v1/ai_grpc.pb.go @@ -0,0 +1,121 @@ +// Code generated by protoc-gen-go-grpc. DO NOT EDIT. +// versions: +// - protoc-gen-go-grpc v1.6.1 +// - protoc (unknown) +// source: ai/v1/ai.proto + +package aiv1 + +import ( + context "context" + grpc "google.golang.org/grpc" + codes "google.golang.org/grpc/codes" + status "google.golang.org/grpc/status" +) + +// This is a compile-time assertion to ensure that this generated file +// is compatible with the grpc package it is being compiled against. +// Requires gRPC-Go v1.64.0 or later. +const _ = grpc.SupportPackageIsVersion9 + +const ( + AIService_Query_FullMethodName = "/ai.v1.AIService/Query" +) + +// AIServiceClient is the client API for AIService service. +// +// For semantics around ctx use and closing/ending streaming RPCs, please refer to https://pkg.go.dev/google.golang.org/grpc/?tab=doc#ClientConn.NewStream. +type AIServiceClient interface { + Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*QueryResponse, error) +} + +type aIServiceClient struct { + cc grpc.ClientConnInterface +} + +func NewAIServiceClient(cc grpc.ClientConnInterface) AIServiceClient { + return &aIServiceClient{cc} +} + +func (c *aIServiceClient) Query(ctx context.Context, in *QueryRequest, opts ...grpc.CallOption) (*QueryResponse, error) { + cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...) + out := new(QueryResponse) + err := c.cc.Invoke(ctx, AIService_Query_FullMethodName, in, out, cOpts...) + if err != nil { + return nil, err + } + return out, nil +} + +// AIServiceServer is the server API for AIService service. +// All implementations must embed UnimplementedAIServiceServer +// for forward compatibility. +type AIServiceServer interface { + Query(context.Context, *QueryRequest) (*QueryResponse, error) + mustEmbedUnimplementedAIServiceServer() +} + +// UnimplementedAIServiceServer must be embedded to have +// forward compatible implementations. +// +// NOTE: this should be embedded by value instead of pointer to avoid a nil +// pointer dereference when methods are called. +type UnimplementedAIServiceServer struct{} + +func (UnimplementedAIServiceServer) Query(context.Context, *QueryRequest) (*QueryResponse, error) { + return nil, status.Error(codes.Unimplemented, "method Query not implemented") +} +func (UnimplementedAIServiceServer) mustEmbedUnimplementedAIServiceServer() {} +func (UnimplementedAIServiceServer) testEmbeddedByValue() {} + +// UnsafeAIServiceServer may be embedded to opt out of forward compatibility for this service. +// Use of this interface is not recommended, as added methods to AIServiceServer will +// result in compilation errors. +type UnsafeAIServiceServer interface { + mustEmbedUnimplementedAIServiceServer() +} + +func RegisterAIServiceServer(s grpc.ServiceRegistrar, srv AIServiceServer) { + // If the following call panics, it indicates UnimplementedAIServiceServer was + // embedded by pointer and is nil. This will cause panics if an + // unimplemented method is ever invoked, so we test this at initialization + // time to prevent it from happening at runtime later due to I/O. + if t, ok := srv.(interface{ testEmbeddedByValue() }); ok { + t.testEmbeddedByValue() + } + s.RegisterService(&AIService_ServiceDesc, srv) +} + +func _AIService_Query_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) { + in := new(QueryRequest) + if err := dec(in); err != nil { + return nil, err + } + if interceptor == nil { + return srv.(AIServiceServer).Query(ctx, in) + } + info := &grpc.UnaryServerInfo{ + Server: srv, + FullMethod: AIService_Query_FullMethodName, + } + handler := func(ctx context.Context, req interface{}) (interface{}, error) { + return srv.(AIServiceServer).Query(ctx, req.(*QueryRequest)) + } + return interceptor(ctx, in, info, handler) +} + +// AIService_ServiceDesc is the grpc.ServiceDesc for AIService service. +// It's only intended for direct use with grpc.RegisterService, +// and not to be introspected or modified (even as a copy) +var AIService_ServiceDesc = grpc.ServiceDesc{ + ServiceName: "ai.v1.AIService", + HandlerType: (*AIServiceServer)(nil), + Methods: []grpc.MethodDesc{ + { + MethodName: "Query", + Handler: _AIService_Query_Handler, + }, + }, + Streams: []grpc.StreamDesc{}, + Metadata: "ai/v1/ai.proto", +} diff --git a/go.work b/go.work index 426ea0c..3153dd7 100644 --- a/go.work +++ b/go.work @@ -1,6 +1,7 @@ go 1.26 use ( + ./ai-gateway ./discord-bot ./gen ./ha-gateway diff --git a/go.work.sum b/go.work.sum index 83791ff..68fba69 100644 --- a/go.work.sum +++ b/go.work.sum @@ -5,8 +5,6 @@ cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCB github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.25.0/go.mod h1:obipzmGjfSjam60XLwGfqUkJsfiheAl+TUjG+4yzyPM= github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.30.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= -github.com/bwmarrin/discordgo v0.29.0 h1:FmWeXFaKUwrcL3Cx65c20bTRW+vOb6k8AnaP+EgjDno= -github.com/bwmarrin/discordgo v0.29.0/go.mod h1:NJZpH+1AfhIcyQsPeuBKsUtYrRnjkyu0kIVMCHkZtRY= github.com/cncf/xds/go v0.0.0-20241223141626-cff3c89139a3/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -18,6 +16,7 @@ github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9O github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= github.com/envoyproxy/protoc-gen-validate v1.2.1/go.mod h1:d/C80l/jxXLdfEIhX1W2TmLfsJ31lvEjwamM4DxlWXU= github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/go-jose/go-jose/v4 v4.1.3/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -25,8 +24,6 @@ github.com/golang/glog v1.2.4/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwm github.com/golang/glog v1.2.5/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= -github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= @@ -39,6 +36,7 @@ go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJyS go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= go.opentelemetry.io/contrib/detectors/gcp v1.34.0/go.mod h1:cV4BMFcscUR/ckqLkbfQmF0PRsq8w/lMGzdbCSveBHo= go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.60.0/go.mod h1:69uWxva0WgAA/4bu2Yy70SLDBwZXuQ6PbBpbsa5iZrQ= go.opentelemetry.io/otel v1.35.0 h1:xKWKPxrxB6OtMCbmMY021CqC45J+3Onta9MqjhnusiQ= go.opentelemetry.io/otel v1.35.0/go.mod h1:UEqy8Zp11hpkUrL73gSlELM0DupHoiq72dR+Zqel/+Y= go.opentelemetry.io/otel/metric v1.35.0 h1:0znxYu2SNyuMSQT4Y9WDWej0VpcsxkuklLa4/siN90M= @@ -51,26 +49,18 @@ go.opentelemetry.io/otel/trace v1.35.0 h1:dPpEfJu1sDIqruz7BHFG3c7528f6ddfSWfFDVt go.opentelemetry.io/otel/trace v1.35.0/go.mod h1:WUk7DtFp1Aw2MkvqGdwiXYDZZNvA/1J8o6xRXLrIkyc= go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4= go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE= -golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.46.0 h1:cKRW/pmt1pKAfetfu+RCEvjvZkA9RimPbh7bhFjGVBU= -golang.org/x/crypto v0.46.0/go.mod h1:Evb/oLKmMraqjZ2iQTwDwvCtJkczlDuTmdJXoZVzqU0= golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/oauth2 v0.25.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/oauth2 v0.34.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.38.0/go.mod h1:bSEAKrOT1W+VSu9TSCMtoGEOUcKxOKgl3LE5QEF/xVg= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.39.0/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/genproto/googleapis/api v0.0.0-20250218202821-56aae31c358a h1:nwKuGPlUAt+aR+pcrkfFRrTU1BVrSmYyYMxYbUIVHr0= diff --git a/plan.md b/plan.md new file mode 100644 index 0000000..58bee62 --- /dev/null +++ b/plan.md @@ -0,0 +1,586 @@ +# ai-gateway — Implementation Plan + +This plan describes the implementation of a new Go microservice, `ai-gateway`, in the `home-services` monorepo (`gitea.nik4nao.com/nik/home-services`). It centralizes all AI/LLM logic behind a gRPC API so callers (`discord-bot`, `alexa-bridge`) remain thin transport adapters with zero AI knowledge. + +--- + +## 1. Goals & Non-Goals + +### Goals +- New gRPC service `ai-gateway` listening on `:50052`. +- Owns **all** AI logic: Ollama connection, prompt construction, LLM intent parsing, dispatch to `ha-gateway`. +- Callers send raw user text via `QueryRequest`; receive a human-readable reply in `QueryResponse`. +- mTLS client authentication when calling `ha-gateway` (ha-gateway requires mTLS). +- Hexagonal architecture, matching the existing `ha-gateway` layout. +- Structured logging via `slog`, OTel OTLP gRPC traces/metrics. +- Deployed to the `home-services` namespace on K3s. + +### Non-Goals +- No auth on `ai-gateway`'s own inbound gRPC surface in this iteration (in-cluster only; match current `ha-gateway` posture). +- No streaming responses — unary only. +- No conversation memory — each `Query` is stateless. +- No new Home Assistant features beyond what `ha-gateway` already exposes (LightService + EntityService). + +--- + +## 2. Repository Layout + +All paths are relative to the `home-services` repo root. + +``` +proto/ + ai/v1/ai.proto # NEW + +gen/ + ai/v1/ # NEW (generated; committed) + ai.pb.go + ai_grpc.pb.go + +services/ + ai-gateway/ # NEW + go.mod + cmd/ + ai-gateway/ + main.go + config/ + config.go + domain/ + prompt.go + service.go + intent.go + adapters/ + inbound/ + grpc/ + server.go + outbound/ + ollama/ + client.go + hagateway/ + client.go + internal/ + observability/ + logging.go + otel.go + Dockerfile + .dockerignore + discord-bot/ # MODIFIED + adapters/outbound/aigateway/client.go # NEW + (remove any direct Ollama code if present) +``` + +Also update: +- `go.work` — add `./services/ai-gateway` and keep `replace` directive to `../gen`. +- `buf.gen.yaml` / `buf.yaml` — include the new `ai/v1` proto package. + +--- + +## 3. Proto Definition + +### File: `proto/ai/v1/ai.proto` + +```proto +syntax = "proto3"; + +package ai.v1; + +option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1"; + +// AIService accepts free-form natural language queries and returns a +// human-readable reply. It encapsulates LLM prompting, intent parsing, +// and dispatch to downstream services (e.g. ha-gateway). +service AIService { + rpc Query(QueryRequest) returns (QueryResponse); +} + +message QueryRequest { + // Raw user text, e.g. "turn on the living room light". + string text = 1; + + // Optional caller identifier for logging/tracing (e.g. "discord-bot"). + string source = 2; +} + +message QueryResponse { + // Human-readable reply to show the user. + string reply = 1; + + // Parsed intent name, if any. Empty if no actionable intent was detected. + string intent = 2; + + // True if an action was dispatched to a downstream service. + bool action_taken = 3; +} +``` + +### Generation +- Run `buf generate` from repo root. +- Commit `gen/ai/v1/*.pb.go` and `gen/ai/v1/*_grpc.pb.go` (per existing convention — `gen/` is committed to avoid CI codegen dependency). + +--- + +## 4. Configuration (`services/ai-gateway/config/config.go`) + +Load from environment. Use `os.Getenv` with defaults (matches existing ha-gateway style — no new dep). + +| Env Var | Default | Purpose | +| ----------------------------- | ----------------------------------------------------- | ------------------------------------------------ | +| `GRPC_LISTEN_ADDR` | `:50052` | Inbound gRPC bind address | +| `OLLAMA_URL` | `http://192.168.7.96:11434` | Ollama HTTP API (direct LAN IP; no K8s Service) | +| `OLLAMA_MODEL` | `llama3` | Model name | +| `OLLAMA_TIMEOUT` | `30s` | HTTP timeout for Ollama calls | +| `HA_GATEWAY_ADDR` | `ha-gateway.home-services.svc.cluster.local:50051` | ha-gateway gRPC endpoint | +| `HA_GATEWAY_TLS_CA_FILE` | `/etc/ai-gateway/tls/ca.crt` | CA cert that signed ha-gateway's server cert | +| `HA_GATEWAY_TLS_CERT_FILE` | `/etc/ai-gateway/tls/tls.crt` | ai-gateway's client cert (for mTLS) | +| `HA_GATEWAY_TLS_KEY_FILE` | `/etc/ai-gateway/tls/tls.key` | ai-gateway's client key | +| `HA_GATEWAY_SERVER_NAME` | `ha-gateway.home-services.svc.cluster.local` | SNI / cert verification name | +| `LOG_LEVEL` | `info` | `debug`/`info`/`warn`/`error` | +| `LOG_FORMAT` | `json` | `json` or `text` | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | `otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317` | OTLP gRPC endpoint | +| `OTEL_SERVICE_NAME` | `ai-gateway` | Service name for traces/metrics | + +Provide a `Config` struct with a `Load()` function returning `(Config, error)`. Validate required files exist at startup. + +--- + +## 5. Domain Layer + +### `domain/intent.go` + +Define the intent contract the LLM must produce: + +```go +package domain + +type Intent struct { + Name string `json:"intent"` // e.g. "turn_on_light", "turn_off_light", "none" + Entity string `json:"entity"` // e.g. "living_room" (friendly name or entity_id) + Params map[string]string `json:"params"` // optional, e.g. {"brightness":"80"} + Reply string `json:"reply"` // what to say back to the user +} + +const ( + IntentNone = "none" + IntentTurnOnLight = "turn_on_light" + IntentTurnOffLight = "turn_off_light" + IntentListEntities = "list_entities" +) +``` + +### `domain/prompt.go` + +Build the Ollama prompt. The system prompt MUST instruct the model to return **only** a single JSON object matching the `Intent` schema. No markdown fences, no prose. + +```go +package domain + +import "fmt" + +const systemPrompt = `You are a home automation assistant. Given a user request, respond with a single JSON object and nothing else — no markdown, no code fences, no explanation. + +Schema: +{ + "intent": "turn_on_light" | "turn_off_light" | "list_entities" | "none", + "entity": "", + "params": { "": "" }, + "reply": "" +} + +Rules: +- If the request is not actionable, use intent="none" and put the conversational answer in "reply". +- Always include all four fields. Use "" or {} for empty values. +- Do not wrap the JSON in backticks.` + +func BuildPrompt(userText string) string { + return fmt.Sprintf("%s\n\nUser: %s", systemPrompt, userText) +} +``` + +### `domain/service.go` + +The orchestrator. Depends on two ports (interfaces) defined here: + +```go +package domain + +import "context" + +type LLMClient interface { + Generate(ctx context.Context, prompt string) (string, error) +} + +type HAClient interface { + TurnOnLight(ctx context.Context, entity string, params map[string]string) error + TurnOffLight(ctx context.Context, entity string) error + ListEntities(ctx context.Context) ([]string, error) +} + +type Service struct { + llm LLMClient + ha HAClient + log *slog.Logger +} + +func NewService(llm LLMClient, ha HAClient, log *slog.Logger) *Service { /* ... */ } + +type QueryResult struct { + Reply string + Intent string + ActionTaken bool +} + +func (s *Service) Query(ctx context.Context, text string) (QueryResult, error) { + // 1. BuildPrompt(text) + // 2. s.llm.Generate(ctx, prompt) + // 3. json.Unmarshal into Intent + // - On unmarshal error: log at warn, return reply = "I didn't understand that." + // 4. switch intent.Name: + // turn_on_light -> s.ha.TurnOnLight(...) + // turn_off_light -> s.ha.TurnOffLight(...) + // list_entities -> s.ha.ListEntities(...); format into reply + // none / default -> reply = intent.Reply + // 5. Return QueryResult +} +``` + +**Error handling:** +- LLM call failure → return error; inbound adapter maps to gRPC `Unavailable`. +- JSON parse failure → do NOT error; return a friendly "I didn't understand" reply and log the raw LLM output at `warn` with the original text (not error). +- HA dispatch failure → log at `error`, return reply "I couldn't reach Home Assistant right now."; `ActionTaken=false`. + +--- + +## 6. Outbound Adapters + +### `adapters/outbound/ollama/client.go` + +- Plain `net/http.Client` with configured timeout. +- POST to `{OLLAMA_URL}/api/generate` with body: + ```json + { "model": "", "prompt": "", "stream": false } + ``` +- Decode JSON response, return the `response` field as a string. +- Implement `domain.LLMClient`. +- Wrap the HTTP client with OTel instrumentation (`otelhttp.NewTransport`). + +### `adapters/outbound/hagateway/client.go` + +This is the mTLS-critical piece. + +- Construct a `*grpc.ClientConn` to `HA_GATEWAY_ADDR` with TLS credentials built from the three cert files: + ```go + func loadTLSCredentials(caFile, certFile, keyFile, serverName string) (credentials.TransportCredentials, error) { + caPEM, err := os.ReadFile(caFile) + if err != nil { return nil, fmt.Errorf("read ca: %w", err) } + cp := x509.NewCertPool() + if !cp.AppendCertsFromPEM(caPEM) { + return nil, errors.New("failed to append CA cert") + } + clientCert, err := tls.LoadX509KeyPair(certFile, keyFile) + if err != nil { return nil, fmt.Errorf("load client keypair: %w", err) } + return credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{clientCert}, + RootCAs: cp, + ServerName: serverName, + MinVersion: tls.VersionTLS13, + }), nil + } + ``` +- Use `grpc.NewClient(addr, grpc.WithTransportCredentials(creds), grpc.WithStatsHandler(otelgrpc.NewClientHandler()))`. +- Wrap the generated ha-gateway clients (`LightServiceClient`, `EntityServiceClient`) to satisfy `domain.HAClient`. +- Expose a `Close()` method for graceful shutdown. + +**Cert source:** the cert files will be projected into the pod via a Kubernetes `Secret` mounted at `/etc/ai-gateway/tls/`. See deployment manifest below. Issuing the cert is covered in §10. + +--- + +## 7. Inbound Adapter + +### `adapters/inbound/grpc/server.go` + +- Implements `aiv1.AIServiceServer`. +- `Query(ctx, req)` → calls `domain.Service.Query(ctx, req.Text)` → maps `QueryResult` to `QueryResponse`. +- Attach OTel interceptor: `grpc.StatsHandler(otelgrpc.NewServerHandler())`. +- Attach a slog unary interceptor that logs method, duration, caller `source`, and error code. +- Register reflection service only if `LOG_LEVEL=debug` (convenience for `grpcurl`). + +--- + +## 8. Observability (`internal/observability/`) + +Copy the pattern from `ha-gateway`: + +### `logging.go` +- `NewLogger(level, format string) *slog.Logger` returning either `slog.NewJSONHandler` or `slog.NewTextHandler` wrapping `os.Stdout`. + +### `otel.go` +- `InitOTel(ctx, endpoint, serviceName) (shutdown func(context.Context) error, err error)`. +- Uses `otlptracegrpc` + `otlpmetricgrpc` exporters, insecure credentials (in-cluster). +- Sets global `TracerProvider` and `MeterProvider`. +- Resource attributes: `service.name`, `service.namespace=home-services`. + +--- + +## 9. Entry Point (`cmd/ai-gateway/main.go`) + +Standard startup sequence: + +1. Load config. +2. Build logger. +3. Init OTel; defer shutdown. +4. Build Ollama client. +5. Build ha-gateway client (mTLS); defer `Close()`. +6. Build domain service. +7. Build gRPC server with interceptors, register `AIService`. +8. Listen on `GRPC_LISTEN_ADDR`. +9. Handle `SIGINT`/`SIGTERM` for graceful shutdown: `server.GracefulStop()` with a 10s timeout, then OTel shutdown. + +--- + +## 10. TLS / mTLS Plumbing + +`ha-gateway` requires mTLS. `ai-gateway` needs a client certificate signed by the same CA that ha-gateway trusts. + +### Approach: cert-manager + internal-ca-issuer + +Create a `Certificate` resource for `ai-gateway` (file: `manifests/home-services/ai-gateway-client-cert.yaml`): + +```yaml +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: ai-gateway-client + namespace: home-services +spec: + secretName: ai-gateway-client-tls + duration: 2160h # 90d + renewBefore: 360h # 15d + subject: + organizations: [home-services] + commonName: ai-gateway + usages: + - client auth + issuerRef: + name: internal-ca-issuer + kind: ClusterIssuer + group: cert-manager.io +``` + +**Important:** use `internal-ca-issuer` (the CA issuer), **never** `internal-ca` (the bootstrap self-signed issuer). This matches the homelab convention. + +The resulting secret `ai-gateway-client-tls` contains `tls.crt`, `tls.key`, and `ca.crt` — mount all three. + +### Verify ha-gateway's CA trust +Confirm ha-gateway's server TLS config trusts `internal-ca-issuer`'s CA (it should, since both use the same cluster CA). If ha-gateway uses a separate client-auth CA, adjust the issuer accordingly. + +--- + +## 11. Kubernetes Manifest + +### File: `manifests/home-services/ai-gateway.yaml` + +Single file with `---` separators per repo convention. + +```yaml +apiVersion: v1 +kind: Service +metadata: + name: ai-gateway + namespace: home-services +spec: + selector: { app: ai-gateway } + ports: + - name: grpc + port: 50052 + targetPort: 50052 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: ai-gateway + namespace: home-services +spec: + replicas: 1 + selector: { matchLabels: { app: ai-gateway } } + template: + metadata: + labels: { app: ai-gateway } + spec: + containers: + - name: ai-gateway + image: gitea.nik4nao.com/nik/ai-gateway:latest + imagePullPolicy: Always + ports: + - containerPort: 50052 + name: grpc + env: + - { name: GRPC_LISTEN_ADDR, value: ":50052" } + - { name: OLLAMA_URL, value: "http://192.168.7.96:11434" } + - { name: OLLAMA_MODEL, value: "llama3" } + - { name: HA_GATEWAY_ADDR, value: "ha-gateway.home-services.svc.cluster.local:50051" } + - { name: HA_GATEWAY_TLS_CA_FILE, value: "/etc/ai-gateway/tls/ca.crt" } + - { name: HA_GATEWAY_TLS_CERT_FILE, value: "/etc/ai-gateway/tls/tls.crt" } + - { name: HA_GATEWAY_TLS_KEY_FILE, value: "/etc/ai-gateway/tls/tls.key" } + - { name: HA_GATEWAY_SERVER_NAME, value: "ha-gateway.home-services.svc.cluster.local" } + - { name: LOG_LEVEL, value: "info" } + - { name: LOG_FORMAT, value: "json" } + - { name: OTEL_EXPORTER_OTLP_ENDPOINT, + value: "otel-collector-opentelemetry-collector.monitoring.svc.cluster.local:4317" } + - { name: OTEL_SERVICE_NAME, value: "ai-gateway" } + volumeMounts: + - name: tls + mountPath: /etc/ai-gateway/tls + readOnly: true + readinessProbe: + tcpSocket: { port: 50052 } + initialDelaySeconds: 3 + periodSeconds: 10 + livenessProbe: + tcpSocket: { port: 50052 } + initialDelaySeconds: 10 + periodSeconds: 20 + volumes: + - name: tls + secret: + secretName: ai-gateway-client-tls + imagePullSecrets: + - name: gitea-registry +``` + +No resource `limits`/`requests` yet — matches current repo convention (memory limits not yet enforced on pods). + +--- + +## 12. discord-bot Changes + +### New: `services/discord-bot/adapters/outbound/aigateway/client.go` +- gRPC client to `ai-gateway.home-services.svc.cluster.local:50052`, **plaintext** (no auth on ai-gateway's inbound surface yet). +- Exposes `Query(ctx, text string) (reply string, err error)`. +- Inject into existing command handler. + +### Removed / simplified +- If `discord-bot` currently contains any direct Ollama calls, remove them. +- Slash command handler for free-form queries simply calls `aigateway.Query(ctx, msg.Content)` and posts the returned reply. +- Event-notification path (existing Discord → notify flow) is untouched. + +### Config additions to discord-bot +- `AI_GATEWAY_ADDR` (default `ai-gateway.home-services.svc.cluster.local:50052`). + +--- + +## 13. CI / Build + +### `services/ai-gateway/Dockerfile` +Multi-stage build matching existing services: + +```dockerfile +FROM golang:1.26 AS build +WORKDIR /src +COPY go.work go.work.sum ./ +COPY gen ./gen +COPY services/ai-gateway ./services/ai-gateway +WORKDIR /src/services/ai-gateway +RUN CGO_ENABLED=0 go build -o /out/ai-gateway ./cmd/ai-gateway + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=build /out/ai-gateway /ai-gateway +USER nonroot:nonroot +ENTRYPOINT ["/ai-gateway"] +``` + +### Gitea Actions workflow +Mirror the existing `ha-gateway` workflow: +- Trigger on pushes touching `services/ai-gateway/**`, `gen/ai/**`, or `proto/ai/**`. +- `docker buildx` multiarch build (`linux/amd64,linux/arm64`). +- Push to `gitea.nik4nao.com/nik/ai-gateway:latest` and `:${{ github.sha }}`. +- Use the Gitea API token (`read:package` + `write:package`) as registry password — **not** the account password. +- Remember: buildkit CA must be injected each run (existing runner pattern). + +--- + +## 14. Workspace Wiring + +### `go.work` — add line: +``` +use ./services/ai-gateway +``` +Keep the existing `replace gitea.nik4nao.com/nik/home-services/gen => ../gen` in `services/ai-gateway/go.mod`. + +### `services/ai-gateway/go.mod` dependencies +- `google.golang.org/grpc` +- `google.golang.org/protobuf` +- `go.opentelemetry.io/otel` +- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` +- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` +- `go.opentelemetry.io/otel/sdk` +- `go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc` +- `go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp` + +--- + +## 15. Testing + +### Unit tests (`services/ai-gateway/domain/service_test.go`) +- Fake `LLMClient` returning canned JSON strings for each intent. +- Fake `HAClient` recording calls. +- Assert: + - Valid `turn_on_light` JSON → `HAClient.TurnOnLight` called with correct entity; reply matches. + - Invalid JSON → graceful reply, no panic, no HA call. + - `intent="none"` → no HA call; reply passed through. + - HA call returning error → reply contains "couldn't reach Home Assistant"; `ActionTaken=false`. + +### Integration smoke test (manual, post-deploy) +```bash +# From inside the cluster: +grpcurl -plaintext -d '{"text":"turn on the living room light","source":"manual"}' \ + ai-gateway.home-services.svc.cluster.local:50052 ai.v1.AIService/Query +``` + +### mTLS verification +```bash +# Should succeed (using mounted cert): +kubectl exec -n home-services deploy/ai-gateway -- /ai-gateway --selftest # if implemented +# Or inspect via openssl from within the pod if distroless allows a debug sidecar. +``` + +--- + +## 16. Rollout Order + +Implement in this order. Each step should compile and tests should pass before the next. + +1. **Proto + gen** — add `proto/ai/v1/ai.proto`, run `buf generate`, commit `gen/ai/v1/`. +2. **Scaffold** — create `services/ai-gateway/` with `go.mod`, `main.go` (stub), update `go.work`. +3. **Domain** — `intent.go`, `prompt.go`, `service.go` + unit tests with fakes. +4. **Ollama adapter** — HTTP client, manual curl-based validation against `192.168.7.96:11434`. +5. **ha-gateway adapter** — mTLS dial, wrap generated clients, satisfy `domain.HAClient`. +6. **Inbound gRPC adapter** — server, interceptors. +7. **Observability** — logging + OTel init. +8. **Entry point** — wire everything in `cmd/ai-gateway/main.go`. +9. **Dockerfile + CI** — build and push image to Gitea registry. +10. **Cert-manager Certificate** — apply `ai-gateway-client-cert.yaml`; verify `ai-gateway-client-tls` secret is created. +11. **Deployment manifest** — apply `ai-gateway.yaml`; verify pod ready, logs clean, `grpcurl` smoke test passes. +12. **discord-bot update** — add `aigateway` outbound adapter, remove any direct Ollama usage, redeploy. +13. **End-to-end test** — issue a Discord slash command, observe: + - Discord → ai-gateway → Ollama → ai-gateway → ha-gateway (mTLS) → HA → reply back. + - Traces visible in Tempo, logs in Loki, metrics in Prometheus. + +--- + +## 17. Open Questions / Deferred + +- **Auth on ai-gateway's inbound surface:** currently none. Revisit when `alexa-bridge` lands — Alexa path is public-ingress, so ai-gateway may eventually need mTLS inbound too. +- **Intent schema evolution:** if the set of intents grows meaningfully, consider moving the schema into the proto (enum + oneof) rather than free-form JSON. For now, JSON keeps the LLM prompt simple. +- **Conversation memory:** out of scope. If needed later, add a per-`source` session store (Valkey in `home-services`). +- **Prompt templates per model:** `llama3` works with the current system prompt. If swapping to a smaller model, prompt may need tuning — keep `BuildPrompt` easy to override via config. + +--- + +## 18. Acceptance Criteria + +- [ ] `ai-gateway` pod runs ready in `home-services` namespace. +- [ ] `grpcurl` smoke test (§15) returns a structured `QueryResponse` for a light command. +- [ ] Light actually turns on/off in Home Assistant when tested end-to-end. +- [ ] ha-gateway logs show mTLS handshake succeeded with CN=`ai-gateway`. +- [ ] Traces for a full Discord query show three spans: `discord-bot` → `ai-gateway` → `ha-gateway`. +- [ ] `discord-bot` contains no direct references to `OLLAMA_URL` or Ollama HTTP client code. +- [ ] Unit tests pass in CI; Docker image builds multiarch. \ No newline at end of file diff --git a/proto/ai/v1/ai.proto b/proto/ai/v1/ai.proto new file mode 100644 index 0000000..1cddf31 --- /dev/null +++ b/proto/ai/v1/ai.proto @@ -0,0 +1,20 @@ +syntax = "proto3"; + +package ai.v1; + +option go_package = "gitea.nik4nao.com/nik/home-services/gen/ai/v1;aiv1"; + +service AIService { + rpc Query(QueryRequest) returns (QueryResponse); +} + +message QueryRequest { + string text = 1; + string source = 2; +} + +message QueryResponse { + string reply = 1; + string intent = 2; + bool action_taken = 3; +}