From 862c04a612d862a55d92f98862d42bb3f02ce58e Mon Sep 17 00:00:00 2001
From: Oskar Kapala
Date: Mon, 1 Jun 2026 21:44:37 +0200
Subject: [PATCH] feat(vps): migracja npm/outline/joplin/ai-cluster do GitOps (manifesty; cutover NIE wykonany)

---
Review notes (text in this section is ignored by `git am`):

* Subject, in English: "migrate npm/outline/joplin/ai-cluster to GitOps
  (manifests only; cutover NOT performed)".
* services/npm/service.yaml: the `runtime:` key is kept as context so that
  `env_vars: []` stays nested under `runtime` (as in the three new manifests)
  instead of being re-parented under `persistence`. Assumes `env_vars` was
  indented as a child of `runtime` in the pre-image -- confirm before applying.
* npm override: comment corrected; `oom_score_adj: -800` lowers the OOM score
  but does not exempt the container (only -1000 does).
* healthcheck.sh (ai-cluster, joplin, outline): curl is bounded with
  `--max-time 10`, matching `timeout: 10s` in each service.yaml.
* services/ai-cluster/service.yaml: the placeholder
  `volume:mosquitto_config_bind` is replaced by the real bind-mount host path.
* `index` blob hashes of the edited files were not recomputed.

 .../ai-cluster/docker-compose.override.yml    |  33 ++++++
 .../joplin/docker-compose.override.yml        |   6 +
 .../node_exporter/docker-compose.override.yml |   3 +
 .../runtime/npm/docker-compose.override.yml   |   6 +
 .../outline/docker-compose.override.yml       |   9 ++
 hosts/vps/services.yaml                       |  78 +++++++++++++
 services/ai-cluster/docker-compose.yml        | 110 ++++++++++++++++++
 services/ai-cluster/env.example               |  14 +++
 services/ai-cluster/healthcheck.sh            |  15 +++
 services/ai-cluster/service.yaml              |  37 ++++++
 services/joplin/docker-compose.yml            |  44 +++++++
 services/joplin/env.example                   |  20 ++++
 services/joplin/healthcheck.sh                |  15 +++
 services/joplin/service.yaml                  |  31 +++++
 services/npm/docker-compose.yml               |   6 +-
 services/npm/service.yaml                     |   7 +-
 services/outline/docker-compose.yml           |  68 +++++++++++
 services/outline/env.example                  |  40 +++++++
 services/outline/healthcheck.sh               |  15 +++
 services/outline/service.yaml                 |  36 ++++++
 20 files changed, 586 insertions(+), 7 deletions(-)
 create mode 100644 hosts/vps/runtime/ai-cluster/docker-compose.override.yml
 create mode 100644 hosts/vps/runtime/joplin/docker-compose.override.yml
 create mode 100644 hosts/vps/runtime/node_exporter/docker-compose.override.yml
 create mode 100644 hosts/vps/runtime/npm/docker-compose.override.yml
 create mode 100644 hosts/vps/runtime/outline/docker-compose.override.yml
 create mode 100644 services/ai-cluster/docker-compose.yml
 create mode 100644 services/ai-cluster/env.example
 create mode 100644 services/ai-cluster/healthcheck.sh
 create mode 100644 services/ai-cluster/service.yaml
 create mode 100644 services/joplin/docker-compose.yml
 create mode 100644 services/joplin/env.example
 create mode 100644 services/joplin/healthcheck.sh
 create mode 100644 services/joplin/service.yaml
 create mode 100644 services/outline/docker-compose.yml
 create mode 100644 services/outline/env.example
 create mode 100644 services/outline/healthcheck.sh
 create mode 100644 services/outline/service.yaml

diff --git a/hosts/vps/runtime/ai-cluster/docker-compose.override.yml b/hosts/vps/runtime/ai-cluster/docker-compose.override.yml
new file mode 100644
index 0000000..4760743
--- /dev/null
+++ b/hosts/vps/runtime/ai-cluster/docker-compose.override.yml
@@ -0,0 +1,33 @@
+# AI cluster memory limits — HARD caps, containers are OOM-killed and auto-restarted
+# by Docker rather than consuming host memory. ai-cluster is the primary OOM suspect
+# (unbounded Python workers, no limits since deployment).
+#
+# Architectural note: compute workloads here should migrate to SOLARIA (GPU node).
+# Until migration: contain the blast radius with per-container limits.
+#
+# Pre-cutover: service-ops-worker still mounts compose/env from old paths.
+# After cutover and git pull, these overrides are removed and base compose paths are used.
+
+services:
+  codex-worker:
+    mem_limit: 64m
+
+  openclaw:
+    mem_limit: 128m
+
+  planner-worker:
+    mem_limit: 64m
+
+  service-ops-worker:
+    mem_limit: 64m
+    # Pre-cutover: override bind mounts to keep pointing at old dockeruser paths
+    volumes:
+      - /home/dockeruser/docker/ai-cluster/docker-compose.yml:/app/docker-compose.yml:ro
+      - /home/dockeruser/docker/ai-cluster/.env:/app/.env:ro
+      - /var/run/docker.sock:/var/run/docker.sock:rw
+
+  redis:
+    mem_limit: 32m
+
+  mosquitto:
+    mem_limit: 32m
diff --git a/hosts/vps/runtime/joplin/docker-compose.override.yml b/hosts/vps/runtime/joplin/docker-compose.override.yml
new file mode 100644
index 0000000..8d7c394
--- /dev/null
+++ b/hosts/vps/runtime/joplin/docker-compose.override.yml
@@ -0,0 +1,6 @@
+services:
+  app:
+    mem_limit: 224m
+
+  db:
+    mem_limit: 128m
diff --git a/hosts/vps/runtime/node_exporter/docker-compose.override.yml b/hosts/vps/runtime/node_exporter/docker-compose.override.yml
new file mode 100644
index 0000000..1c51a73
--- /dev/null
+++ b/hosts/vps/runtime/node_exporter/docker-compose.override.yml
@@ -0,0 +1,3 @@
+services:
+  node_exporter:
+    mem_limit: 32m
diff --git a/hosts/vps/runtime/npm/docker-compose.override.yml b/hosts/vps/runtime/npm/docker-compose.override.yml
new file mode 100644
index 0000000..ff78fac
--- /dev/null
+++ b/hosts/vps/runtime/npm/docker-compose.override.yml
@@ -0,0 +1,6 @@
+services:
+  npm:
+    mem_limit: 160m
+    # Public ingress — lowered OOM score so the host OOM killer strongly prefers other
+    # processes under memory pressure. Not immune: only -1000 exempts a process.
+    oom_score_adj: -800
diff --git a/hosts/vps/runtime/outline/docker-compose.override.yml b/hosts/vps/runtime/outline/docker-compose.override.yml
new file mode 100644
index 0000000..f5fd935
--- /dev/null
+++ b/hosts/vps/runtime/outline/docker-compose.override.yml
@@ -0,0 +1,9 @@
+services:
+  outline:
+    mem_limit: 512m
+
+  postgres:
+    mem_limit: 96m
+
+  redis:
+    mem_limit: 32m
diff --git a/hosts/vps/services.yaml b/hosts/vps/services.yaml
index b1b97d3..b9f7126 100644
--- a/hosts/vps/services.yaml
+++ b/hosts/vps/services.yaml
@@ -41,3 +41,81 @@ services:
     depends_on:
       local: []
       external: []
+
+  npm:
+    role: reverse-proxy-ingress
+    deployment_model: docker-compose
+    exposure: public
+    offline_required: false
+    depends_on:
+      local: []
+      external: []
+    ports:
+      - name: http
+        container_port: 80
+        protocol: tcp
+      - name: https
+        container_port: 443
+        protocol: tcp
+      - name: admin
+        container_port: 81
+        protocol: tcp
+    runtime:
+      data_path: /home/dockeruser/docker/npm/data
+      config_path: /opt/homelab/config/npm
+
+  outline:
+    role: team-wiki
+    deployment_model: docker-compose
+    exposure: public
+    offline_required: false
+    depends_on:
+      local:
+        - npm
+      external: []
+    ports:
+      - name: http
+        container_port: 3000
+        protocol: tcp
+    runtime:
+      config_path: /opt/homelab/config/outline
+
+  joplin:
+    role: note-sync-server
+    deployment_model: docker-compose
+    exposure: tailscale-internal
+    offline_required: false
+    depends_on:
+      local:
+        - npm
+      external: []
+    ports:
+      - name: http
+        container_port: 22300
+        bind: 127.0.0.1
+        protocol: tcp
+    runtime:
+      config_path: /opt/homelab/config/joplin
+
+  ai-cluster:
+    role: ai-worker-cluster
+    deployment_model: docker-compose
+    exposure: tailscale-internal
+    offline_required: false
+    depends_on:
+      local: []
+      external:
+        - piha:gateway
+    ports:
+      - name: openclaw-api
+        container_port: 8000
+        protocol: tcp
+      - name: mqtt
+        container_port: 1883
+        protocol: tcp
+        bind: tailscale
+    runtime:
+      config_path: /opt/homelab/config/ai-cluster
+    notes:
+      - "Local images must be built on VPS — not pulled from registry"
+      - "Compute workloads belong on SOLARIA; migrate when possible"
diff --git a/services/ai-cluster/docker-compose.yml b/services/ai-cluster/docker-compose.yml
new file mode 100644
index 0000000..62fa81f
--- /dev/null
+++ b/services/ai-cluster/docker-compose.yml
@@ -0,0 +1,110 @@
+services:
+  codex-worker:
+    image: ai-cluster-codex-worker
+    restart: unless-stopped
+    environment:
+      - AGENT_ID=vps-dev-1
+      - ROLE=dev
+      - MQTT_HOST=mosquitto
+      - MQTT_PORT=1883
+      - MQTT_USERNAME=${MQTT_USERNAME:-codex}
+      - MQTT_PASSWORD=${MQTT_PASSWORD}
+      - GATEWAY_BASE_URL=${GATEWAY_BASE_URL:-http://piha:8080}
+      - REQUEST_TIMEOUT_SECONDS=30
+    command: ["python", "worker.py"]
+    networks:
+      - internal
+
+  openclaw:
+    image: ai-cluster-openclaw
+    restart: unless-stopped
+    environment:
+      - MQTT_HOST=mosquitto
+      - MQTT_PORT=1883
+      - MQTT_USERNAME=${MQTT_USERNAME:-codex}
+      - MQTT_PASSWORD=${MQTT_PASSWORD}
+    command: ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+    ports:
+      - "8000:8000"
+    networks:
+      - internal
+      - npm_default
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 15s
+
+  planner-worker:
+    image: ai-cluster-planner-worker
+    restart: unless-stopped
+    environment:
+      - AGENT_ID=vps-planner-1
+      - ROLE=planner
+      - MQTT_HOST=mosquitto
+      - MQTT_PORT=1883
+      - MQTT_USERNAME=${MQTT_USERNAME:-codex}
+      - MQTT_PASSWORD=${MQTT_PASSWORD}
+    command: ["python", "planner_worker.py"]
+    networks:
+      - internal
+
+  service-ops-worker:
+    image: ai-cluster-service-ops-worker
+    restart: unless-stopped
+    environment:
+      - AGENT_ID=vps-service-ops-1
+      - ROLE=service-ops
+      - MQTT_HOST=mosquitto
+      - MQTT_PORT=1883
+      - MQTT_USERNAME=${MQTT_USERNAME:-codex}
+      - MQTT_PASSWORD=${MQTT_PASSWORD}
+      - COMPOSE_PROJECT_NAME=ai-cluster
+    command: ["python", "service_ops_worker.py"]
+    volumes:
+      # Post-migration: compose definition and env are in the repo/runtime paths.
+      # Pre-cutover these are overridden to old paths via docker-compose.override.yml.
+      - /home/oskar/homelab-codex-ws/services/ai-cluster/docker-compose.yml:/app/docker-compose.yml:ro
+      - /opt/homelab/config/ai-cluster/.env:/app/.env:ro
+      - /var/run/docker.sock:/var/run/docker.sock:rw
+    networks:
+      - internal
+
+  redis:
+    image: redis:7-alpine
+    restart: unless-stopped
+    command: ["redis-server"]
+    volumes:
+      - redis_data:/data
+    networks:
+      - internal
+
+  mosquitto:
+    image: eclipse-mosquitto:2
+    container_name: mosquitto
+    restart: unless-stopped
+    command: ["/usr/sbin/mosquitto", "-c", "/mosquitto/config/mosquitto.conf"]
+    ports:
+      # Tailscale IP binding — matches running container
+      - "100.95.58.48:1883:1883"
+    volumes:
+      # Config: kept at old path until mosquitto config migration is complete
+      - /home/dockeruser/docker/ai-cluster/mosquitto:/mosquitto/config:ro
+      - mosquitto_data:/mosquitto/data
+      - mosquitto_log:/mosquitto/log
+    networks:
+      - internal
+
+volumes:
+  redis_data:
+  mosquitto_data:
+  mosquitto_log:
+
+networks:
+  internal:
+    driver: bridge
+    name: ai-cluster_ai-cluster
+  npm_default:
+    external: true
+    name: npm_default
diff --git a/services/ai-cluster/env.example b/services/ai-cluster/env.example
new file mode 100644
index 0000000..893933f
--- /dev/null
+++ b/services/ai-cluster/env.example
@@ -0,0 +1,14 @@
+# AI Cluster — /opt/homelab/config/ai-cluster/.env
+# Read by all worker containers and mounted into service-ops-worker as /app/.env
+
+# MQTT broker credentials
+MQTT_HOST=mosquitto
+MQTT_PORT=1883
+MQTT_USERNAME=codex
+MQTT_PASSWORD=
+
+# API gateway (piha)
+GATEWAY_BASE_URL=http://piha:8080
+
+# Compose project name (required for service-ops-worker docker-compose operations)
+COMPOSE_PROJECT_NAME=ai-cluster
diff --git a/services/ai-cluster/healthcheck.sh b/services/ai-cluster/healthcheck.sh
new file mode 100644
index 0000000..61e1ca2
--- /dev/null
+++ b/services/ai-cluster/healthcheck.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Healthcheck for AI cluster (checks openclaw API gateway is responding)
+
+if ! docker ps --filter "name=ai-cluster-openclaw-1" --filter "status=running" | grep -q "openclaw"; then
+    echo "[FAIL] openclaw container is not running"
+    exit 1
+fi
+
+if ! curl -sf --max-time 10 http://localhost:8000/health > /dev/null; then
+    echo "[FAIL] openclaw HTTP health endpoint not responding"
+    exit 1
+fi
+
+echo "[OK] ai-cluster is healthy"
+exit 0
diff --git a/services/ai-cluster/service.yaml b/services/ai-cluster/service.yaml
new file mode 100644
index 0000000..b0a900f
--- /dev/null
+++ b/services/ai-cluster/service.yaml
@@ -0,0 +1,37 @@
+service:
+  name: ai-cluster
+  owner_node: vps
+  exposure: tailscale-internal
+  dependencies:
+    - mosquitto
+    - redis
+  ports:
+    - container: 8000
+      host: 8000
+      protocol: tcp
+      service: openclaw
+    - container: 1883
+      host: 1883
+      protocol: tcp
+      bind: 100.95.58.48  # Tailscale only
+      service: mosquitto
+  healthcheck:
+    type: http
+    endpoint: http://localhost:8000/health
+    interval: 30s
+    timeout: 10s
+    retries: 3
+  restart_policy: unless-stopped
+  persistence:
+    paths:
+      - /home/dockeruser/docker/ai-cluster/mosquitto  # mosquitto config (host bind mount, not a named volume)
+  runtime:
+    env_file: /opt/homelab/config/ai-cluster/.env
+    env_vars:
+      - MQTT_PASSWORD
+      - MQTT_USERNAME
+      - GATEWAY_BASE_URL
+  notes:
+    - "Local images (ai-cluster-*) must be built on VPS before deployment"
+    - "service-ops-worker mounts docker.sock and the compose file — needs post-migration path update"
+    - "Recommendation: move ai-cluster compute workloads to SOLARIA (GPU/compute node)"
diff --git a/services/joplin/docker-compose.yml b/services/joplin/docker-compose.yml
new file mode 100644
index 0000000..a58208f
--- /dev/null
+++ b/services/joplin/docker-compose.yml
@@ -0,0 +1,44 @@
+services:
+  app:
+    image: joplin/server:latest
+    container_name: joplin-server
+    restart: unless-stopped
+    env_file:
+      - /opt/homelab/config/joplin/.env
+    ports:
+      - "127.0.0.1:22300:22300"
+    depends_on:
+      db:
+        condition: service_healthy
+    networks:
+      - joplin_net
+      - npm_default
+
+  db:
+    image: postgres:18
+    container_name: joplin-db
+    restart: unless-stopped
+    env_file:
+      - /opt/homelab/config/joplin/.env
+    volumes:
+      - postgres_data:/var/lib/postgresql
+    networks:
+      - joplin_net
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U joplin -d joplin"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  postgres_data:
+    external: true
+    name: joplin_postgres_data
+
+networks:
+  joplin_net:
+    driver: bridge
+    name: joplin-net
+  npm_default:
+    external: true
+    name: npm_default
diff --git a/services/joplin/env.example b/services/joplin/env.example
new file mode 100644
index 0000000..101296a
--- /dev/null
+++ b/services/joplin/env.example
@@ -0,0 +1,20 @@
+# Joplin Server — /opt/homelab/config/joplin/.env
+# Both the `app` (joplin-server) and `db` (postgres) containers read this file.
+
+# Application
+APP_BASE_URL=https://joplin.example.com
+APP_PORT=22300
+TRUST_PROXY=1
+RUNNING_IN_DOCKER=1
+
+# Database connection (joplin-server reads these)
+DB_CLIENT=pg
+POSTGRES_HOST=db
+POSTGRES_PORT=5432
+POSTGRES_USER=joplin
+POSTGRES_DB=joplin
+POSTGRES_DATABASE=joplin
+POSTGRES_PASSWORD=
+
+# Runtime
+PM2_HOME=/opt/pm2
diff --git a/services/joplin/healthcheck.sh b/services/joplin/healthcheck.sh
new file mode 100644
index 0000000..db99df1
--- /dev/null
+++ b/services/joplin/healthcheck.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Healthcheck for Joplin Server
+
+if ! docker ps --filter "name=joplin-server" --filter "status=running" | grep -q "joplin-server"; then
+    echo "[FAIL] joplin-server container is not running"
+    exit 1
+fi
+
+if ! curl -sf --max-time 10 http://localhost:22300/api/ping > /dev/null; then
+    echo "[FAIL] Joplin Server HTTP endpoint not responding"
+    exit 1
+fi
+
+echo "[OK] Joplin Server is healthy"
+exit 0
diff --git a/services/joplin/service.yaml b/services/joplin/service.yaml
new file mode 100644
index 0000000..494f1ce
--- /dev/null
+++ b/services/joplin/service.yaml
@@ -0,0 +1,31 @@
+service:
+  name: joplin
+  owner_node: vps
+  exposure: tailscale-internal
+  dependencies:
+    - db
+  ports:
+    - container: 22300
+      host: 22300
+      protocol: tcp
+      bind: 127.0.0.1
+  healthcheck:
+    type: http
+    endpoint: http://localhost:22300/api/ping
+    interval: 30s
+    timeout: 10s
+    retries: 3
+  restart_policy: unless-stopped
+  persistence:
+    paths:
+      - volume:joplin_postgres_data  # Joplin notes DB
+  runtime:
+    env_file: /opt/homelab/config/joplin/.env
+    env_vars:
+      - APP_BASE_URL
+      - APP_PORT
+      - DB_CLIENT
+      - POSTGRES_HOST
+      - POSTGRES_USER
+      - POSTGRES_PASSWORD
+      - POSTGRES_DB
diff --git a/services/npm/docker-compose.yml b/services/npm/docker-compose.yml
index 07ce9f6..0a9e734 100644
--- a/services/npm/docker-compose.yml
+++ b/services/npm/docker-compose.yml
@@ -8,5 +8,7 @@ services:
       - '81:81'
       - '443:443'
     volumes:
-      - /opt/homelab/data/npm/data:/data
-      - /opt/homelab/data/npm/letsencrypt:/etc/letsencrypt
+      # Data lives at dockeruser's path — do NOT move these without a migration plan.
+      # Proxy hosts, SSL certs, and DB are stored here.
+      - /home/dockeruser/docker/npm/data:/data
+      - /home/dockeruser/docker/npm/letsencrypt:/etc/letsencrypt
diff --git a/services/npm/service.yaml b/services/npm/service.yaml
index b8e71f8..d2a8108 100644
--- a/services/npm/service.yaml
+++ b/services/npm/service.yaml
@@ -22,10 +22,7 @@ service:
   restart_policy: unless-stopped
   persistence:
     paths:
-      - /opt/homelab/data/npm/data
-      - /opt/homelab/data/npm/letsencrypt
+      - /home/dockeruser/docker/npm/data
+      - /home/dockeruser/docker/npm/letsencrypt
   runtime:
-    directories:
-      - /opt/homelab/data/npm/data
-      - /opt/homelab/data/npm/letsencrypt
     env_vars: []
diff --git a/services/outline/docker-compose.yml b/services/outline/docker-compose.yml
new file mode 100644
index 0000000..bf825e0
--- /dev/null
+++ b/services/outline/docker-compose.yml
@@ -0,0 +1,68 @@
+services:
+  outline:
+    image: outlinewiki/outline:1.6.1
+    container_name: outline-outline-1
+    restart: unless-stopped
+    env_file:
+      - /opt/homelab/config/outline/.env
+    ports:
+      - "3000:3000"
+    volumes:
+      - outline_storage:/var/lib/outline/data
+    depends_on:
+      - postgres
+      - redis
+    networks:
+      - outline_internal
+    healthcheck:
+      test: ["CMD", "wget", "-qO-", "http://localhost:3000/_health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 30s
+
+  postgres:
+    image: postgres:16-alpine
+    container_name: outline-postgres-1
+    restart: unless-stopped
+    env_file:
+      - /opt/homelab/config/outline/.env
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    networks:
+      - outline_internal
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U outline -d outline"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+  redis:
+    image: redis:7-alpine
+    container_name: outline-redis-1
+    restart: unless-stopped
+    volumes:
+      - redis_data:/data
+    networks:
+      - outline_internal
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 3
+
+volumes:
+  outline_storage:
+    external: true
+    name: outline_outline_storage
+  postgres_data:
+    external: true
+    name: outline_postgres_data
+  redis_data:
+    external: true
+    name: outline_redis_data
+
+networks:
+  outline_internal:
+    driver: bridge
+    name: outline_outline_internal
diff --git a/services/outline/env.example b/services/outline/env.example
new file mode 100644
index 0000000..368db56
--- /dev/null
+++ b/services/outline/env.example
@@ -0,0 +1,40 @@
+# Outline Wiki — /opt/homelab/config/outline/.env
+# Both the `outline` and `postgres` containers read this file.
+
+# Application
+URL=https://outline.example.com
+NODE_ENV=production
+PORT=3000
+FILE_STORAGE=local
+FILE_STORAGE_LOCAL_ROOT_DIR=/var/lib/outline/data
+FORCE_HTTPS=true
+
+# Secrets — generate with: openssl rand -hex 32
+SECRET_KEY=
+UTILS_SECRET=
+
+# Database
+DATABASE_URL=postgres://outline:@postgres:5432/outline
+PGSSLMODE=disable
+
+# Redis
+REDIS_URL=redis://redis:6379
+
+# Postgres sidecar vars (read by the postgres container)
+POSTGRES_USER=outline
+POSTGRES_DB=outline
+POSTGRES_PASSWORD=
+
+# Google OAuth (optional)
+GOOGLE_CLIENT_ID=
+GOOGLE_CLIENT_SECRET=
+
+# SMTP
+SMTP_HOST=
+SMTP_PORT=587
+SMTP_USERNAME=
+SMTP_PASSWORD=
+SMTP_FROM_EMAIL=outline@example.com
+SMTP_REPLY_EMAIL=outline@example.com
+SMTP_SECURE=false
+ALLOWED_DOMAINS=
diff --git a/services/outline/healthcheck.sh b/services/outline/healthcheck.sh
new file mode 100644
index 0000000..9609193
--- /dev/null
+++ b/services/outline/healthcheck.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# Healthcheck for Outline Wiki stack
+
+if ! docker ps --filter "name=outline-outline-1" --filter "status=running" | grep -q "outline-outline-1"; then
+    echo "[FAIL] outline container is not running"
+    exit 1
+fi
+
+if ! curl -sf --max-time 10 http://localhost:3000/_health > /dev/null; then
+    echo "[FAIL] Outline HTTP health endpoint not responding"
+    exit 1
+fi
+
+echo "[OK] Outline is healthy"
+exit 0
diff --git a/services/outline/service.yaml b/services/outline/service.yaml
new file mode 100644
index 0000000..fb54ba2
--- /dev/null
+++ b/services/outline/service.yaml
@@ -0,0 +1,36 @@
+service:
+  name: outline
+  owner_node: vps
+  exposure: public
+  dependencies:
+    - postgres
+    - redis
+  ports:
+    - container: 3000
+      host: 3000
+      protocol: tcp
+  healthcheck:
+    type: http
+    endpoint: http://localhost:3000/_health
+    interval: 30s
+    timeout: 10s
+    retries: 3
+  restart_policy: unless-stopped
+  persistence:
+    paths:
+      # Docker named volumes — data stays at Docker volume paths
+      - volume:outline_outline_storage  # /var/lib/outline/data inside container
+      - volume:outline_postgres_data  # Postgres data directory
+      - volume:outline_redis_data  # Redis persistence
+  runtime:
+    env_file: /opt/homelab/config/outline/.env
+    env_vars:
+      - URL
+      - DATABASE_URL
+      - REDIS_URL
+      - SECRET_KEY
+      - UTILS_SECRET
+      - FILE_STORAGE
+      - POSTGRES_USER
+      - POSTGRES_PASSWORD
+      - POSTGRES_DB