diff --git a/hosts/lustro/runtime/node-agent/docker-compose.override.yml b/hosts/lustro/runtime/node-agent/docker-compose.override.yml
new file mode 100644
index 0000000..6e9baf7
--- /dev/null
+++ b/hosts/lustro/runtime/node-agent/docker-compose.override.yml
@@ -0,0 +1,19 @@
+services:
+  node-agent:
+    # Docker GID on LUSTRO is 991 (not the Debian default 999).
+    # Compose concatenates group_add lists; 991 is what gives socket access here.
+    group_add:
+      - "991"
+    mem_limit: 256m  # RPi4 4 GiB; MagicMirror consumes ~1.9 GiB — agent must be bounded
+    environment:
+      - NODE_NAME=lustro
+      - NODE_TYPE=sd_card
+      - VPS_EVENTS_HOST=100.95.58.48
+      - VPS_EVENTS_USER=oskar
+      - VPS_EVENTS_PATH=/opt/homelab/events
+      - CHECK_INTERVAL=60
+    volumes:
+      # pi's SSH key for rsync event shipping to VPS (push-based node, no repo checkout)
+      - /home/pi/.ssh:/root/.ssh:ro
+      # Override ../.. from the base compose to the pushed deploy dir (no repo on node)
+      - /opt/homelab/deploy/node-agent:/repo:ro
diff --git a/scripts/onboard/steps/30-node-agent.sh b/scripts/onboard/steps/30-node-agent.sh
new file mode 100644
index 0000000..afd9a43
--- /dev/null
+++ b/scripts/onboard/steps/30-node-agent.sh
@@ -0,0 +1,159 @@
+#!/usr/bin/env bash
+# scripts/onboard/steps/30-node-agent.sh — deploy node-agent to remote node
+#
+# Push-based deploy (git_control=false on LUSTRO): rsync services/node-agent/
+# and the host override to /opt/homelab/deploy/node-agent/ on the remote, then
+# docker compose build + up via SSH. Mirrors the PIHA pattern but pushes files
+# instead of git-pulling them on the node.
+#
+# Stages:
+#   1. push   — rsync base compose+src, copy override to remote deploy dir
+#   2. up     — docker compose up -d --build (guarded: skip if already running)
+#   3. verify — container running + event file in /opt/homelab/events/${TS_HOSTNAME}/
+#
+# Dry-run: probes run unconditionally; rsync/rrun mutations honour DRY_RUN.
+#
+# NOTE: stages 1 and 2 are skipped whenever the container is already running,
+# so changed sources or overrides are not redeployed until it is stopped.
+
+set -euo pipefail
+
+STEP_NAME="30-node-agent"
+
+: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
+: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
+: "${DRY_RUN:=0}"
+
+# Source common.sh when run standalone (orchestrator sources it before calling steps)
+if ! declare -f log >/dev/null 2>&1; then
+  # shellcheck source=../lib/common.sh
+  source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
+fi
+
+# ── parse node.yaml ───────────────────────────────────────────────────────────
+SSH_USER=$(yaml_get "$NODE_YAML" "ssh_user")
+TS_HOSTNAME=$(yaml_get "$NODE_YAML" "tailscale.hostname")
+
+# "test || die" keeps each line's status at 0 on the success path (set -e safe).
+[[ -n "$SSH_USER" ]] || die "ssh_user not set in $NODE_YAML"
+[[ -n "$TS_HOSTNAME" ]] || die "tailscale.hostname not set in $NODE_YAML"
+
+# Values already present in the environment take precedence over node.yaml.
+export ONBOARD_SSH_USER="${ONBOARD_SSH_USER:-${SSH_USER}}"
+export ONBOARD_SSH_HOST="${ONBOARD_SSH_HOST:-${TS_HOSTNAME}}"
+
+# shellcheck source=../lib/remote.sh
+source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"
+
+REMOTE_DEPLOY_DIR="/opt/homelab/deploy/node-agent"
+COMPOSE_BASE="${REMOTE_DEPLOY_DIR}/docker-compose.yml"
+COMPOSE_OVERRIDE="${REMOTE_DEPLOY_DIR}/docker-compose.override.yml"
+REMOTE_EVENTS_DIR="/opt/homelab/events/${TS_HOSTNAME}"
+
+LOCAL_SVC_DIR="${REPO_ROOT}/services/node-agent"
+LOCAL_OVERRIDE="${REPO_ROOT}/hosts/${TS_HOSTNAME}/runtime/node-agent/docker-compose.override.yml"
+
+# Stage 3 event polling: probe every EVENT_POLL_STEP s, last probe at EVENT_POLL_MAX s.
+EVENT_POLL_STEP=10
+EVENT_POLL_MAX=90
+
+# ── rprobe: read-only remote probe — always runs, even in dry-run ─────────────
+# ssh joins the arguments with spaces and the remote shell parses the result,
+# so callers pass one pre-quoted command string.
+# NOTE(review): _SSH_OPTS is not defined in this file — presumably remote.sh; confirm.
+rprobe() {
+  ssh "${_SSH_OPTS[@]}" "${ONBOARD_SSH_USER}@${ONBOARD_SSH_HOST}" -- "$@"
+}
+
+# ── agent_running: exit 0 iff `docker ps` on the node lists a running ─────────
+# container matching ^node-agent$. Shared by the stage 1 guard and stage 3.
+agent_running() {
+  rprobe "docker ps --filter name=^node-agent\$ --filter status=running --format '{{.Names}}' 2>/dev/null | grep -q node-agent" 2>/dev/null
+}
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 1 — push compose files to remote
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 1/3 push compose → ${ONBOARD_SSH_HOST}:${REMOTE_DEPLOY_DIR}"
+
+# Guard by EFFECT: is node-agent already running?
+_running=0
+if agent_running; then
+  _running=1
+  log "node-agent container already running — skip push+build+up"
+fi
+
+if [[ "$_running" -eq 0 ]]; then
+  # Fail before touching the remote if either local input is missing.
+  [[ -d "$LOCAL_SVC_DIR" ]] \
+    || die "Service dir not found: $LOCAL_SVC_DIR"
+  [[ -f "$LOCAL_OVERRIDE" ]] \
+    || die "Override not found: $LOCAL_OVERRIDE"
+
+  # Push base compose + Dockerfile + src/ (rsync_dir handles DRY_RUN)
+  rsync_dir "${LOCAL_SVC_DIR}/" "${REMOTE_DEPLOY_DIR}/"
+
+  # Push host-specific override (rcopy handles DRY_RUN)
+  rcopy "${LOCAL_OVERRIDE}" "${COMPOSE_OVERRIDE}"
+fi
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 2 — docker compose build + up
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 2/3 docker compose up node-agent"
+
+if [[ "$_running" -eq 1 ]]; then
+  log "node-agent already running — skip"
+else
+  # Build image on remote (arm64 native); then start the service.
+  # --build rebuilds if context changed; idempotent if image is current.
+  rrun docker compose \
+    -f "${COMPOSE_BASE}" \
+    -f "${COMPOSE_OVERRIDE}" \
+    up -d --build node-agent
+fi
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 3 — verify
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 3/3 verify"
+
+if [[ "$DRY_RUN" == 1 ]]; then
+  log "dry-run: skipping verify (mutations may not have run)"
+else
+  # Verify: container running (docker ps — not command -v)
+  if agent_running; then
+    log "Verify OK: node-agent container running"
+    rprobe "docker ps --filter name=node-agent --format 'table {{.Names}}\t{{.Status}}\t{{.Image}}'" || true
+  else
+    die "node-agent container is NOT running — check: docker logs node-agent on ${TS_HOSTNAME}"
+  fi
+
+  # Verify: an event file exists in ${REMOTE_EVENTS_DIR} (confirms agent writes).
+  # This only tests for existence, so a file left by an earlier run also passes.
+  # First cycle runs at start then sleeps CHECK_INTERVAL; allow EVENT_POLL_MAX s.
+  log "Waiting for first event (up to ${EVENT_POLL_MAX} s, CHECK_INTERVAL=60)..."
+  _event_ok=0
+  for _elapsed in $(seq 0 "$EVENT_POLL_STEP" "$EVENT_POLL_MAX"); do
+    # ls exits non-zero when the glob matches nothing.
+    if rprobe "ls ${REMOTE_EVENTS_DIR}/*.json >/dev/null 2>&1" 2>/dev/null; then
+      _event_ok=1
+      break
+    fi
+    # Sleep only between probes so the last probe lands on the deadline.
+    if (( _elapsed < EVENT_POLL_MAX )); then
+      log " ... ${_elapsed} s elapsed, waiting..."
+      sleep "$EVENT_POLL_STEP"
+    fi
+  done
+
+  if [[ "$_event_ok" -eq 1 ]]; then
+    log "Verify OK: events present in ${REMOTE_EVENTS_DIR}/"
+    rprobe "ls -lth ${REMOTE_EVENTS_DIR}/ | head -5" || true
+  else
+    warn "No events yet in ${REMOTE_EVENTS_DIR}/ after ${EVENT_POLL_MAX} s — agent may still be initialising (CHECK_INTERVAL=60)"
+    warn "Re-run verify manually: docker logs node-agent on ${TS_HOSTNAME}"
+  fi
+fi
+
+log "[$STEP_NAME] done"