#!/usr/bin/env bash
# scripts/deploy/deploy.sh — Saturn-side deploy dispatcher
#
# Usage: deploy.sh <target> [--dry-run] [--no-gate]
#   target ∈ {control-plane, vps, piha, solaria, chelsty-infra}
#
# Phases, in order: preflight → gate → execute → verify → report.
# Exit codes: 0=ok 1=preflight 2=gate 3=execute 4=verify 5=handoff(sudo)
#
# Environment:
#   SSH_USER   remote login name (default: oskar)
#
# NOTE: `set -e` is intentionally not enabled. Each phase inspects the exit
# status of the commands it cares about and maps failures to the phase
# specific exit codes listed above.

set -uo pipefail

REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
SSH_USER="${SSH_USER:-oskar}"
START_TIME=$(date +%s)

TARGET=""
DRY_RUN=false
NO_GATE=false

# Filled in by main() once the target is known.
SSH_HOST=""
SSH_TIMEOUT=5
SSH_OPTS=()

#######################################
# Print the usage text to stderr and exit.
# Arguments: $1 - exit status (optional, default 1)
#######################################
usage() {
  cat >&2 <<'EOF'
Usage: deploy.sh <target> [--dry-run] [--no-gate]

Targets:
  control-plane   observer/supervisor/executor/operator-ui on VPS
  vps             all VPS GitOps services
  piha            PIHA services
  solaria         SOLARIA compute services
  chelsty-infra   CHELSTY edge node (LTE, longer SSH timeout)

Flags:
  --dry-run   run preflight + gate only; stop before deploy
  --no-gate   skip pytest + docker build (emergency only; logged as WARNING)

Exit codes: 0=ok 1=preflight 2=gate 3=execute 4=verify 5=handoff(sudo)
EOF
  exit "${1:-1}"
}

# ── PREFLIGHT ────────────────────────────────────────────────────────────────
#######################################
# Refuse to deploy unless the local checkout is on master, has no staged or
# unstaged modifications, has nothing unpushed relative to origin/master, and
# the target host answers a non-interactive SSH probe.
# Globals: REPO_ROOT, SSH_USER, SSH_HOST, SSH_TIMEOUT, SSH_OPTS (read)
# Exits:   1 on any failed check
#######################################
preflight() {
  echo "=== PREFLIGHT ==="

  local branch
  if ! branch=$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD); then
    echo "ERROR: Cannot determine current branch in ${REPO_ROOT}." >&2
    exit 1
  fi
  if [[ "$branch" != "master" ]]; then
    echo "ERROR: On branch '${branch}', not master. Switch to master and push first." >&2
    exit 1
  fi
  echo "[ok] branch: master"

  if ! git -C "$REPO_ROOT" diff --quiet; then
    echo "ERROR: Unstaged changes in working tree. Commit or stash before deploying." >&2
    exit 1
  fi
  if ! git -C "$REPO_ROOT" diff --cached --quiet; then
    echo "ERROR: Staged but uncommitted changes. Commit before deploying." >&2
    exit 1
  fi
  echo "[ok] working tree clean"

  # A failed fetch would leave origin/master stale and make the unpushed
  # check below meaningless, so treat it as a preflight failure.
  if ! git -C "$REPO_ROOT" fetch origin master --quiet; then
    echo "ERROR: 'git fetch origin master' failed; cannot verify that master is pushed." >&2
    exit 1
  fi

  local unpushed
  if ! unpushed=$(git -C "$REPO_ROOT" log origin/master..HEAD --oneline); then
    echo "ERROR: Cannot compare HEAD with origin/master." >&2
    exit 1
  fi
  if [[ -n "$unpushed" ]]; then
    echo "ERROR: Unpushed commits on master:" >&2
    echo "$unpushed" >&2
    echo "Push first: git push origin master" >&2
    exit 1
  fi
  echo "[ok] no unpushed commits"

  echo "Checking SSH: ${SSH_USER}@${SSH_HOST} (ConnectTimeout=${SSH_TIMEOUT}s)..."
  # ssh's own diagnostics are suppressed on purpose; the message below is the
  # user-facing error.
  if ! ssh "${SSH_OPTS[@]}" "${SSH_USER}@${SSH_HOST}" true 2>/dev/null; then
    echo "ERROR: Cannot reach ${SSH_HOST} via SSH (timeout ${SSH_TIMEOUT}s)." >&2
    exit 1
  fi
  echo "[ok] ${SSH_HOST} reachable"
}

# ── GATE ─────────────────────────────────────────────────────────────────────
#######################################
# Run pytest (when a tests/ directory exists) and `docker build` for every
# service of the target that has a Dockerfile under services/<name>/.
# For control-plane the service list is the single entry "control-plane";
# for other targets it is read from hosts/<target>/services.yaml.
# Globals: NO_GATE, TARGET, REPO_ROOT (read)
# Exits:   2 when the service list cannot be read or any test/build fails
#######################################
gate() {
  if [[ "$NO_GATE" == "true" ]]; then
    echo "=== GATE: SKIPPED ==="
    echo "WARNING: --no-gate active — pytest + docker build bypassed (emergency mode)." >&2
    return 0
  fi

  echo "=== GATE ==="

  local services=()
  local svc

  if [[ "$TARGET" == "control-plane" ]]; then
    services=("control-plane")
  else
    local svc_yaml="${REPO_ROOT}/hosts/${TARGET}/services.yaml"
    if [[ ! -f "$svc_yaml" ]]; then
      echo "ERROR: ${svc_yaml} not found." >&2
      exit 2
    fi

    # The path is passed as argv[1] instead of being pasted into the Python
    # source, so quotes in the path cannot break or alter the program.
    # A non-zero exit (missing PyYAML, parse error, unexpected document
    # shape) must fail the gate rather than yield an empty service list.
    local svc_list
    if ! svc_list=$(python3 -c '
import sys
import yaml

with open(sys.argv[1]) as f:
    data = yaml.safe_load(f) or {}
svcs = data.get("services") or {}
if isinstance(svcs, dict):
    print("\n".join(svcs.keys()))
elif isinstance(svcs, list):
    print("\n".join(svcs))
' "$svc_yaml"); then
      echo "ERROR: Could not read service list from ${svc_yaml}." >&2
      exit 2
    fi

    while IFS= read -r svc; do
      [[ -z "$svc" ]] && continue
      # Only services built from this repository are gated.
      if [[ -f "${REPO_ROOT}/services/${svc}/Dockerfile" ]]; then
        services+=("$svc")
      fi
    done <<< "$svc_list"
  fi

  if [[ ${#services[@]} -eq 0 ]]; then
    echo "[info] No services with local Dockerfile found for ${TARGET} — gate trivially passes."
    return 0
  fi

  echo "Services under gate: ${services[*]}"

  # Run every check before exiting so one invocation reports all failures.
  local gate_failed=false
  local svc_dir
  for svc in "${services[@]}"; do
    svc_dir="${REPO_ROOT}/services/${svc}"

    if [[ -d "${svc_dir}/tests" ]]; then
      echo "--- pytest: ${svc} ---"
      if ! python3 -m pytest "${svc_dir}/tests" -q; then
        echo "GATE FAIL: pytest failed for ${svc}" >&2
        gate_failed=true
      fi
    fi

    echo "--- docker build: ${svc} ---"
    if ! docker build --quiet "${svc_dir}" >/dev/null; then
      echo "GATE FAIL: docker build failed for ${svc}" >&2
      gate_failed=true
    fi
  done

  if [[ "$gate_failed" == "true" ]]; then
    exit 2
  fi
  echo "[ok] gate passed"
}

# ── EXECUTE ──────────────────────────────────────────────────────────────────
#######################################
# Run the deploy: deploy-control-plane.sh --ssh for control-plane, otherwise
# `git pull` + deploy-node.sh on the remote host over SSH. Combined
# stdout/stderr of the command is captured and echoed afterwards.
# Globals: TARGET, REPO_ROOT, SSH_USER, SSH_HOST, SSH_OPTS (read)
# Exits:   5 when the output contains a sudo password prompt,
#          3 when the deploy command exits non-zero
#######################################
execute() {
  echo "=== EXECUTE ==="

  local cmd_output
  local cmd_exit=0

  if [[ "$TARGET" == "control-plane" ]]; then
    echo "Running deploy-control-plane.sh --ssh..."
    cmd_output=$("${REPO_ROOT}/scripts/deploy/deploy-control-plane.sh" --ssh 2>&1) \
      || cmd_exit=$?
  else
    echo "SSHing to ${SSH_HOST}: git pull + deploy-node.sh..."
    cmd_output=$(ssh "${SSH_OPTS[@]}" "${SSH_USER}@${SSH_HOST}" \
      'cd ~/homelab-codex-ws && git pull && ./scripts/deploy/deploy-node.sh' 2>&1) \
      || cmd_exit=$?
  fi

  echo "$cmd_output"

  # Literal substring match done in-shell. Piping into `grep -q` under
  # `set -o pipefail` can report failure when grep exits early and the writer
  # receives SIGPIPE, which would hide the sudo prompt on large outputs.
  if [[ "$cmd_output" == *"[sudo] password"* ]]; then
    echo "" >&2
    echo "ERROR (exit 5): Deploy hit an interactive sudo prompt." >&2
    echo "Run manually:" >&2
    if [[ "$TARGET" == "control-plane" ]]; then
      echo "  ssh -t ${SSH_USER}@${SSH_HOST} 'cd ~/homelab-codex-ws && git pull origin master && cd services/control-plane && bash deploy-local.sh'" >&2
    else
      echo "  ssh -t ${SSH_USER}@${SSH_HOST} 'cd ~/homelab-codex-ws && git pull && ./scripts/deploy/deploy-node.sh'" >&2
    fi
    exit 5
  fi

  if [[ $cmd_exit -ne 0 ]]; then
    echo "ERROR: Deploy command exited ${cmd_exit}." >&2
    exit 3
  fi

  echo "[ok] execute completed"
}

# ── VERIFY ───────────────────────────────────────────────────────────────────
#######################################
# Inspect `docker ps` on the remote host: every listed container must have a
# status starting with "Up", none may be "(unhealthy)", and for control-plane
# the names supervisor/observer/executor/operator-ui must appear in the output.
# Globals: TARGET, SSH_USER, SSH_HOST, SSH_OPTS (read)
# Exits:   4 when docker ps fails or any check fails
#######################################
verify() {
  echo "=== VERIFY ==="

  local ps_output
  local ps_exit=0
  ps_output=$(ssh "${SSH_OPTS[@]}" "${SSH_USER}@${SSH_HOST}" \
    'docker ps --format "{{.Names}}\t{{.Status}}"' 2>&1) \
    || ps_exit=$?

  if [[ $ps_exit -ne 0 ]]; then
    echo "ERROR: docker ps failed on ${SSH_HOST}:" >&2
    echo "$ps_output" >&2
    exit 4
  fi

  echo "$ps_output"

  local failed=false

  # Non-empty lines whose status column does not start with "Up".
  # grep exits 1 when nothing is selected; that is the healthy case here.
  local not_up
  not_up=$(grep -v '^$' <<< "$ps_output" | grep -v $'\tUp' || true)
  if [[ -n "$not_up" ]]; then
    echo "ERROR: Containers not in Up state:" >&2
    echo "$not_up" >&2
    failed=true
  fi

  local unhealthy
  unhealthy=$(grep '(unhealthy)' <<< "$ps_output" || true)
  if [[ -n "$unhealthy" ]]; then
    echo "ERROR: Unhealthy containers:" >&2
    echo "$unhealthy" >&2
    failed=true
  fi

  if [[ "$TARGET" == "control-plane" ]]; then
    local cp_svc
    for cp_svc in supervisor observer executor operator-ui; do
      # Substring match anywhere in the listing; done in-shell to avoid the
      # pipefail/SIGPIPE false negative of `echo ... | grep -q`.
      if [[ "$ps_output" != *"$cp_svc"* ]]; then
        echo "ERROR: control-plane component absent from docker ps: ${cp_svc}" >&2
        failed=true
      fi
    done
  fi

  # NOTE(review): for non-control-plane targets an empty `docker ps` listing
  # passes every check above — confirm that is intended.
  if [[ "$failed" == "true" ]]; then
    echo "" >&2
    echo "Full docker ps output above." >&2
    exit 4
  fi

  echo "[ok] all containers healthy"
}

# ── REPORT ───────────────────────────────────────────────────────────────────
#######################################
# Print the one-line summary for a completed run.
# Globals:   START_TIME, REPO_ROOT, NO_GATE, TARGET (read)
# Arguments: $1 - "dry-run" for a dry run; anything else (default "deploy")
#                 reports a full deploy
#######################################
report() {
  local mode="${1:-deploy}"

  local end_time
  end_time=$(date +%s)
  local elapsed=$(( end_time - START_TIME ))

  local commit_hash
  commit_hash=$(git -C "$REPO_ROOT" rev-parse --short HEAD) || commit_hash="unknown"

  local gate_s verify_s
  if [[ "$NO_GATE" == "true" ]]; then
    gate_s="skip"
  else
    gate_s="ok"
  fi

  if [[ "$mode" == "dry-run" ]]; then
    verify_s="skip(dry-run)"
  else
    verify_s="green"
  fi

  echo ""
  if [[ "$mode" == "dry-run" ]]; then
    echo "DRY RUN OK | target=${TARGET} | commit=${commit_hash} | gate=${gate_s} | verify=${verify_s} | ${elapsed}s"
  else
    echo "DEPLOY OK | target=${TARGET} | commit=${commit_hash} | gate=${gate_s} | verify=${verify_s} | ${elapsed}s"
  fi
}

# ── MAIN ─────────────────────────────────────────────────────────────────────
#######################################
# Parse the command line, derive the SSH host/timeout for the target, then run
# the phases in order. With --dry-run the run stops after the gate.
# Arguments: the script's command-line arguments
#######################################
main() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      control-plane|vps|piha|solaria|chelsty-infra)
        TARGET="$1"
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      --no-gate)
        NO_GATE=true
        shift
        ;;
      -h|--help)
        # Asking for help is not a failure; exit 1 is reserved for preflight.
        usage 0
        ;;
      *)
        echo "Unknown argument: $1" >&2
        usage
        ;;
    esac
  done

  if [[ -z "$TARGET" ]]; then
    echo "Error: target is required." >&2
    usage
  fi

  # control-plane is deployed onto the "vps" host; every other target name is
  # used as the SSH host name directly.
  case "$TARGET" in
    control-plane) SSH_HOST="vps" ;;
    *)             SSH_HOST="$TARGET" ;;
  esac

  # chelsty-* targets get a longer connect timeout (LTE link per usage text).
  case "$TARGET" in
    chelsty-*) SSH_TIMEOUT=30 ;;
    *)         SSH_TIMEOUT=5 ;;
  esac

  # BatchMode=yes makes ssh fail instead of prompting for a password.
  SSH_OPTS=(-o "ConnectTimeout=${SSH_TIMEOUT}" -o BatchMode=yes)

  preflight
  gate

  if [[ "$DRY_RUN" == "true" ]]; then
    report dry-run
    exit 0
  fi

  execute
  verify
  report
}

main "$@"