#!/usr/bin/env bash
# scripts/deploy/deploy.sh — Saturn-side deploy dispatcher
#
# Usage: deploy.sh <target> [--dry-run] [--no-gate]
#   target ∈ {control-plane, vps, piha, solaria, chelsty-infra}
#
# Exit codes: 0=ok 1=preflight 2=gate 3=execute 4=verify 5=handoff(sudo)

# Unset variables are errors and a pipeline fails if any stage fails.
# errexit (-e) is not enabled: the phases below capture exit statuses
# explicitly and map them to the script's own exit codes.
set -uo pipefail

# Repository root: two directories above the directory holding this script.
# Without errexit a failed cd would otherwise leave REPO_ROOT empty and the
# later `git -C "$REPO_ROOT"` calls would operate on the current directory.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" || {
  echo "ERROR: Cannot resolve repository root from ${BASH_SOURCE[0]}." >&2
  exit 1
}

# Remote login name; overridable from the environment.
SSH_USER="${SSH_USER:-oskar}"

# Epoch seconds at start; report() uses it to print the elapsed time.
START_TIME=$(date +%s)

# Command-line state, filled in by the argument parser.
TARGET=""
DRY_RUN=false
NO_GATE=false

#######################################
# Print the command-line help and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 1)
# Outputs:   help text on stderr
# Returns:   never returns; exits with the requested status
#######################################
usage() {
  local status="${1:-1}"

  # Quoted delimiter: the text is emitted literally, nothing is expanded.
  cat >&2 <<'EOF'
Usage: deploy.sh <target> [--dry-run] [--no-gate]

Targets:
  control-plane   observer/supervisor/executor/operator-ui on VPS
  vps             all VPS GitOps services
  piha            PIHA services
  solaria         SOLARIA compute services
  chelsty-infra   CHELSTY edge node (LTE, longer SSH timeout)

Flags:
  --dry-run       run preflight + gate only; stop before deploy
  --no-gate       skip pytest + docker build (emergency only; logged as WARNING)

Exit codes: 0=ok 1=preflight 2=gate 3=execute 4=verify 5=handoff(sudo)
EOF

  exit "$status"
}

# ── ARGUMENTS ────────────────────────────────────────────────────────────────

# Consume the command line: exactly one target plus optional flags, in any
# order. Anything unrecognised prints the help text and terminates via usage.
while [[ $# -gt 0 ]]; do
  case "$1" in
    control-plane|vps|piha|solaria|chelsty-infra)
      # A second target would otherwise silently replace the first one.
      if [[ -n "$TARGET" ]]; then
        echo "Error: more than one target given ('${TARGET}' and '$1')." >&2
        usage
      fi
      TARGET="$1"
      shift
      ;;
    --dry-run)
      DRY_RUN=true
      shift
      ;;
    --no-gate)
      NO_GATE=true
      shift
      ;;
    -h|--help)
      # Help was requested explicitly; ask usage for exit status 0.
      usage 0
      ;;
    *)
      echo "Unknown argument: $1" >&2
      usage
      ;;
  esac
done

if [[ -z "$TARGET" ]]; then
  echo "Error: target is required."  >&2
  usage
fi

# SSH host alias: control-plane is reached through the "vps" host; every other
# target name is used as the host name directly.
case "$TARGET" in
  control-plane) SSH_HOST="vps" ;;
  *)             SSH_HOST="$TARGET" ;;
esac

# SSH connect timeout in seconds; chelsty-* targets get a longer one.
case "$TARGET" in
  chelsty-*) SSH_TIMEOUT=30 ;;
  *)         SSH_TIMEOUT=5 ;;
esac

# ── PREFLIGHT ────────────────────────────────────────────────────────────────

#######################################
# Check that the local checkout and the target host are ready for a deploy.
# Checks, in order: current branch is master; no unstaged changes; no staged
# changes; origin/master can be fetched; HEAD has no commits missing from
# origin/master; the target host answers a non-interactive SSH login.
# Globals:   REPO_ROOT, SSH_USER, SSH_HOST, SSH_TIMEOUT (read)
# Outputs:   progress on stdout, errors on stderr
# Returns:   0 when every check passes; exits the script with 1 otherwise
#######################################
preflight() {
  echo "=== PREFLIGHT ==="

  local branch
  if ! branch=$(git -C "$REPO_ROOT" rev-parse --abbrev-ref HEAD); then
    echo "ERROR: Cannot determine the current branch in ${REPO_ROOT}." >&2
    exit 1
  fi
  if [[ "$branch" != "master" ]]; then
    echo "ERROR: On branch '${branch}', not master. Switch to master and push first." >&2
    exit 1
  fi
  echo "[ok] branch: master"

  if ! git -C "$REPO_ROOT" diff --quiet; then
    echo "ERROR: Unstaged changes in working tree. Commit or stash before deploying." >&2
    exit 1
  fi
  if ! git -C "$REPO_ROOT" diff --cached --quiet; then
    echo "ERROR: Staged but uncommitted changes. Commit before deploying." >&2
    exit 1
  fi
  echo "[ok] working tree clean"

  # The unpushed-commit check below is only meaningful against a fresh
  # origin/master, so a failed fetch must stop the deploy instead of being
  # ignored (errexit is not active in this script).
  if ! git -C "$REPO_ROOT" fetch origin master --quiet; then
    echo "ERROR: 'git fetch origin master' failed; cannot compare with origin/master." >&2
    exit 1
  fi

  local unpushed
  if ! unpushed=$(git -C "$REPO_ROOT" log origin/master..HEAD --oneline); then
    echo "ERROR: Cannot list commits in origin/master..HEAD." >&2
    exit 1
  fi
  if [[ -n "$unpushed" ]]; then
    echo "ERROR: Unpushed commits on master:" >&2
    echo "$unpushed" >&2
    echo "Push first: git push origin master" >&2
    exit 1
  fi
  echo "[ok] no unpushed commits"

  echo "Checking SSH: ${SSH_USER}@${SSH_HOST} (ConnectTimeout=${SSH_TIMEOUT}s)..."
  # BatchMode=yes makes ssh fail instead of prompting; ssh's own diagnostics
  # are discarded and replaced by the single error line below.
  if ! ssh -o "ConnectTimeout=${SSH_TIMEOUT}" -o BatchMode=yes \
    "${SSH_USER}@${SSH_HOST}" true 2>/dev/null; then
    echo "ERROR: Cannot reach ${SSH_HOST} via SSH (timeout ${SSH_TIMEOUT}s)." >&2
    exit 1
  fi
  echo "[ok] ${SSH_HOST} reachable"
}

# ── GATE ─────────────────────────────────────────────────────────────────────

#######################################
# Run the quality gate for the selected target: pytest (when the service has
# a tests/ directory) and a docker build for every service of the target that
# has a Dockerfile under services/<name>/ in this repository.
# For control-plane the service list is the single entry "control-plane";
# for other targets it is read from hosts/<target>/services.yaml, whose
# "services" key may be a mapping (keys are names) or a list of names.
# Globals:   NO_GATE, TARGET, REPO_ROOT (read)
# Outputs:   progress on stdout, warnings and failures on stderr
# Returns:   0 when the gate passes or is skipped; exits the script with 2
#            when services.yaml is missing or unreadable, or any check fails
#######################################
gate() {
  if [[ "$NO_GATE" == "true" ]]; then
    echo "=== GATE: SKIPPED ==="
    echo "WARNING: --no-gate active — pytest + docker build bypassed (emergency mode)." >&2
    return 0
  fi

  echo "=== GATE ==="

  local services=()
  local svc svc_dir

  if [[ "$TARGET" == "control-plane" ]]; then
    services=("control-plane")
  else
    local svc_yaml="${REPO_ROOT}/hosts/${TARGET}/services.yaml"
    if [[ ! -f "$svc_yaml" ]]; then
      echo "ERROR: ${svc_yaml} not found." >&2
      exit 2
    fi

    # The path is handed over as an argument rather than pasted into the
    # Python source, so quotes in the path cannot break the snippet. A parse
    # failure is fatal: an unreadable service list must not look like
    # "nothing to gate".
    local svc_list
    if ! svc_list=$(python3 -c '
import sys
import yaml

with open(sys.argv[1]) as f:
    data = yaml.safe_load(f) or {}
svcs = data.get("services") or {}
if isinstance(svcs, dict):
    print("\n".join(svcs.keys()))
elif isinstance(svcs, list):
    print("\n".join(svcs))
' "$svc_yaml"); then
      echo "ERROR: Cannot read the service list from ${svc_yaml}." >&2
      exit 2
    fi

    # Only services that ship a Dockerfile in this repository are gated.
    while IFS= read -r svc; do
      [[ -z "$svc" ]] && continue
      if [[ -f "${REPO_ROOT}/services/${svc}/Dockerfile" ]]; then
        services+=("$svc")
      fi
    done <<< "$svc_list"
  fi

  if [[ ${#services[@]} -eq 0 ]]; then
    echo "[info] No services with local Dockerfile found for ${TARGET} — gate trivially passes."
    return 0
  fi

  echo "Services under gate: ${services[*]}"

  # Keep going after a failure so one run reports every failing service.
  local gate_failed=false
  for svc in "${services[@]}"; do
    svc_dir="${REPO_ROOT}/services/${svc}"

    if [[ -d "${svc_dir}/tests" ]]; then
      echo "--- pytest: ${svc} ---"
      if ! python3 -m pytest "${svc_dir}/tests" -q; then
        echo "GATE FAIL: pytest failed for ${svc}" >&2
        gate_failed=true
      fi
    fi

    echo "--- docker build: ${svc} ---"
    if ! docker build --quiet "${svc_dir}" >/dev/null; then
      echo "GATE FAIL: docker build failed for ${svc}" >&2
      gate_failed=true
    fi
  done

  if [[ "$gate_failed" == "true" ]]; then
    exit 2
  fi
  echo "[ok] gate passed"
}

# ── EXECUTE ──────────────────────────────────────────────────────────────────

#######################################
# Run the deploy for the selected target and print its captured output.
# control-plane runs scripts/deploy/deploy-control-plane.sh --ssh from this
# checkout; every other target runs "git pull" and deploy-node.sh on the
# remote host over non-interactive SSH.
# Globals:   TARGET, REPO_ROOT, SSH_USER, SSH_HOST, SSH_TIMEOUT (read)
# Outputs:   deploy output on stdout (printed after the command finishes),
#            errors and manual-run hints on stderr
# Returns:   0 on success; exits the script with 5 when the output shows sudo
#            asking for a password, or 3 when the command exited non-zero
#######################################
execute() {
  echo "=== EXECUTE ==="

  local cmd_output
  local cmd_exit=0

  if [[ "$TARGET" == "control-plane" ]]; then
    echo "Running deploy-control-plane.sh --ssh..."
    cmd_output=$("${REPO_ROOT}/scripts/deploy/deploy-control-plane.sh" --ssh 2>&1) \
      || cmd_exit=$?
  else
    echo "SSHing to ${SSH_HOST}: git pull + deploy-node.sh..."
    cmd_output=$(ssh -o "ConnectTimeout=${SSH_TIMEOUT}" -o BatchMode=yes \
      "${SSH_USER}@${SSH_HOST}" \
      'cd ~/homelab-codex-ws && git pull && ./scripts/deploy/deploy-node.sh' 2>&1) \
      || cmd_exit=$?
  fi

  printf '%s\n' "$cmd_output"

  # Detect sudo wanting a password. Besides the interactive prompt, sudo
  # prints one of the "sudo: ..." diagnostics when it has no terminal to
  # prompt on. Matching is done in-shell: `echo | grep -q` under pipefail can
  # report failure on large output when grep exits before echo has finished.
  local needs_sudo=false
  local marker
  for marker in \
    '[sudo] password' \
    'sudo: a terminal is required' \
    'sudo: a password is required'; do
    if [[ "$cmd_output" == *"$marker"* ]]; then
      needs_sudo=true
      break
    fi
  done

  if [[ "$needs_sudo" == "true" ]]; then
    echo "" >&2
    echo "ERROR (exit 5): Deploy hit an interactive sudo prompt." >&2
    echo "Run manually:" >&2
    if [[ "$TARGET" == "control-plane" ]]; then
      echo "  ssh -t ${SSH_USER}@${SSH_HOST} 'cd ~/homelab-codex-ws && git pull origin master && cd services/control-plane && bash deploy-local.sh'" >&2
    else
      echo "  ssh -t ${SSH_USER}@${SSH_HOST} 'cd ~/homelab-codex-ws && git pull && ./scripts/deploy/deploy-node.sh'" >&2
    fi
    exit 5
  fi

  if [[ $cmd_exit -ne 0 ]]; then
    echo "ERROR: Deploy command exited ${cmd_exit}." >&2
    exit 3
  fi

  echo "[ok] execute completed"
}

# ── VERIFY ───────────────────────────────────────────────────────────────────

#######################################
# Inspect `docker ps` on the target host after a deploy.
# Fails when docker ps itself fails, when any listed container's status does
# not start with "Up", when any status contains "(unhealthy)", or, for the
# control-plane target, when a name containing supervisor, observer, executor
# or operator-ui is missing from the listing.
# NOTE(review): the remote command is `docker ps` without -a, which by default
# lists running containers only, so a container that has already exited would
# not show up here. Confirm that this is the intended scope.
# Globals:   TARGET, SSH_USER, SSH_HOST, SSH_TIMEOUT (read)
# Outputs:   the docker ps listing on stdout, errors on stderr
# Returns:   0 when all checks pass; exits the script with 4 otherwise
#######################################
verify() {
  echo "=== VERIFY ==="

  local ps_output
  local ps_exit=0
  ps_output=$(ssh -o "ConnectTimeout=${SSH_TIMEOUT}" -o BatchMode=yes \
    "${SSH_USER}@${SSH_HOST}" \
    'docker ps --format "{{.Names}}\t{{.Status}}"' 2>&1) \
    || ps_exit=$?

  if [[ $ps_exit -ne 0 ]]; then
    echo "ERROR: docker ps failed on ${SSH_HOST}:" >&2
    echo "$ps_output" >&2
    exit 4
  fi

  printf '%s\n' "$ps_output"

  local failed=false

  # Every non-empty line is "<name>TAB<status>"; a healthy status starts with
  # "Up". `|| true` keeps "no match" from counting as a pipeline failure.
  local not_up
  not_up=$(grep -v '^$' <<<"$ps_output" | grep -v $'\tUp' || true)
  if [[ -n "$not_up" ]]; then
    echo "ERROR: Containers not in Up state:" >&2
    echo "$not_up" >&2
    failed=true
  fi

  local unhealthy
  unhealthy=$(grep -F '(unhealthy)' <<<"$ps_output" || true)
  if [[ -n "$unhealthy" ]]; then
    echo "ERROR: Unhealthy containers:" >&2
    echo "$unhealthy" >&2
    failed=true
  fi

  if [[ "$TARGET" == "control-plane" ]]; then
    # Look for each component in the name column only, and match in-shell:
    # `echo | grep -q` under pipefail can report a false "absent" when grep
    # exits before echo has written all of a large listing.
    local names cp_svc
    names=$(cut -f1 <<<"$ps_output")
    for cp_svc in supervisor observer executor operator-ui; do
      if [[ "$names" != *"$cp_svc"* ]]; then
        echo "ERROR: control-plane component absent from docker ps: ${cp_svc}" >&2
        failed=true
      fi
    done
  fi

  if [[ "$failed" == "true" ]]; then
    echo "" >&2
    echo "Full docker ps output above." >&2
    exit 4
  fi

  echo "[ok] all containers healthy"
}

# ── REPORT ───────────────────────────────────────────────────────────────────

#######################################
# Print the one-line summary of a finished run, preceded by an empty line.
# Globals:   START_TIME, REPO_ROOT, TARGET, NO_GATE (read)
# Arguments: $1 - "dry-run" for a dry run; anything else (default "deploy")
#                 is reported as a full deploy
# Outputs:   summary line on stdout
#######################################
report() {
  local mode="${1:-deploy}"

  local end_time elapsed
  end_time=$(date +%s)
  elapsed=$(( end_time - START_TIME ))

  # Without errexit a failing git would leave the commit field empty; make
  # that case visible in the summary instead.
  local commit_hash
  commit_hash=$(git -C "$REPO_ROOT" rev-parse --short HEAD) || commit_hash="unknown"

  local gate_s="ok"
  if [[ "$NO_GATE" == "true" ]]; then
    gate_s="skip"
  fi

  # A dry run stops before execute/verify, hence verify is reported as
  # skipped; the two summary lines differ only in their label.
  local label="DEPLOY OK"
  local verify_s="green"
  if [[ "$mode" == "dry-run" ]]; then
    label="DRY RUN OK"
    verify_s="skip(dry-run)"
  fi

  echo ""
  echo "${label} | target=${TARGET} | commit=${commit_hash} | gate=${gate_s} | verify=${verify_s} | ${elapsed}s"
}

# ── MAIN ─────────────────────────────────────────────────────────────────────

# Phases run strictly in order. A phase that fails terminates the script
# itself with its own exit code, so reaching the next line means the previous
# phase succeeded.
preflight
gate

# --dry-run stops after preflight + gate and reports without deploying.
if [[ "$DRY_RUN" == "true" ]]; then
  report dry-run
  exit 0
fi

execute
verify
report