feat(onboard): add node onboarding scaffold (bash, idempotent)
- scripts/onboard/onboard.sh: orchestrator with --node/--step/--from/--dry-run flags,
deploy_autonomy + git_control gates, lexicographic step ordering
- scripts/onboard/lib/common.sh: log/warn/die/step helpers, yaml_get (yq+grep/sed fallback),
ensure_line, git() wrapper enforcing --no-pager
- scripts/onboard/lib/remote.sh: rrun/rcopy/rsync_dir/rcheck SSH wrappers, dry-run aware
- scripts/onboard/steps/00-preflight.sh: read-only fact collection (arch, RAM, disk, docker,
tailscale, MagicMirror runtime, swap), human report + machine YAML snippet
- scripts/onboard/steps/10-50: stub files with TODO headers, no mutations
- hosts/lustro/node.yaml: LUSTRO edge node draft (KEN, role=edge, deploy_autonomy=true,
git_control=false); hardware fields marked TODO for preflight population
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 14:23:21 +02:00
|
|
|
|
#!/usr/bin/env bash
|
2026-06-09 20:36:00 +02:00
|
|
|
|
# scripts/onboard/steps/50-verify.sh — restart the observer + smoke-test the node in the panel
|
feat(onboard): add node onboarding scaffold (bash, idempotent)
- scripts/onboard/onboard.sh: orchestrator with --node/--step/--from/--dry-run flags,
deploy_autonomy + git_control gates, lexicographic step ordering
- scripts/onboard/lib/common.sh: log/warn/die/step helpers, yaml_get (yq+grep/sed fallback),
ensure_line, git() wrapper enforcing --no-pager
- scripts/onboard/lib/remote.sh: rrun/rcopy/rsync_dir/rcheck SSH wrappers, dry-run aware
- scripts/onboard/steps/00-preflight.sh: read-only fact collection (arch, RAM, disk, docker,
tailscale, MagicMirror runtime, swap), human report + machine YAML snippet
- scripts/onboard/steps/10-50: stub files with TODO headers, no mutations
- hosts/lustro/node.yaml: LUSTRO edge node draft (KEN, role=edge, deploy_autonomy=true,
git_control=false); hardware fields marked TODO for preflight population
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 14:23:21 +02:00
|
|
|
|
#
|
2026-06-09 20:36:00 +02:00
|
|
|
|
# Run AFTER: merge task/node-onboarding → master + git pull on the VPS.
#
# Checks:
# 1. SSH <node>: node-agent container running
# 2. SSH <node>: events present in /opt/homelab/events/<node>/
# 3. SSH VPS: docker restart control-plane-observer + poll observer.heartbeat
# 4. SSH VPS: <node> visible in /opt/homelab/world/nodes.json
#
# Exit 0 — all checks OK | Exit 1 — at least one FAIL (summary table)
|
feat(onboard): add node onboarding scaffold (bash, idempotent)
- scripts/onboard/onboard.sh: orchestrator with --node/--step/--from/--dry-run flags,
deploy_autonomy + git_control gates, lexicographic step ordering
- scripts/onboard/lib/common.sh: log/warn/die/step helpers, yaml_get (yq+grep/sed fallback),
ensure_line, git() wrapper enforcing --no-pager
- scripts/onboard/lib/remote.sh: rrun/rcopy/rsync_dir/rcheck SSH wrappers, dry-run aware
- scripts/onboard/steps/00-preflight.sh: read-only fact collection (arch, RAM, disk, docker,
tailscale, MagicMirror runtime, swap), human report + machine YAML snippet
- scripts/onboard/steps/10-50: stub files with TODO headers, no mutations
- hosts/lustro/node.yaml: LUSTRO edge node draft (KEN, role=edge, deploy_autonomy=true,
git_control=false); hardware fields marked TODO for preflight population
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-08 14:23:21 +02:00
|
|
|
|
|
|
|
|
|
|
# Strict mode: abort on a failing command (-e), on use of an unset variable
# (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Identifier of this step; used below as the prefix of step/log messages.
STEP_NAME="50-verify"
|
2026-06-09 20:36:00 +02:00
|
|
|
|
|
|
|
|
|
|
# Required environment; both variables must be set and non-empty, otherwise the
# step aborts with the message given here.
: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
# DRY_RUN is optional and defaults to 0 (run the real checks).
: "${DRY_RUN:=0}"

# Load the shared helper library only when `log` is not already defined in
# this shell. The library is expected to provide the helpers called below.
if ! declare -f log >/dev/null 2>&1; then
  # shellcheck source=../lib/common.sh
  source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
fi

# SSH user and Tailscale hostname of the node under test, read from node.yaml.
SSH_USER=$(yaml_get "${NODE_YAML}" "ssh_user")
TS_HOSTNAME=$(yaml_get "${NODE_YAML}" "tailscale.hostname")

# Both values are mandatory: every check below builds its SSH target or its
# paths from them. Explicit `if` guards are used so the statement's own exit
# status never depends on `&&` short-circuiting.
if [[ -z "${SSH_USER}" ]]; then
  die "ssh_user not set in ${NODE_YAML}"
fi
if [[ -z "${TS_HOSTNAME}" ]]; then
  die "tailscale.hostname not set in ${NODE_YAML}"
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Connection details of the VPS used by checks 3 and 4. The values below are
# defaults; each can be overridden from the environment without editing the file.
VPS_SSH_USER="${VPS_SSH_USER:-oskar}"
VPS_SSH_HOST="${VPS_SSH_HOST:-100.95.58.48}"
# Not referenced in the visible part of this step; kept in case other code
# relies on it.
# shellcheck disable=SC2034
VPS_REPO_PATH="${VPS_REPO_PATH:-/home/oskar/homelab-codex-ws}"

# Non-interactive SSH: never prompt (BatchMode), give up connecting after 10 s,
# and accept the key of a host that is seen for the first time.
_SSH_OPTS=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes)

#######################################
# Run a command on the node under test.
# Globals:   _SSH_OPTS, SSH_USER, TS_HOSTNAME (read)
# Arguments: the remote command, handed to ssh unchanged
# Returns:   exit status of ssh
#######################################
_ssh_node() {
  ssh "${_SSH_OPTS[@]}" "${SSH_USER}@${TS_HOSTNAME}" -- "$@"
}

#######################################
# Run a command on the VPS.
# Globals:   _SSH_OPTS, VPS_SSH_USER, VPS_SSH_HOST (read)
# Arguments: the remote command, handed to ssh unchanged
# Returns:   exit status of ssh
#######################################
_ssh_vps() {
  ssh "${_SSH_OPTS[@]}" "${VPS_SSH_USER}@${VPS_SSH_HOST}" -- "$@"
}

# Check name → outcome ("PASS", "FAIL" or "skip"); filled in by the checks
# below and rendered in the summary table at the end.
declare -A RESULTS=()
|
|
|
|
|
|
|
|
|
|
|
|
# ── 1. node-agent running on <node> ──────────────────────────────────────────
# PASS when `docker ps` on the node lists a running container whose name
# matches ^node-agent$; in dry-run mode the command is only printed.
step "[${STEP_NAME}] 1/4 ${TS_HOSTNAME}: node-agent container"

if [[ "${DRY_RUN:-0}" == 1 ]]; then
  dryrun "ssh ${SSH_USER}@${TS_HOSTNAME} docker ps --filter name=^node-agent\$"
  RESULTS["node-agent-running"]="skip"
elif _ssh_node "docker ps --filter name=^node-agent\$ --filter status=running --format '{{.Names}}'" 2>/dev/null \
  | grep -q "node-agent"; then
  log "OK: node-agent running"
  # Informational only: show name and status; a failure of this second call
  # must not turn a passed check into an abort, hence `|| true`.
  _ssh_node "docker ps --filter name=node-agent --format 'table {{.Names}}\t{{.Status}}'" 2>/dev/null || true
  RESULTS["node-agent-running"]="PASS"
else
  warn "FAIL: node-agent nie działa na ${TS_HOSTNAME}"
  RESULTS["node-agent-running"]="FAIL"
fi
|
|
|
|
|
|
|
|
|
|
|
|
# ── 2. events in /opt/homelab/events/<node>/ ──────────────────────────────────
# PASS when at least one *.json file exists in the node's event directory.
step "[${STEP_NAME}] 2/4 ${TS_HOSTNAME}: eventy"

if [[ "${DRY_RUN:-0}" == 1 ]]; then
  dryrun "ssh ${SSH_USER}@${TS_HOSTNAME} find /opt/homelab/events/${TS_HOSTNAME}/ -name '*.json'"
  RESULTS["events-present"]="skip"
elif _ssh_node "find /opt/homelab/events/${TS_HOSTNAME}/ -name '*.json' 2>/dev/null | head -1" 2>/dev/null \
  | grep -qF ".json"; then
  # -F above: match the literal ".json" (an unescaped dot would match any character).
  # The newest file name is reported for information only; if it cannot be
  # determined (ssh failure or empty output) a "?" placeholder is shown.
  _latest=$(_ssh_node "ls -t /opt/homelab/events/${TS_HOSTNAME}/*.json 2>/dev/null | head -1" || echo "?")
  _latest="${_latest:-?}"
  log "OK: eventy obecne (ostatni: ${_latest})"
  RESULTS["events-present"]="PASS"
else
  warn "FAIL: brak eventów w /opt/homelab/events/${TS_HOSTNAME}/"
  RESULTS["events-present"]="FAIL"
fi
|
|
|
|
|
|
|
|
|
|
|
|
# ── 3. observer restart + health check ────────────────────────────────────────
# Restarts the control-plane-observer container on the VPS, then polls the
# modification time of /opt/homelab/state/observer.heartbeat: up to 6 attempts,
# 5 s apart. PASS as soon as the file is younger than 20 s.
step "[${STEP_NAME}] 3/4 VPS: restart control-plane-observer"

if [[ "${DRY_RUN:-0}" == 1 ]]; then
  dryrun "ssh ${VPS_SSH_USER}@${VPS_SSH_HOST} docker restart control-plane-observer"
  dryrun "poll /opt/homelab/state/observer.heartbeat (max 30s)"
  RESULTS["observer-healthy"]="skip"
else
  log "Restarting control-plane-observer na VPS..."
  # The restart is tested explicitly: under `set -e` an unguarded failure would
  # terminate the script here and the summary table would never be printed.
  if ! _ssh_vps "docker restart control-plane-observer"; then
    warn "FAIL: docker restart control-plane-observer nie powiódł się na VPS"
    warn "Sprawdź: ssh ${VPS_SSH_USER}@${VPS_SSH_HOST} docker logs control-plane-observer --tail 30"
    RESULTS["observer-healthy"]="FAIL"
  else
    log "Polling observer.heartbeat (max 30s)..."
    # Remote one-liner printing the heartbeat file's age in whole seconds.
    _hb_age_cmd="python3 -c \"import os,time; s=os.stat('/opt/homelab/state/observer.heartbeat'); print(int(time.time()-s.st_mtime))\" 2>/dev/null"
    _ok=0
    for (( _i = 1; _i <= 6; _i++ )); do
      sleep 5
      # 999 stands for "age unknown" when ssh or the remote command fails.
      _age=$(_ssh_vps "${_hb_age_cmd}" || echo "999")
      # Empty or non-numeric output would evaluate as 0 in the numeric test
      # below and produce a false PASS, so it is treated as "unknown" as well.
      [[ "${_age}" =~ ^[0-9]+$ ]] || _age=999
      if [[ "${_age}" -lt 20 ]]; then
        log "OK: observer.heartbeat fresh (${_age}s temu)"
        _ok=1
        break
      fi
      log " ... ${_i}×5s, heartbeat ${_age}s old..."
    done

    if [[ "${_ok}" -eq 1 ]]; then
      RESULTS["observer-healthy"]="PASS"
    else
      warn "FAIL: observer.heartbeat nie odświeżony po 30s"
      warn "Sprawdź: ssh ${VPS_SSH_USER}@${VPS_SSH_HOST} docker logs control-plane-observer --tail 30"
      RESULTS["observer-healthy"]="FAIL"
    fi
  fi
fi
|
|
|
|
|
|
|
|
|
|
|
|
# ── 4. <node> visible in world/nodes.json ─────────────────────────────────────
# Reads /opt/homelab/world/nodes.json on the VPS and looks up the entry keyed
# by the node's Tailscale hostname. PASS when its status is "online" or
# "offline"; a missing entry or any other answer is a FAIL.
step "[${STEP_NAME}] 4/4 VPS: ${TS_HOSTNAME} w world/nodes.json"

if [[ "${DRY_RUN:-0}" == 1 ]]; then
  dryrun "ssh ${VPS_SSH_USER}@${VPS_SSH_HOST} python3 -c \"json.load(.../world/nodes.json)['${TS_HOSTNAME}']\""
  RESULTS["world-state"]="skip"
else
  # The embedded Python must start at column 0 inside the quoted string.
  # It prints the node's status, "missing" when there is no entry (or no status
  # field), or "error:<message>" on any exception; "ssh-error" is substituted
  # locally when the ssh call itself fails.
  _node_status=$(_ssh_vps "python3 -c \"
import json, sys
try:
    d = json.load(open('/opt/homelab/world/nodes.json'))
    node = d.get('${TS_HOSTNAME}', {})
    print(node.get('status', 'missing'))
except Exception as e:
    print('error:' + str(e))
\"" 2>/dev/null || echo "ssh-error")

  case "${_node_status}" in
    online|offline)
      log "OK: ${TS_HOSTNAME} w world/nodes.json (status=${_node_status})"
      RESULTS["world-state"]="PASS"
      ;;
    missing)
      warn "FAIL: ${TS_HOSTNAME} nie ma wpisu w world/nodes.json"
      warn "Możliwa przyczyna: observer nie przetworzył jeszcze eventów (poczekaj 60s i spróbuj ponownie)"
      RESULTS["world-state"]="FAIL"
      ;;
    *)
      warn "FAIL: nieoczekiwana odpowiedź: ${_node_status}"
      RESULTS["world-state"]="FAIL"
      ;;
  esac
fi
|
|
|
|
|
|
|
|
|
|
|
|
# ── summary table ─────────────────────────────────────────────────────────────
# Prints one row per check in a fixed order ("???" marks a check that recorded
# no result) and exits 1 when at least one check is FAIL.
echo ""
printf '%s\n' "══════════════════════════════════════════"
printf " %-30s %s\n" "CHECK" "RESULT"
printf '%s\n' "──────────────────────────────────────────"
_any_failed=0
for _key in "node-agent-running" "events-present" "observer-healthy" "world-state"; do
  _val="${RESULTS[${_key}]:-???}"
  printf " %-30s %s\n" "${_key}" "${_val}"
  if [[ "${_val}" == "FAIL" ]]; then
    _any_failed=1
  fi
done
printf '%s\n' "══════════════════════════════════════════"
echo ""

# The failure is reported only after the complete table has been printed.
if [[ "${_any_failed}" -eq 1 ]]; then
  warn "Verify: co najmniej jeden check nie przeszedł"
  exit 1
fi

log "[${STEP_NAME}] Verify OK — ${TS_HOSTNAME} zarejestrowany i widoczny w panelu"
|