- scripts/onboard/onboard.sh: orchestrator with --node/--step/--from/--dry-run flags, deploy_autonomy + git_control gates, lexicographic step ordering
- scripts/onboard/lib/common.sh: log/warn/die/step helpers, yaml_get (yq+grep/sed fallback), ensure_line, git() wrapper enforcing --no-pager
- scripts/onboard/lib/remote.sh: rrun/rcopy/rsync_dir/rcheck SSH wrappers, dry-run aware
- scripts/onboard/steps/00-preflight.sh: read-only fact collection (arch, RAM, disk, docker, tailscale, MagicMirror runtime, swap), human report + machine YAML snippet
- scripts/onboard/steps/10-50: stub files with TODO headers, no mutations
- hosts/lustro/node.yaml: LUSTRO edge node draft (KEN, role=edge, deploy_autonomy=true, git_control=false); hardware fields marked TODO for preflight population

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
188 lines
6.8 KiB
Bash
Executable file
188 lines
6.8 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
# scripts/onboard/onboard.sh — node onboarding orchestrator
|
|
#
|
|
# Usage:
|
|
# onboard.sh --node <name> [--step <name>] [--from <step>] [--dry-run]
|
|
#
|
|
# Flags:
|
|
# --node <name> node name matching hosts/<name>/node.yaml (required)
|
|
# --step <name> run only this step (e.g. 00-preflight)
|
|
# --from <step> start from this step, run all subsequent steps
|
|
# --dry-run print what would be done without mutating anything
|
|
#
|
|
# Steps run in lexicographic order from scripts/onboard/steps/.
|
|
# Steps that require deploy_autonomy=true are skipped (with a warning) when
|
|
# that flag is false in node.yaml. Steps that require git_control=true are
|
|
# similarly gated.
|
|
|
|
set -euo pipefail

# Resolve the repository root (two directory levels above this script), then
# derive the onboarding directories from it.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
LIB_DIR="${REPO_ROOT}/scripts/onboard/lib"
STEPS_DIR="${REPO_ROOT}/scripts/onboard/steps"

# ── shared helpers ────────────────────────────────────────────────────────────
# shellcheck source=lib/common.sh
. "${LIB_DIR}/common.sh"

# ── option defaults (overridden by the command line) ──────────────────────────
NODE_NAME="" ONLY_STEP="" FROM_STEP=""

# DRY_RUN and REPO_ROOT are exported so child processes inherit them.
export REPO_ROOT
export DRY_RUN="false"
|
|
|
|
# ── argument parsing ──────────────────────────────────────────────────────────
|
|
# Print the command-line synopsis to stderr, then terminate the script with
# exit status 1. Takes no arguments and never returns to the caller.
usage() {
  local -a help_lines=(
    'Usage: onboard.sh --node <name> [--step <name>] [--from <step>] [--dry-run]'
    ''
    '--node <name> node name matching hosts/<name>/node.yaml (required)'
    '--step <name> run only this single step (e.g. 00-preflight)'
    '--from <step> start from this step, continue to end'
    '--dry-run no mutations; show what would run'
    ''
    'Examples:'
    'onboard.sh --node lustro'
    'onboard.sh --node lustro --step 00-preflight'
    'onboard.sh --node lustro --from 20-install-docker'
    'onboard.sh --node lustro --dry-run'
  )

  # One line per array element, all of it on stderr.
  printf '%s\n' "${help_lines[@]}" >&2
  exit 1
}
|
|
|
|
# Parse the command line into NODE_NAME / ONLY_STEP / FROM_STEP / DRY_RUN.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --node | --step | --from)
      # A value must follow and must not itself look like an option;
      # otherwise "--node --dry-run" would take "--dry-run" as the node name.
      [[ $# -ge 2 && -n "$2" && "$2" != -* ]] || die "$1 requires a value"
      case "$1" in
        --node) NODE_NAME="$2" ;;
        --step) ONLY_STEP="$2" ;;
        --from) FROM_STEP="$2" ;;
      esac
      shift 2
      ;;
    --dry-run)
      DRY_RUN="true"
      shift
      ;;
    -h | --help)
      # usage writes to stderr and exits 1. Explicitly requested help is not an
      # error, so run it in a subshell, send its text to stdout and exit 0.
      (usage) 2>&1 || true
      exit 0
      ;;
    *)
      die "Unknown argument: $1"
      ;;
  esac
done

# --node is mandatory; usage terminates the script.
if [[ -z "$NODE_NAME" ]]; then
  warn "--node is required"
  usage
fi

# Step selection checks --step before --from, so --from has no effect when
# both are given. Say so instead of dropping it silently.
if [[ -n "$ONLY_STEP" && -n "$FROM_STEP" ]]; then
  warn "--step and --from both given; --from '${FROM_STEP}' is ignored"
fi

export NODE_NAME
|
|
|
|
# ── load node.yaml ────────────────────────────────────────────────────────────
# NODE_YAML is not assigned in this script; presumably require_node_yaml sets
# it (verify in lib/common.sh).
require_node_yaml "$NODE_NAME"

log "Loading manifest: $NODE_YAML"

# Print the manifest value for key $1, or nothing when it is unavailable.
# NOTE(review): yaml_get's behaviour for a missing key is not visible here.
# Under `set -e` a non-zero status would abort the script before the defaults
# and the explicit die messages below run, so failure is tolerated here. A
# literal "null" (what yq prints for an absent key) is treated as unset.
_manifest_get() {
  local value
  value="$(yaml_get "$NODE_YAML" "$1")" || value=""
  if [[ "$value" == "null" ]]; then
    value=""
  fi
  printf '%s' "$value"
}

DEPLOY_AUTONOMY="$(_manifest_get "deploy_autonomy")"
GIT_CONTROL="$(_manifest_get "git_control")"
SSH_USER="$(_manifest_get "ssh_user")"
TS_HOSTNAME="$(_manifest_get "tailscale.hostname")"

# Both gates default to "false" when the manifest does not set them.
DEPLOY_AUTONOMY="${DEPLOY_AUTONOMY:-false}"
GIT_CONTROL="${GIT_CONTROL:-false}"

# The SSH target is mandatory.
[[ -n "$SSH_USER" ]] || die "ssh_user not set in $NODE_YAML"
[[ -n "$TS_HOSTNAME" ]] || die "tailscale.hostname not set in $NODE_YAML"

export ONBOARD_SSH_USER="$SSH_USER"
export ONBOARD_SSH_HOST="$TS_HOSTNAME"

log "Node: ${NODE_NAME} | host: ${TS_HOSTNAME} | user: ${SSH_USER}"
log "deploy_autonomy=${DEPLOY_AUTONOMY} git_control=${GIT_CONTROL} dry_run=${DRY_RUN}"
|
|
|
|
# ── collect steps ─────────────────────────────────────────────────────────────
# Every NN-name.sh entry directly inside STEPS_DIR is a step; the list keeps
# the order produced by sort.
ALL_STEPS=()
while IFS= read -r step_candidate; do
  ALL_STEPS+=("$step_candidate")
done < <(find "$STEPS_DIR" -maxdepth 1 -name '[0-9][0-9]-*.sh' | sort)

# Nothing to orchestrate without at least one step file.
((${#ALL_STEPS[@]} > 0)) || die "No steps found in $STEPS_DIR"
|
|
|
|
# Determine which steps to run based on flags.
#   --step X : only the step whose name (file name without .sh) equals X
#   --from X : every step whose name does not sort before X
# --step is checked first, so it wins when both are given.
declare -a STEPS_TO_RUN=()
from_step_exists="false"

for step_path in "${ALL_STEPS[@]}"; do
  step_file=$(basename "$step_path" .sh)

  if [[ -n "$ONLY_STEP" ]]; then
    # Exact match on the step name (e.g. "00-preflight" selects 00-preflight.sh)
    if [[ "$step_file" != "$ONLY_STEP" ]]; then
      continue
    fi
  elif [[ -n "$FROM_STEP" ]]; then
    if [[ "$step_file" == "$FROM_STEP" ]]; then
      from_step_exists="true"
    fi
    # Skip steps that sort strictly before FROM_STEP ("<" already excludes
    # equality, so FROM_STEP itself is kept).
    if [[ "$step_file" < "$FROM_STEP" ]]; then
      continue
    fi
  fi

  STEPS_TO_RUN+=("$step_path")
done

if [[ ${#STEPS_TO_RUN[@]} -eq 0 ]]; then
  die "No matching steps found (--step='${ONLY_STEP}' --from='${FROM_STEP}')"
fi

# A --from value that names no step still selects by sort order, so a typo
# could silently skip steps. Keep the behaviour but make it visible.
if [[ -z "$ONLY_STEP" && -n "$FROM_STEP" && "$from_step_exists" != "true" ]]; then
  warn "--from '${FROM_STEP}' does not name an existing step; starting at the first step that sorts after it"
fi

log "Steps to run (${#STEPS_TO_RUN[@]}):"
for s in "${STEPS_TO_RUN[@]}"; do
  printf " %s\n" "$(basename "$s")"
done
echo ""
|
|
|
|
# ── step execution loop ───────────────────────────────────────────────────────
|
|
# Steps that start at 10+ are "mutating" and require deploy_autonomy=true.
|
|
# Steps whose name contains "git", "repo" or "clone" (expected at 30+) require git_control=true.
|
|
# Step 00-preflight is always allowed (read-only).
|
|
|
|
# Decide whether a step is subject to the deploy_autonomy gate.
# Arguments: $1 - step name or number; only its leading digits are used
# Returns:   0 when the leading digits, read as a decimal number, are >= 10;
#            1 otherwise, including when $1 has no leading digits
_step_needs_autonomy() {
  local num="${1%%[^0-9]*}" # leading digits

  # No numeric prefix: treat as not gated.
  [[ -n "$num" ]] || return 1

  # Force base 10. A leading zero would otherwise be parsed as octal, which
  # makes "08"/"09" an arithmetic error and reads "010" as 8.
  ((10#$num >= 10))
}
|
|
|
|
# Decide whether a step is subject to the git_control gate.
# Arguments: $1 - step name
# Returns:   0 when the name contains "git", "repo" or "clone"; 1 otherwise
_step_needs_git_control() {
  case "$1" in
    *git* | *repo* | *clone*) return 0 ;;
    *) return 1 ;;
  esac
}
|
|
|
|
# Names of steps that exited non-zero; inspected once the loop has finished.
FAILED_STEPS=()

for step_path in "${STEPS_TO_RUN[@]}"; do
  # Step name = file name without directory and .sh suffix; step_num = its
  # leading digits.
  step_file="${step_path##*/}"
  step_file="${step_file%.sh}"
  step_num="${step_file%%[^0-9]*}"

  # autonomy gate: gated steps run only when deploy_autonomy is "true"
  if [[ "$DEPLOY_AUTONOMY" != "true" ]] && _step_needs_autonomy "$step_num"; then
    warn "Skipping $step_file — deploy_autonomy=false in $NODE_YAML"
    warn "Run this step manually or set deploy_autonomy: true"
    continue
  fi

  # git_control gate: gated steps run only when git_control is "true"
  if [[ "$GIT_CONTROL" != "true" ]] && _step_needs_git_control "$step_file"; then
    warn "Skipping $step_file — git_control=false in $NODE_YAML"
    continue
  fi

  step "Running: $step_file"

  # Dry-run: announce the step but never execute it.
  if [[ "$DRY_RUN" == "true" ]]; then
    dryrun "Would execute: $step_path"
    continue
  fi

  # Run the step in a child bash. A failure is recorded and the loop moves on
  # to the next step rather than aborting.
  rc=0
  bash "$step_path" || rc=$?
  case "$rc" in
    0)
      log "$step_file — OK"
      ;;
    *)
      warn "$step_file — FAILED (exit $rc)"
      FAILED_STEPS+=("$step_file")
      ;;
  esac

  printf '\n'
done
|
|
|
|
# ── summary ───────────────────────────────────────────────────────────────────
# Any recorded failure is handed to die together with the failed step names.
((${#FAILED_STEPS[@]} == 0)) ||
  die "Onboarding finished with failures: ${FAILED_STEPS[*]}"

# Otherwise report success, worded according to the run mode.
case "$DRY_RUN" in
  true) log "Dry-run complete — no changes made." ;;
  *) log "All steps completed successfully for node ${NODE_NAME}." ;;
esac
|