feat(onboard): add 00-access step + update lustro node.yaml

00-access.sh implements a 3-stage idempotent access bootstrap:
  1. ensure_ssh_key  — ssh-copy-id to first_contact (pi@pimirror2.local),
     skips if BatchMode key-auth already passes
  2. ensure_tailscale — install via install.sh if missing, then tailscale up
     --hostname=lustro; prints interactive auth URL to operator, blocks until
     authenticated; skips if BackendState already Running
  3. verify — SSH over Tailscale to pi@lustro; requires an 'ok' reply and warns unless arch=aarch64

Reads first_contact and tailscale.hostname from node.yaml.
Respects --dry-run. No NOPASSWD or /opt/homelab mutations.

hosts/lustro/node.yaml: fill known hardware facts (arm64, 4096 MB RAM,
zram swap, docker_present, mm_runtime=systemd:magicmirror.service),
add ssh_user=pi, first_contact=pi@pimirror2.local,
services.node-agent.runtime engine=docker mem_limit=256m.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Oskar Kapala 2026-06-08 14:43:16 +02:00
parent adb84079ab
commit 9012a36827
2 changed files with 168 additions and 10 deletions

View file

@ -1,26 +1,31 @@
# hosts/lustro/node.yaml — LUSTRO edge node manifest # hosts/lustro/node.yaml — LUSTRO edge node manifest
# Run scripts/onboard/onboard.sh --node lustro --step 00-preflight # First-contact bootstrap: scripts/onboard/onboard.sh --node lustro --step 00-access
# to auto-populate the TODO fields below. # Full onboarding: scripts/onboard/onboard.sh --node lustro
name: LUSTRO name: LUSTRO
role: edge role: edge
location: KEN location: KEN
ssh_user: oskar ssh_user: pi
first_contact: pi@pimirror2.local # LAN/mDNS target before Tailscale is set up
tailscale: tailscale:
hostname: lustro hostname: lustro
# ip: TODO — fill after tailscale join (step 30) # ip: TODO — fill after tailscale join (step 30-install-tailscale)
deploy_autonomy: true # onboard.sh may run mutating steps autonomously deploy_autonomy: true # onboard.sh may run mutating steps autonomously
git_control: false # node does NOT pull from Forgejo directly; push-based git_control: false # node does NOT pull from Forgejo; push-based via SATURN
hardware: hardware:
arch: TODO # populated by 00-preflight (e.g. aarch64, x86_64) arch: arm64
ram_mb: TODO # populated by 00-preflight ram_mb: 4096
swap: TODO # populated by 00-preflight (none / <size>) swap:
mm_runtime: TODO # populated by 00-preflight (systemd / pm2 / process / none) kind: zram
docker_present: true
mm_runtime: systemd:magicmirror.service
services: services:
node-agent: node-agent:
runtime: TODO # populated by 40-deploy-node-agent (image tag, config hash) runtime:
engine: docker
mem_limit: 256m

View file

@ -0,0 +1,153 @@
#!/usr/bin/env bash
# scripts/onboard/steps/00-access.sh — establish remote access channel
#
# Stages:
#   1. ensure_ssh_key   — copy SATURN public key to first_contact (idempotent)
#   2. ensure_tailscale — install Tailscale and join network (interactive auth URL)
#   3. verify           — confirm SSH over Tailscale; warn unless arch=aarch64
#
# Does NOT configure NOPASSWD, /opt/homelab, or any host mutation beyond Tailscale.
# Reads: first_contact (e.g. pi@pimirror2.local) and tailscale.hostname from node.yaml.
# Environment:
#   REPO_ROOT, NODE_YAML                — required (set by onboard.sh)
#   DRY_RUN                             — optional, defaults to "false"
#   ONBOARD_SSH_USER, ONBOARD_SSH_HOST  — optional overrides, see below
# pi user on Raspberry Pi OS has passwordless sudo by default — required for `tailscale up`.
set -euo pipefail

STEP_NAME="00-access"
: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
: "${DRY_RUN:=false}"

# Source common.sh when run standalone (orchestrator sources it before calling steps)
if ! declare -f log >/dev/null 2>&1; then
  # shellcheck source=../lib/common.sh
  source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
fi

# ── parse node.yaml ───────────────────────────────────────────────────────────
# `|| VAR=""` keeps a failing lookup from tripping `set -e`, so the explicit
# checks below can name the missing key.
# NOTE(review): assumes yaml_get may return non-zero for an absent key — confirm
# against common.sh.
FIRST_CONTACT=$(yaml_get "$NODE_YAML" "first_contact") || FIRST_CONTACT=""
TS_HOSTNAME=$(yaml_get "$NODE_YAML" "tailscale.hostname") || TS_HOSTNAME=""
[[ -n "$FIRST_CONTACT" ]] || die "first_contact not set in $NODE_YAML"
[[ -n "$TS_HOSTNAME" ]] || die "tailscale.hostname not set in $NODE_YAML"

# User part of first_contact. A bare host (no "user@") carries no user, so leave
# FC_USER empty instead of mistaking the host name for a login name.
FC_USER=""
if [[ "$FIRST_CONTACT" == *@* ]]; then
  FC_USER="${FIRST_CONTACT%%@*}"
fi
[[ -n "${ONBOARD_SSH_USER:-}" || -n "$FC_USER" ]] \
  || die "first_contact '${FIRST_CONTACT}' has no user@ part and ONBOARD_SSH_USER is not set"

# remote.sh binds to ONBOARD_SSH_USER / ONBOARD_SSH_HOST — set from orchestrator
# (post-Tailscale target). Standalone fallback: the user comes from first_contact,
# the host is the Tailscale hostname.
export ONBOARD_SSH_USER="${ONBOARD_SSH_USER:-${FC_USER}}"
export ONBOARD_SSH_HOST="${ONBOARD_SSH_HOST:-${TS_HOSTNAME}}"
# shellcheck source=../lib/remote.sh
source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"

# ── SSH option arrays ─────────────────────────────────────────────────────────
# Interactive — no BatchMode; used for ssh-copy-id (may need password)
_FC_SSH_NOKEY=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
# Post-key — BatchMode; used once the key is installed
_FC_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes)
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 1 — ensure_ssh_key
# ═══════════════════════════════════════════════════════════════════════════════
# Probe key-based login with BatchMode (never prompts). Only when the probe
# fails is a public key pushed with ssh-copy-id, which may ask for a password.
step "[$STEP_NAME] 1/3 ensure_ssh_key → ${FIRST_CONTACT}"
if [[ "$DRY_RUN" == "true" ]]; then
  dryrun "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 ${FIRST_CONTACT} true"
  dryrun "# if key not present:"
  dryrun "ssh-copy-id -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -i ~/.ssh/id_*.pub ${FIRST_CONTACT}"
elif ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true 2>/dev/null; then
  # stderr is discarded on purpose: a failed probe is the normal "no key yet" case.
  log "SSH key already accepted by ${FIRST_CONTACT} — skip"
else
  # Find the first available public key. If the glob matches nothing it stays
  # a literal pattern, which the -f test below rejects.
  pubkeys=( "$HOME"/.ssh/id_*.pub )
  [[ -f "${pubkeys[0]}" ]] || die "No public key found at ~/.ssh/id_*.pub on SATURN"
  log "Installing public key ${pubkeys[0]} on ${FIRST_CONTACT}"
  log "(password prompt for ${FIRST_CONTACT} expected)"
  # _FC_SSH_NOKEY carries the interactive (non-BatchMode) options declared for
  # exactly this call, so the option list lives in one place.
  ssh-copy-id "${_FC_SSH_NOKEY[@]}" -i "${pubkeys[0]}" "$FIRST_CONTACT"
  log "Verifying key-based access..."
  ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true \
    || die "Key-based SSH still fails after ssh-copy-id — check ~/.ssh/authorized_keys on ${FIRST_CONTACT}"
  log "Key installed and verified"
fi
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 2 — ensure_tailscale
# ═══════════════════════════════════════════════════════════════════════════════
# Announce the stage: Tailscale is set up on the first-contact host and joined
# under the hostname taken from node.yaml.
step "[${STEP_NAME}] 2/3 ensure_tailscale on ${FIRST_CONTACT} → hostname=${TS_HOSTNAME}"
#######################################
# Print the remote Tailscale BackendState, or "unknown" when it cannot be read.
# Globals:   _FC_SSH (read), FIRST_CONTACT (read)
# Arguments: none
# Outputs:   one line on stdout: the BackendState string or "unknown"
# Returns:   always 0 — an unreachable host is reported as "unknown" so that
#            callers using `var=$(_ts_state)` are not aborted by `set -e`
#######################################
_ts_state() {
  local state
  # The remote side parses `tailscale status --json` with python3 (available on
  # RPi OS) and already falls back to "unknown" when tailscale or the JSON
  # parse fails. The local `|| state=""` covers ssh itself failing.
  state=$(ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" \
    'tailscale status --json 2>/dev/null | python3 -c \
      "import sys,json; print(json.load(sys.stdin).get(\"BackendState\",\"unknown\"))" \
      2>/dev/null || echo "unknown"') || state=""
  # Empty output (ssh failure or an empty BackendState) is also "unknown".
  printf '%s\n' "${state:-unknown}"
}
# Stage 2 body: install Tailscale on the first-contact host if it is missing,
# then join the tailnet unless the backend already reports Running.
if [[ "$DRY_RUN" == "true" ]]; then
  dryrun "ssh ${FIRST_CONTACT} 'command -v tailscale' # check if installed"
  dryrun "# if missing: ssh ${FIRST_CONTACT} 'curl -fsSL https://tailscale.com/install.sh | sh'"
  dryrun "ssh ${FIRST_CONTACT} 'tailscale status --json' # check BackendState"
  dryrun "# if not Running: ssh ${FIRST_CONTACT} 'sudo tailscale up --hostname=${TS_HOSTNAME}'"
else
  # 2a — install if missing
  if ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1; then
    log "Tailscale already installed on ${FIRST_CONTACT}"
  else
    log "Tailscale not found — installing on ${FIRST_CONTACT}..."
    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'curl -fsSL https://tailscale.com/install.sh | sh'
    # Without pipefail on the remote side the pipeline reports only `sh`'s
    # status, so a failed download (empty script, sh exits 0) would look like
    # success. Confirm the binary actually exists before claiming so.
    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1 \
      || die "Tailscale install did not produce a tailscale binary on ${FIRST_CONTACT}"
    log "Tailscale installed"
  fi

  # 2b — join if not active
  ts_state=$(_ts_state)
  if [[ "$ts_state" == "Running" ]]; then
    log "Tailscale already active (BackendState=Running) — skip"
  else
    warn "Tailscale BackendState=${ts_state} — running tailscale up"
    echo ""
    echo -e "${_C_BOLD}┌─────────────────────────────────────────────────────────────┐"
    echo -e "│ ACTION REQUIRED: open the URL below in your browser to │"
    echo -e "│ authorize ${TS_HOSTNAME} in your Tailscale account. │"
    echo -e "└─────────────────────────────────────────────────────────────┘${_C_RESET}"
    echo ""
    # The hostname is re-parsed by the remote shell; %q keeps it a single word
    # even if node.yaml ever holds shell-special characters.
    printf -v ts_hostname_q '%q' "$TS_HOSTNAME"
    # pi user has passwordless sudo on Raspberry Pi OS; tailscale up blocks
    # until the user authenticates via the URL it prints.
    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" "sudo tailscale up --hostname=${ts_hostname_q}"
    echo ""
    ts_state2=$(_ts_state)
    [[ "$ts_state2" == "Running" ]] \
      || die "Tailscale still not active after tailscale up (BackendState=${ts_state2})"
    log "Tailscale joined successfully (BackendState=Running)"
  fi
fi
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 3 — verify over Tailscale
# ═══════════════════════════════════════════════════════════════════════════════
# Run `echo ok && uname -m` on the node through its Tailscale hostname. A missing
# "ok" line is fatal; an architecture other than aarch64 only produces a warning.
step "[$STEP_NAME] 3/3 verify SSH over Tailscale → ${ONBOARD_SSH_USER}@${TS_HOSTNAME}"
if [[ "$DRY_RUN" == "true" ]]; then
  dryrun "ssh -o BatchMode=yes ${ONBOARD_SSH_USER}@${TS_HOSTNAME} 'echo ok && uname -m'"
  dryrun "# expected output: ok / aarch64"
else
  # stderr is merged in so the failure message below can show ssh's diagnostics.
  out=$(ssh "${_FC_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" 'echo ok && uname -m' 2>&1) \
    || die "Verification SSH to ${TS_HOSTNAME} failed:\n ${out}"

  # Because stderr is merged, the first connection to this hostname puts ssh's
  # accept-new "Permanently added ..." notice in front of the real output.
  # Take the arch from the line that directly follows the exact "ok" line
  # rather than from "the first non-ok line". Plain bash is used so that a
  # missing arch line cannot fail a pipeline under `set -o pipefail`.
  saw_ok=false
  arch=""
  while IFS= read -r line; do
    if [[ "$saw_ok" == "true" ]]; then
      arch="${line//[[:space:]]/}"
      break
    fi
    if [[ "$line" == "ok" ]]; then
      saw_ok=true
    fi
  done <<<"$out"

  [[ "$saw_ok" == "true" ]] || die "Verification output missing 'ok' line: ${out}"
  [[ "$arch" == "aarch64" ]] || warn "Unexpected arch '${arch}' — expected aarch64"
  log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}"
fi
log "[$STEP_NAME] done — SSH key installed, Tailscale active, Tailscale SSH verified"