From 9012a36827ae1226163bf4a752e9c8243754d193 Mon Sep 17 00:00:00 2001
From: Oskar Kapala
Date: Mon, 8 Jun 2026 14:43:16 +0200
Subject: [PATCH] feat(onboard): add 00-access step + update lustro node.yaml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

00-access.sh implements a 3-stage idempotent access bootstrap:
  1. ensure_ssh_key — ssh-copy-id to first_contact (pi@pimirror2.local),
     skips if BatchMode key-auth already passes
  2. ensure_tailscale — install via install.sh if missing, then
     tailscale up --hostname=lustro; prints interactive auth URL to
     operator, blocks until authenticated; skips if BackendState
     already Running
  3. verify — SSH over Tailscale to pi@lustro, asserts 'ok', warns
     unless arch=aarch64

Reads first_contact and tailscale.hostname from node.yaml.
Respects --dry-run. No NOPASSWD or /opt/homelab mutations.

hosts/lustro/node.yaml: fill known hardware facts (arm64, 4096 MB RAM,
zram swap, docker_present, mm_runtime=systemd:magicmirror.service),
add ssh_user=pi, first_contact=pi@pimirror2.local,
services.node-agent.runtime engine=docker mem_limit=256m.

Co-Authored-By: Claude Sonnet 4.6
---
 hosts/lustro/node.yaml             |  25 +++--
 scripts/onboard/steps/00-access.sh | 170 +++++++++++++++++++++++++++++
 2 files changed, 185 insertions(+), 10 deletions(-)
 create mode 100755 scripts/onboard/steps/00-access.sh

diff --git a/hosts/lustro/node.yaml b/hosts/lustro/node.yaml
index 72cfaac..00ef7ed 100644
--- a/hosts/lustro/node.yaml
+++ b/hosts/lustro/node.yaml
@@ -1,26 +1,31 @@
 # hosts/lustro/node.yaml — LUSTRO edge node manifest
-# Run scripts/onboard/onboard.sh --node lustro --step 00-preflight
-# to auto-populate the TODO fields below.
+# First-contact bootstrap: scripts/onboard/onboard.sh --node lustro --step 00-access
+# Full onboarding: scripts/onboard/onboard.sh --node lustro
 
 name: LUSTRO
 role: edge
 location: KEN
 
-ssh_user: oskar
+ssh_user: pi
+first_contact: pi@pimirror2.local # LAN/mDNS target before Tailscale is set up
 
 tailscale:
   hostname: lustro
-  # ip: TODO — fill after tailscale join (step 30)
+  # ip: TODO — fill after tailscale join (step 30-install-tailscale)
 
 deploy_autonomy: true # onboard.sh may run mutating steps autonomously
-git_control: false # node does NOT pull from Forgejo directly; push-based
+git_control: false # node does NOT pull from Forgejo; push-based via SATURN
 
 hardware:
-  arch: TODO # populated by 00-preflight (e.g. aarch64, x86_64)
-  ram_mb: TODO # populated by 00-preflight
-  swap: TODO # populated by 00-preflight (none / )
-  mm_runtime: TODO # populated by 00-preflight (systemd / pm2 / process / none)
+  arch: arm64
+  ram_mb: 4096
+  swap:
+    kind: zram
+  docker_present: true
+  mm_runtime: systemd:magicmirror.service
 
 services:
   node-agent:
-    runtime: TODO # populated by 40-deploy-node-agent (image tag, config hash)
+    runtime:
+      engine: docker
+      mem_limit: 256m
diff --git a/scripts/onboard/steps/00-access.sh b/scripts/onboard/steps/00-access.sh
new file mode 100755
index 0000000..08347c1
--- /dev/null
+++ b/scripts/onboard/steps/00-access.sh
@@ -0,0 +1,170 @@
+#!/usr/bin/env bash
+# scripts/onboard/steps/00-access.sh — establish remote access channel
+#
+# Stages:
+#   1. ensure_ssh_key   — copy SATURN public key to first_contact (idempotent)
+#   2. ensure_tailscale — install Tailscale and join network (interactive auth URL)
+#   3. verify           — confirm SSH over Tailscale, warn unless arch=aarch64
+#
+# Does NOT configure NOPASSWD, /opt/homelab, or any host mutation beyond Tailscale.
+# Reads: first_contact (e.g. pi@pimirror2.local) and tailscale.hostname from node.yaml.
+# pi user on Raspberry Pi OS has passwordless sudo by default — required for `tailscale up`.
+
+set -euo pipefail
+
+STEP_NAME="00-access"
+
+: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
+: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
+: "${DRY_RUN:=false}"
+
+# Source common.sh when run standalone (orchestrator sources it before calling steps)
+if ! declare -f log >/dev/null 2>&1; then
+  # shellcheck source=../lib/common.sh
+  source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
+fi
+
+# ── parse node.yaml ───────────────────────────────────────────────────────────
+FIRST_CONTACT=$(yaml_get "$NODE_YAML" "first_contact")
+TS_HOSTNAME=$(yaml_get "$NODE_YAML" "tailscale.hostname")
+
+[[ -z "$FIRST_CONTACT" ]] && die "first_contact not set in $NODE_YAML"
+[[ -z "$TS_HOSTNAME" ]] && die "tailscale.hostname not set in $NODE_YAML"
+
+# first_contact is normally user@host. For a bare host, ssh logs in as the local
+# user, so use that instead of mistaking the hostname for a user name.
+if [[ "$FIRST_CONTACT" == *@* ]]; then
+  FC_USER="${FIRST_CONTACT%%@*}"
+else
+  FC_USER="${USER:-$(id -un)}"
+fi
+
+# remote.sh binds to ONBOARD_SSH_USER / ONBOARD_SSH_HOST — set from orchestrator
+# (post-Tailscale target); standalone, fall back to the first_contact user and
+# the Tailscale hostname.
+export ONBOARD_SSH_USER="${ONBOARD_SSH_USER:-${FC_USER}}"
+export ONBOARD_SSH_HOST="${ONBOARD_SSH_HOST:-${TS_HOSTNAME}}"
+
+# shellcheck source=../lib/remote.sh
+source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"
+
+# ── SSH option arrays ─────────────────────────────────────────────────────────
+# Interactive — no BatchMode; used for ssh-copy-id (may need password)
+_FC_SSH_NOKEY=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
+# Post-key — BatchMode; used once the key is installed
+_FC_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes)
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 1 — ensure_ssh_key
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 1/3 ensure_ssh_key → ${FIRST_CONTACT}"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+  dryrun "ssh -o BatchMode=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 ${FIRST_CONTACT} true"
+  dryrun "# if key not present:"
+  dryrun "ssh-copy-id -o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -i ~/.ssh/id_*.pub ${FIRST_CONTACT}"
+else
+  if ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true 2>/dev/null; then
+    log "SSH key already accepted by ${FIRST_CONTACT} — skip"
+  else
+    # Find the first available public key
+    pubkeys=( "$HOME"/.ssh/id_*.pub )
+    [[ -f "${pubkeys[0]}" ]] || die "No public key found at ~/.ssh/id_*.pub on SATURN"
+
+    log "Installing public key ${pubkeys[0]} on ${FIRST_CONTACT}"
+    log "(password prompt for ${FIRST_CONTACT} expected)"
+    ssh-copy-id "${_FC_SSH_NOKEY[@]}" -i "${pubkeys[0]}" "$FIRST_CONTACT"
+
+    log "Verifying key-based access..."
+    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true \
+      || die "Key-based SSH still fails after ssh-copy-id — check ~/.ssh/authorized_keys on ${FIRST_CONTACT}"
+    log "Key installed and verified"
+  fi
+fi
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 2 — ensure_tailscale
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 2/3 ensure_tailscale on ${FIRST_CONTACT} → hostname=${TS_HOSTNAME}"
+
+_ts_state() {
+  # Returns BackendState string or "unknown". Uses python3 (available on RPi OS).
+  ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" \
+    'tailscale status --json 2>/dev/null | python3 -c \
+       "import sys,json; print(json.load(sys.stdin).get(\"BackendState\",\"unknown\"))" \
+       2>/dev/null || echo "unknown"'
+}
+
+if [[ "$DRY_RUN" == "true" ]]; then
+  dryrun "ssh ${FIRST_CONTACT} 'command -v tailscale' # check if installed"
+  dryrun "# if missing: ssh ${FIRST_CONTACT} 'curl -fsSL https://tailscale.com/install.sh | sh'"
+  dryrun "ssh ${FIRST_CONTACT} 'tailscale status --json' # check BackendState"
+  dryrun "# if not Running: ssh ${FIRST_CONTACT} 'sudo tailscale up --hostname=${TS_HOSTNAME}'"
+else
+  # 2a — install if missing
+  if ! ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1; then
+    log "Tailscale not found — installing on ${FIRST_CONTACT}..."
+    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'curl -fsSL https://tailscale.com/install.sh | sh'
+    log "Tailscale installed"
+  else
+    log "Tailscale already installed on ${FIRST_CONTACT}"
+  fi
+
+  # 2b — join if not active
+  ts_state=$(_ts_state)
+  if [[ "$ts_state" == "Running" ]]; then
+    log "Tailscale already active (BackendState=Running) — skip"
+  else
+    warn "Tailscale BackendState=${ts_state} — running tailscale up"
+    echo ""
+    echo -e "${_C_BOLD}┌─────────────────────────────────────────────────────────────┐"
+    echo -e "│ ACTION REQUIRED: open the URL below in your browser to │"
+    echo -e "│ authorize ${TS_HOSTNAME} in your Tailscale account. │"
+    echo -e "└─────────────────────────────────────────────────────────────┘${_C_RESET}"
+    echo ""
+    # pi user has passwordless sudo on Raspberry Pi OS; tailscale up blocks
+    # until the user authenticates via the URL it prints.
+    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" "sudo tailscale up --hostname=${TS_HOSTNAME}"
+    echo ""
+
+    ts_state2=$(_ts_state)
+    [[ "$ts_state2" == "Running" ]] \
+      || die "Tailscale still not active after tailscale up (BackendState=${ts_state2})"
+    log "Tailscale joined successfully (BackendState=Running)"
+  fi
+fi
+
+# ═══════════════════════════════════════════════════════════════════════════════
+# Stage 3 — verify over Tailscale
+# ═══════════════════════════════════════════════════════════════════════════════
+step "[$STEP_NAME] 3/3 verify SSH over Tailscale → ${ONBOARD_SSH_USER}@${TS_HOSTNAME}"
+
+if [[ "$DRY_RUN" == "true" ]]; then
+  dryrun "ssh -o BatchMode=yes ${ONBOARD_SSH_USER}@${TS_HOSTNAME} 'echo ok && uname -m'"
+  dryrun "# expected output: ok / aarch64"
+else
+  # Capture stdout only: on first contact with the Tailscale hostname, accept-new
+  # makes ssh print a "Permanently added ..." notice on stderr, which must not be
+  # parsed as the arch line. stderr stays on the terminal for the operator.
+  out=$(ssh "${_FC_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" 'echo ok && uname -m') \
+    || die "Verification SSH to ${TS_HOSTNAME} failed (see ssh error above)"
+
+  # Parse without pipelines (pipefail plus an early-exiting grep/head can abort
+  # the step): the arch is the line that follows the 'ok' marker.
+  ok_seen=false
+  arch=""
+  while IFS= read -r line; do
+    if [[ "$ok_seen" == "true" ]]; then
+      arch="${line//[[:space:]]/}"
+      break
+    elif [[ "$line" == ok* ]]; then
+      ok_seen=true
+    fi
+  done <<<"$out"
+  [[ "$ok_seen" == "true" ]] || die "Verification output missing 'ok' line: ${out}"
+  [[ "$arch" == "aarch64" ]] || warn "Unexpected arch '${arch}' — expected aarch64"
+
+  log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}"
+fi
+
+log "[$STEP_NAME] done — SSH key installed, Tailscale active, Tailscale SSH verified"