#!/usr/bin/env bash
# scripts/onboard/steps/00-access.sh — establish remote access channel
#
# Stages:
#   1. ensure_ssh_key   — copy SATURN public key to first_contact (idempotent)
#   2. ensure_tailscale — install Tailscale and join network (interactive auth URL)
#   3. verify           — confirm SSH over Tailscale, check arch=aarch64
#                         (an arch mismatch is reported as a warning, not a failure)
#
# Required environment:
#   REPO_ROOT — repository root; used to locate scripts/onboard/lib/*.sh
#   NODE_YAML — path to the node description read with yaml_get
# Optional environment:
#   DRY_RUN   — defaults to 0; see the dry-run convention below
#
# Dry-run convention (DRY_RUN=1):
#   - Read-only probes (SSH BatchMode test, tailscale status, command -v) run ALWAYS
#     so the plan reflects real current state ("key present → skip" vs "would: install")
#   - Mutations (ssh-copy-id, curl installer, tailscale up) are wrapped with run()
#
# Does NOT configure NOPASSWD or /opt/homelab — those are later steps.
# pi user on Raspberry Pi OS has passwordless sudo — required for `tailscale up`.

set -euo pipefail

STEP_NAME="00-access"

# Abort early, with a readable message, when the step is started without the
# environment the orchestrator normally provides.
: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
: "${DRY_RUN:=0}"

# Source common.sh when run standalone (orchestrator sources it before calling steps).
# The presence of the log() function is used as the "already sourced" marker.
if ! declare -f log >/dev/null 2>&1; then
  # shellcheck source=../lib/common.sh
  source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
fi

# ── parse node.yaml ───────────────────────────────────────────────────────────
FIRST_CONTACT=$(yaml_get "$NODE_YAML" "first_contact")
TS_HOSTNAME=$(yaml_get "$NODE_YAML" "tailscale.hostname")

[[ -n "$FIRST_CONTACT" ]] || die "first_contact not set in $NODE_YAML"
[[ -n "$TS_HOSTNAME" ]] || die "tailscale.hostname not set in $NODE_YAML"

# User part of first_contact ("user@host" → "user"). When first_contact carries
# no user part, a plain %%@* expansion would return the whole host string, so
# fall back to the local login name — the same default ssh applies.
FC_USER=""
if [[ "$FIRST_CONTACT" == *@* ]]; then
  FC_USER="${FIRST_CONTACT%%@*}"
fi
if [[ -z "$FC_USER" ]]; then
  FC_USER="${USER:-$(id -un)}"
fi

# ONBOARD_SSH_USER/HOST are set by the orchestrator to post-Tailscale coordinates.
# For standalone invocation they are unset: the user then falls back to the
# first_contact user and the host to tailscale.hostname.
export ONBOARD_SSH_USER="${ONBOARD_SSH_USER:-${FC_USER}}"
export ONBOARD_SSH_HOST="${ONBOARD_SSH_HOST:-${TS_HOSTNAME}}"

# NOTE(review): remote.sh presumably consumes ONBOARD_SSH_USER/HOST, which is
# why they are exported before it is sourced — confirm against lib/remote.sh.
# shellcheck source=../lib/remote.sh
source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"

# ── SSH option arrays ─────────────────────────────────────────────────────────
# No BatchMode — used for ssh-copy-id where a password prompt may appear
_FC_SSH_NOKEY=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
# BatchMode — used for all probes and post-key-install operations
_FC_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes)

# ── tailscale state probe helper ──────────────────────────────────────────────
#######################################
# Read-only probe of the Tailscale backend state on first_contact.
# Always runs, including in dry-run.
# Globals:   _FC_SSH (read), FIRST_CONTACT (read)
# Arguments: none
# Outputs:   exactly one line on stdout: the BackendState reported by
#            `tailscale status --json`, or "unknown" on any SSH/parse failure
#            or when the remote side reports nothing.
# Returns:   0 always, so callers can use $(_ts_state) under `set -e`.
#######################################
_ts_state() {
  local state
  # The remote command falls back to "unknown" itself when tailscale or python3
  # is missing or the JSON cannot be parsed; the local fallback covers ssh
  # failing outright. Capturing first guarantees a single output token.
  state=$(ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" \
    'tailscale status --json 2>/dev/null | python3 -c \
      "import sys,json; print(json.load(sys.stdin).get(\"BackendState\",\"unknown\"))" \
      2>/dev/null || echo "unknown"' 2>/dev/null) || state=""
  printf '%s\n' "${state:-unknown}"
}

# ═══════════════════════════════════════════════════════════════════════════════
# Stage 1 — ensure_ssh_key
# ═══════════════════════════════════════════════════════════════════════════════

step "[$STEP_NAME] 1/3 ensure_ssh_key → ${FIRST_CONTACT}"

# Probe: test key-based auth — always runs so dry-run reports real current state
if ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true 2>/dev/null; then
  log "SSH key already accepted by ${FIRST_CONTACT} — skip"
else
  # Without nullglob an unmatched glob stays literal, so the -f test below
  # also covers the "no key at all" case. The first match in glob order is used.
  pubkeys=( "$HOME"/.ssh/id_*.pub )
  [[ -f "${pubkeys[0]}" ]] || die "No public key found at ~/.ssh/id_*.pub on SATURN"

  log "Key not yet installed on ${FIRST_CONTACT} (password prompt expected)"

  # Mutation: install public key
  run ssh-copy-id \
    "${_FC_SSH_NOKEY[@]}" \
    -i "${pubkeys[0]}" \
    "$FIRST_CONTACT" \
    || die "ssh-copy-id to ${FIRST_CONTACT} failed"

  # Probe: verify key was installed (run() is a no-op in dry-run so this
  # prints "would:" — avoids a false-failure after a skipped ssh-copy-id)
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true \
    || die "Key-based SSH to ${FIRST_CONTACT} still failing after ssh-copy-id"

  # Only claim success when the two commands above actually executed.
  if [[ "${DRY_RUN:-0}" != 1 ]]; then
    log "Key installed and verified"
  fi
fi
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 2 — ensure_tailscale
# ═══════════════════════════════════════════════════════════════════════════════

step "[$STEP_NAME] 2/3 ensure_tailscale on ${FIRST_CONTACT} → hostname=${TS_HOSTNAME}"

# Probe: check if tailscale binary present — always runs.
# SSH auth failure (key not yet installed in dry-run) falls through to the
# "not found" branch, which is correct for a fresh node.
if ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1; then
  log "Tailscale already installed on ${FIRST_CONTACT}"
else
  log "Tailscale not found on ${FIRST_CONTACT}"
  # Mutation: install tailscale
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" \
    'curl -fsSL https://tailscale.com/install.sh | sh' \
    || die "Tailscale installer failed on ${FIRST_CONTACT}"

  # The remote pipeline reports only the status of `sh`, so a failed download
  # (empty stdin → sh exits 0) would look like success. Re-probe after a real run.
  if [[ "${DRY_RUN:-0}" != 1 ]]; then
    ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1 \
      || die "Tailscale still not found on ${FIRST_CONTACT} after running the installer"
  fi
fi

# Probe: check backend state — always runs
ts_state=$(_ts_state)
if [[ "$ts_state" == "Running" ]]; then
  log "Tailscale already active (BackendState=Running) — skip"
else
  warn "Tailscale BackendState=${ts_state} — joining network..."

  # Banner: borders are generated and rows padded by printf so the box stays
  # aligned for typical hostname lengths. %s keeps backslashes in the hostname
  # literal; %b expands the colour codes (presumably defined in common.sh).
  printf -v box_rule '─%.0s' {1..61}
  echo ""
  printf '%b┌%s┐\n' "${_C_BOLD}" "$box_rule"
  printf '│  %-57s  │\n' "ACTION REQUIRED: open the URL below in your browser to"
  printf '│  %-57s  │\n' "authorize ${TS_HOSTNAME} in your Tailscale account."
  printf '└%s┘%b\n' "$box_rule" "${_C_RESET}"
  echo ""

  # Mutation: tailscale up — blocks until user authenticates via printed URL.
  # The hostname is shell-quoted because the remote side re-parses the command
  # line; for ordinary hostnames %q leaves the value unchanged.
  printf -v ts_up_cmd 'sudo tailscale up --hostname=%q' "$TS_HOSTNAME"
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" "$ts_up_cmd" \
    || die "tailscale up failed on ${FIRST_CONTACT}"
  echo ""

  # Post-join state check — only meaningful after the mutation actually ran
  if [[ "${DRY_RUN:-0}" != 1 ]]; then
    ts_state2=$(_ts_state)
    [[ "$ts_state2" == "Running" ]] \
      || die "Tailscale still not active after tailscale up (BackendState=${ts_state2})"
    log "Tailscale joined successfully (BackendState=Running)"
  fi
fi

# ═══════════════════════════════════════════════════════════════════════════════
# Stage 3 — verify over Tailscale
# ═══════════════════════════════════════════════════════════════════════════════

step "[$STEP_NAME] 3/3 verify SSH over Tailscale → ${ONBOARD_SSH_USER}@${TS_HOSTNAME}"

# Probe: always runs — on a node already joined this works even in dry-run.
# On a fresh node in dry-run mode Tailscale isn't set up yet, so SSH will fail;
# that is reported as a warning (not a fatal error) to keep dry-run informative.
#
# Only stdout is captured: with StrictHostKeyChecking=accept-new the first
# connection to the Tailscale hostname prints a "Permanently added …" notice on
# stderr, which must not be mistaken for the `uname -m` result.
if out=$(ssh "${_FC_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" \
    'echo ok && uname -m' 2>/dev/null); then
  grep -q '^ok' <<<"$out" || warn "Unexpected verify output: ${out}"

  # Arch is the line right after the "ok" marker. awk exits 0 even when nothing
  # matches, so a short reply cannot abort the script via pipefail + set -e.
  arch=$(awk 'seen { print; exit } /^ok/ { seen = 1 }' <<<"$out")
  arch=${arch//[[:space:]]/}

  [[ "$arch" == "aarch64" ]] || warn "Unexpected arch '${arch}' — expected aarch64"
  log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}"
else
  msg="Verify SSH to ${ONBOARD_SSH_USER}@${TS_HOSTNAME} failed (Tailscale not yet joined?)"
  # Explicit if/else: with `A && warn || die`, a non-zero return from warn
  # would turn a dry-run warning into a fatal error.
  if [[ "${DRY_RUN:-0}" == 1 ]]; then
    warn "$msg"
  else
    die "$msg"
  fi
fi

log "[$STEP_NAME] done"