On first SSH to a new mesh hostname, OpenSSH emits "Warning: Permanently added 'lustro' to the list of known hosts" on stderr. The previous code used 2>&1, which merged that warning into the captured arch variable and caused the arch assertion to fail with arch="Warning:Permanentlyadded...".

Fix:
- Add a dedicated _TS_SSH opts array with -o LogLevel=ERROR, which suppresses INFO-level messages (known-hosts, banner) at the source
- Remove 2>&1 — stderr is no longer merged into the captured value
- Run only `uname -m` instead of `echo ok && uname -m`; take the last non-empty stdout line to be robust against any remaining preamble
- Change arch mismatch from warn to die in live mode (warn in dry-run)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
157 lines
8.8 KiB
Bash
Executable file
157 lines
8.8 KiB
Bash
Executable file
#!/usr/bin/env bash
|
|
# scripts/onboard/steps/00-access.sh — establish remote access channel
|
|
#
|
|
# Stages:
|
|
# 1. ensure_ssh_key — copy SATURN public key to first_contact (idempotent)
|
|
# 2. ensure_tailscale — install Tailscale and join network (interactive auth URL)
|
|
# 3. verify — confirm SSH over Tailscale, assert arch=aarch64
|
|
#
|
|
# Dry-run convention (DRY_RUN=1):
|
|
# - Read-only probes (SSH BatchMode test, tailscale status, command -v) run ALWAYS
|
|
# so the plan reflects real current state ("key present → skip" vs "would: install")
|
|
# - Mutations (ssh-copy-id, curl installer, tailscale up) are wrapped with run()
|
|
#
|
|
# Does NOT configure NOPASSWD or /opt/homelab — those are later steps.
|
|
# pi user on Raspberry Pi OS has passwordless sudo — required for `tailscale up`.
|
|
|
|
# Strict mode: stop on failing commands, on unset variables, and on a failure
# in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

STEP_NAME="00-access"

# Required inputs: the expansion aborts with the message shown when the
# variable is unset or empty.
: "${REPO_ROOT:?REPO_ROOT is not set — run via onboard.sh}"
: "${NODE_YAML:?NODE_YAML is not set — run via onboard.sh}"
# Optional input: defaults to live mode (0) when the caller did not set it.
DRY_RUN="${DRY_RUN:-0}"

# Standalone invocation: load the shared helpers only when log() is not
# already defined (the orchestrator sources common.sh before calling steps).
# shellcheck source=../lib/common.sh
declare -F log >/dev/null 2>&1 \
  || source "${REPO_ROOT}/scripts/onboard/lib/common.sh"
|
|
|
|
# ── parse node.yaml ───────────────────────────────────────────────────────────
# first_contact      — SSH target used before Tailscale is up (user@host)
# tailscale.hostname — name the node gets on the tailnet
FIRST_CONTACT=$(yaml_get "$NODE_YAML" "first_contact")
TS_HOSTNAME=$(yaml_get "$NODE_YAML" "tailscale.hostname")

[[ -n "$FIRST_CONTACT" ]] || die "first_contact not set in $NODE_YAML"
[[ -n "$TS_HOSTNAME" ]] || die "tailscale.hostname not set in $NODE_YAML"

# Login user taken from the "user@" prefix of first_contact.
# A plain "${FIRST_CONTACT%%@*}" returns the whole string when there is no "@",
# which would turn the host/IP into the SSH user name. When no user part is
# given, fall back to the local login name — the name ssh itself uses for a
# bare host (NOTE(review): an ssh_config "User" entry could override that).
FC_USER=""
if [[ "$FIRST_CONTACT" == *@* ]]; then
  FC_USER="${FIRST_CONTACT%%@*}"
fi
if [[ -z "$FC_USER" ]]; then
  FC_USER="$(id -un)"
fi

# ONBOARD_SSH_USER/HOST set by orchestrator to post-Tailscale coordinates;
# fall back to first_contact user / Tailscale hostname for standalone invocation.
export ONBOARD_SSH_USER="${ONBOARD_SSH_USER:-${FC_USER}}"
export ONBOARD_SSH_HOST="${ONBOARD_SSH_HOST:-${TS_HOSTNAME}}"

# shellcheck source=../lib/remote.sh
source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"
|
|
|
|
# ── SSH option arrays ─────────────────────────────────────────────────────────
# Each array extends the previous one, so the shared options are spelled once.

# Base options, no BatchMode — for ssh-copy-id, where a password prompt may
# appear.
_FC_SSH_NOKEY=(
  -o StrictHostKeyChecking=accept-new
  -o ConnectTimeout=10
)

# Base + BatchMode — for all probes and post-key-install operations.
_FC_SSH=("${_FC_SSH_NOKEY[@]}" -o BatchMode=yes)

# Probe options + LogLevel=ERROR — for the Tailscale verify step. Silences the
# "Permanently added … to the list of known hosts" message OpenSSH prints on
# the first connection to a new hostname.
_TS_SSH=("${_FC_SSH[@]}" -o LogLevel=ERROR)
|
|
|
|
# ── tailscale state probe helper ──────────────────────────────────────────────
# Prints the BackendState reported by `tailscale status --json` on
# $FIRST_CONTACT, or "unknown" when the SSH call or the remote parse fails.
# Globals:  _FC_SSH (read), FIRST_CONTACT (read)
# Outputs:  the state string on stdout; stderr of the probe is discarded
# Read-only probe: not wrapped in run(), so it executes in dry-run too.
_ts_state() {
  # Runs on the remote side: extract BackendState from the JSON status; the
  # inner `|| echo` covers a missing tailscale binary or unparsable output.
  local -r remote_probe='tailscale status --json 2>/dev/null | python3 -c \
    "import sys,json; print(json.load(sys.stdin).get(\"BackendState\",\"unknown\"))" \
    2>/dev/null || echo "unknown"'

  # A failing ssh (unreachable host, auth failure) also maps to "unknown".
  if ! ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" "$remote_probe" 2>/dev/null; then
    echo "unknown"
  fi
}
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 1 — ensure_ssh_key
# Make sure key-based SSH to $FIRST_CONTACT works; install the first local
# public key with ssh-copy-id when it does not.
# ═══════════════════════════════════════════════════════════════════════════════
step "[$STEP_NAME] 1/3 ensure_ssh_key → ${FIRST_CONTACT}"

# Probe: test key-based auth — always runs so dry-run reports real current state
if ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true 2>/dev/null; then
  log "SSH key already accepted by ${FIRST_CONTACT} — skip"
else
  # Without nullglob an unmatched glob stays literal, so the -f test below
  # also covers the "no key at all" case.
  pubkeys=( "$HOME"/.ssh/id_*.pub )
  [[ -f "${pubkeys[0]}" ]] || die "No public key found at ~/.ssh/id_*.pub on SATURN"

  log "Key not yet installed on ${FIRST_CONTACT} (password prompt expected)"
  # Mutation: install public key
  run ssh-copy-id \
    "${_FC_SSH_NOKEY[@]}" \
    -i "${pubkeys[0]}" \
    "$FIRST_CONTACT"
  # Verify the key was installed. Wrapped in run() so that in dry-run it is
  # only reported — avoids a false failure after a skipped ssh-copy-id.
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" true
  # Claim success only when the mutation and the verification really ran;
  # in dry-run nothing was installed, so stay silent (same guard as the
  # post-join check in stage 2).
  if [[ "${DRY_RUN:-0}" != 1 ]]; then
    log "Key installed and verified"
  fi
fi
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 2 — ensure_tailscale
# Install Tailscale on $FIRST_CONTACT when missing, then join the network as
# $TS_HOSTNAME unless the backend already reports "Running".
# ═══════════════════════════════════════════════════════════════════════════════
step "[$STEP_NAME] 2/3 ensure_tailscale on ${FIRST_CONTACT} → hostname=${TS_HOSTNAME}"

# Probe: check if tailscale binary present — always runs.
# SSH auth failure (key not yet installed in dry-run) falls through to the
# "not found" branch, which is correct for a fresh node.
if ! ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" 'command -v tailscale' >/dev/null 2>&1; then
  log "Tailscale not found on ${FIRST_CONTACT}"
  # Mutation: install tailscale
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" \
    'curl -fsSL https://tailscale.com/install.sh | sh'
else
  log "Tailscale already installed on ${FIRST_CONTACT}"
fi

# Probe: check backend state — always runs
ts_state=$(_ts_state)
if [[ "$ts_state" == "Running" ]]; then
  log "Tailscale already active (BackendState=Running) — skip"
else
  # The hostname is pasted into a command line that the REMOTE shell parses.
  # Refuse anything but plain hostname characters so a stray space, quote or
  # metacharacter in node.yaml cannot split or alter that command.
  [[ "$TS_HOSTNAME" =~ ^[A-Za-z0-9._-]+$ ]] \
    || die "tailscale.hostname '${TS_HOSTNAME}' contains characters outside [A-Za-z0-9._-]"

  warn "Tailscale BackendState=${ts_state} — joining network..."
  echo ""
  echo -e "${_C_BOLD}┌─────────────────────────────────────────────────────────────┐"
  echo -e "│ ACTION REQUIRED: open the URL below in your browser to │"
  echo -e "│ authorize ${TS_HOSTNAME} in your Tailscale account. │"
  echo -e "└─────────────────────────────────────────────────────────────┘${_C_RESET}"
  echo ""
  # Mutation: tailscale up — blocks until user authenticates via printed URL
  run ssh "${_FC_SSH[@]}" "$FIRST_CONTACT" "sudo tailscale up --hostname=${TS_HOSTNAME}"
  echo ""

  # Post-join state check — only meaningful after the mutation actually ran
  if [[ "${DRY_RUN:-0}" != 1 ]]; then
    ts_state2=$(_ts_state)
    [[ "$ts_state2" == "Running" ]] \
      || die "Tailscale still not active after tailscale up (BackendState=${ts_state2})"
    log "Tailscale joined successfully (BackendState=Running)"
  fi
fi
|
|
|
|
# ═══════════════════════════════════════════════════════════════════════════════
# Stage 3 — verify over Tailscale
# Connect to ${ONBOARD_SSH_USER}@${TS_HOSTNAME} and assert `uname -m` reports
# aarch64.
# ═══════════════════════════════════════════════════════════════════════════════

# Print the last non-blank line of $1 with all whitespace removed.
# Prints nothing, and still returns 0, when $1 is empty or blank-only.
# Pure bash on purpose: a `grep -v … | tail | tr` pipeline exits non-zero on
# empty input, which under `set -euo pipefail` aborts the script silently
# before any diagnostic is printed.
_last_nonblank_line() {
  local line last=""
  while IFS= read -r line || [[ -n "$line" ]]; do
    line="${line//[[:space:]]/}"
    if [[ -n "$line" ]]; then
      last="$line"
    fi
  done <<<"$1"
  printf '%s' "$last"
}

# Report a verification problem ($1): warning in dry-run, fatal otherwise.
# A real if/else — with `test && warn || die`, die would also run in dry-run
# whenever warn itself returned non-zero.
_verify_problem() {
  if [[ "${DRY_RUN:-0}" == 1 ]]; then
    warn "$1"
  else
    die "$1"
  fi
}

step "[$STEP_NAME] 3/3 verify SSH over Tailscale → ${ONBOARD_SSH_USER}@${TS_HOSTNAME}"

# Probe: always runs — on a node already joined this works even in dry-run.
# On a fresh node in dry-run mode Tailscale isn't set up yet, so SSH will fail;
# that is reported as a warning (not a fatal error) to keep dry-run informative.
# stderr is NOT merged (no 2>&1) — _TS_SSH uses LogLevel=ERROR so the
# "Permanently added … to known hosts" INFO message is suppressed at source.
if arch=$(ssh "${_TS_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" 'uname -m'); then
  # Take the last non-empty stdout line to skip any unexpected preamble
  arch=$(_last_nonblank_line "$arch")
  if [[ "$arch" == "aarch64" ]]; then
    log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}"
  else
    msg="Unexpected arch '${arch}' on ${TS_HOSTNAME} — expected aarch64"
    _verify_problem "$msg"
  fi
else
  msg="Verify SSH to ${ONBOARD_SSH_USER}@${TS_HOSTNAME} failed (Tailscale not yet joined?)"
  _verify_problem "$msg"
fi

log "[$STEP_NAME] done"
|