fix(onboard/00-access): suppress known-hosts warning in Tailscale verify

On first SSH to a new mesh hostname, OpenSSH emits
"Warning: Permanently added 'lustro' to the list of known hosts"
on stderr. The previous code used 2>&1, which merged that line into the
captured output; the arch value parsed from it was then wrong and the
arch check failed with arch="Warning:Permanentlyadded...".

Fix:
- Add dedicated _TS_SSH opts array with -o LogLevel=ERROR, which
  suppresses INFO-level messages (known-hosts, banner) at source
- Remove 2>&1 — stderr is no longer merged into the captured value
- Run only `uname -m` instead of `echo ok && uname -m`; take the last
  non-empty stdout line to be robust against any remaining preamble
- Change arch mismatch from warn to die in live mode (warn in dry-run)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Oskar Kapala 2026-06-08 15:28:21 +02:00
parent 1bed8559fa
commit 471ba09c4a

View file

@@ -50,6 +50,9 @@ source "${REPO_ROOT}/scripts/onboard/lib/remote.sh"
_FC_SSH_NOKEY=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10) _FC_SSH_NOKEY=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10)
# BatchMode — used for all probes and post-key-install operations # BatchMode — used for all probes and post-key-install operations
_FC_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes) _FC_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes)
# Tailscale verify — LogLevel=ERROR suppresses the "Permanently added" known-hosts
# INFO message that OpenSSH writes to stderr on first connection, so it cannot pollute the verify output
_TS_SSH=(-o StrictHostKeyChecking=accept-new -o ConnectTimeout=10 -o BatchMode=yes -o LogLevel=ERROR)
# ── tailscale state probe helper ────────────────────────────────────────────── # ── tailscale state probe helper ──────────────────────────────────────────────
# Always runs; returns BackendState or "unknown" on any SSH/parse failure. # Always runs; returns BackendState or "unknown" on any SSH/parse failure.
@@ -134,12 +137,17 @@ step "[$STEP_NAME] 3/3 verify SSH over Tailscale → ${ONBOARD_SSH_USER}@${TS_HO
# Probe: always runs — on a node already joined this works even in dry-run. # Probe: always runs — on a node already joined this works even in dry-run.
# On a fresh node in dry-run mode Tailscale isn't set up yet, so SSH will fail; # On a fresh node in dry-run mode Tailscale isn't set up yet, so SSH will fail;
# that is reported as a warning (not a fatal error) to keep dry-run informative. # that is reported as a warning (not a fatal error) to keep dry-run informative.
if out=$(ssh "${_FC_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" \ # stderr is NOT merged (no 2>&1) — _TS_SSH uses LogLevel=ERROR so the
'echo ok && uname -m' 2>&1); then # "Permanently added … to known hosts" INFO message is suppressed at source.
echo "$out" | grep -q '^ok' || warn "Unexpected verify output: ${out}" if arch=$(ssh "${_TS_SSH[@]}" "${ONBOARD_SSH_USER}@${TS_HOSTNAME}" 'uname -m'); then
arch=$(echo "$out" | grep -v '^ok' | head -1 | tr -d '[:space:]') # Take the last non-empty stdout line to skip any unexpected preamble
[[ "$arch" == "aarch64" ]] || warn "Unexpected arch '${arch}' — expected aarch64" arch=$(printf '%s' "$arch" | grep -v '^[[:space:]]*$' | tail -1 | tr -d '[:space:]')
log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}" if [[ "$arch" == "aarch64" ]]; then
log "Verify OK: ${ONBOARD_SSH_USER}@${TS_HOSTNAME} reachable, arch=${arch}"
else
msg="Unexpected arch '${arch}' on ${TS_HOSTNAME} — expected aarch64"
[ "${DRY_RUN:-0}" = 1 ] && warn "$msg" || die "$msg"
fi
else else
msg="Verify SSH to ${ONBOARD_SSH_USER}@${TS_HOSTNAME} failed (Tailscale not yet joined?)" msg="Verify SSH to ${ONBOARD_SSH_USER}@${TS_HOSTNAME} failed (Tailscale not yet joined?)"
[ "${DRY_RUN:-0}" = 1 ] && warn "$msg" || die "$msg" [ "${DRY_RUN:-0}" = 1 ] && warn "$msg" || die "$msg"