Merge branch 'bootstrap-new-node'

This commit is contained in:
Oskar Kapala 2026-05-12 13:18:43 +02:00
commit 95a976e930
4 changed files with 446 additions and 0 deletions

82
docs/node-onboarding.md Normal file
View file

@ -0,0 +1,82 @@
# Node Onboarding Workflow
This document describes the process of onboarding a new Linux machine into the homelab platform.
## Overview
The onboarding process consists of three main stages:
1. **Preparation**: Setting up the runtime environment and dependencies.
2. **Discovery**: Collecting hardware and software characteristics of the node.
3. **Inventory Generation**: Creating the YAML configuration files for the node in the central inventory.
## Prerequisites
- A fresh Linux machine (Debian/Ubuntu recommended).
- SSH access with sudo privileges.
- Tailscale account (if using Tailscale for networking).
## Onboarding Steps
### 1. Node Preparation
Run the `prepare-node.sh` script on the target node. This script will install Docker, Tailscale, and create the `/opt/homelab` directory structure.
```bash
sudo ./scripts/bootstrap/prepare-node.sh
```
**Manual Step**: If you are using Tailscale, you must manually authenticate it after the script runs:
```bash
sudo tailscale up
```
### 2. Node Discovery
Run the `discover-node.sh` script on the target node to collect system information. The script prints a JSON document to standard output, so redirect it to a file:
```bash
./scripts/bootstrap/discover-node.sh > discovery-$(hostname).json
```
### 3. Inventory Generation
Copy the discovery JSON file to your management machine (where the homelab repository is located) and run the inventory generator.
```bash
./scripts/bootstrap/generate-node-inventory.py discovery-<hostname>.json
```
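This creates the directory `hosts/<hostname>/` relative to the current working directory (so run the command from the repository root) and writes the following files, overwriting any that already exist: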
- `host.yaml`: Basic host identity and roles.
- `capabilities.yaml`: Hardware and software capabilities.
- `paths.yaml`: Runtime path definitions.
- `networking.yaml`: Networking configuration.
### 4. Finalization
1. Review the generated YAML files in `hosts/<hostname>/`.
2. Assign appropriate roles to the node in `hosts/<hostname>/host.yaml`.
3. Commit the new host configuration to the repository.
4. Run the deployment script to apply the initial configuration:
```bash
./scripts/deploy/deploy-node.sh <hostname>
```
## Recovery Onboarding
If a node needs to be re-onboarded after a failure:
1. Run `prepare-node.sh` again. It is idempotent and will ensure the environment is correct.
2. Restore any critical data to `/opt/homelab/data/` and `/opt/homelab/backups/`.
3. Re-run `discover-node.sh` if hardware has changed, or reuse the existing inventory if it hasn't.
## Tailscale Assumptions
- Nodes are assumed to use Tailscale for management and inter-node communication.
- The `networking.yaml` will be populated with the Tailscale IP found during discovery.
- If Tailscale is not used, manual adjustment of `networking.yaml` and `host.yaml` is required.
## Troubleshooting
- **Docker not starting**: Check `journalctl -u docker`.
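- **Discovery fails**: The script aborts with `Error: Required command '<cmd>' not found.` when a required tool (lscpu, lsblk, ip, free, curl, etc.) is missing; install it and re-run. `jq` and `lspci` are optional, but without them interface details and GPU detection are reduced.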
- **Inventory Generation error**: Ensure `PyYAML` is installed on the management machine.

View file

@ -0,0 +1,130 @@
#!/bin/bash
# scripts/bootstrap/discover-node.sh
# Node discovery script for the homelab platform.
# Collects system information and outputs it in JSON format.
set -e
# Print the command-line usage summary to stdout.
show_help() {
  cat <<USAGE
Usage: $0 [options]
Options:
 --json Output in JSON format (default)
 --yaml Output in YAML format
 --help Show this help
USAGE
}
# Parse command-line options into OUTPUT_FORMAT ("json" unless --yaml is given).
# Diagnostics for an unknown option go to stderr: stdout carries the discovery
# document and is normally redirected to a file, which must not be polluted
# with error text.
OUTPUT_FORMAT="json"
while (( $# > 0 )); do
  case "$1" in
    --json) OUTPUT_FORMAT="json" ;;
    --yaml) OUTPUT_FORMAT="yaml" ;;
    -h | --help)
      show_help
      exit 0
      ;;
    *)
      echo "Unknown parameter: $1" >&2
      show_help >&2
      exit 1
      ;;
  esac
  shift
done
# Check dependencies
# Verify the external commands that the collection steps call unconditionally.
# `hostname` is checked rather than `hostnamectl` because only `hostname` is
# actually invoked. jq, lspci, tailscale and docker are probed where they are
# used and are optional.
missing_cmds=()
for cmd in hostname lscpu free lsblk ip curl; do
  command -v "$cmd" > /dev/null 2>&1 || missing_cmds+=("$cmd")
done
# Report every missing tool in one pass so the operator can install them all.
if (( ${#missing_cmds[@]} > 0 )); then
  for cmd in "${missing_cmds[@]}"; do
    echo "Error: Required command '$cmd' not found." >&2
  done
  exit 1
fi
# Collect Data

# Print the value of the first "<key>: <value>" line of lscpu-style text read
# from stdin. Leading indentation is ignored (some lscpu releases indent nested
# fields) and the key must match exactly, so "BIOS Model name" is never
# mistaken for "Model name".
lscpu_field() {
  awk -v key="$1" '
    {
      sep = index($0, ":")
      if (sep == 0) next
      name = substr($0, 1, sep - 1)
      value = substr($0, sep + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      gsub(/^[ \t]+|[ \t]+$/, "", value)
      if (name == key) { print value; exit }
    }'
}

HOSTNAME=$(hostname)
# os-release is shell-compatible; sourcing it in a subshell copes with both
# quoted and unquoted PRETTY_NAME values without leaking its variables.
OS_DISTRO=$( . /etc/os-release 2> /dev/null; printf '%s' "${PRETTY_NAME:-}" )
ARCH=$(uname -m)

# Query lscpu once, in the C locale, and pick the fields out of the capture.
LSCPU_OUTPUT=$(LC_ALL=C lscpu)
CPU_MODEL=$(lscpu_field "Model name" <<< "$LSCPU_OUTPUT")
# "CPU(s)" is the number of logical CPUs, i.e. hardware threads.
CPU_THREADS=$(lscpu_field "CPU(s)" <<< "$LSCPU_OUTPUT")
cores_per_socket=$(lscpu_field "Core(s) per socket" <<< "$LSCPU_OUTPUT")
socket_count=$(lscpu_field "Socket(s)" <<< "$LSCPU_OUTPUT")
if [[ "$cores_per_socket" =~ ^[0-9]+$ && "$socket_count" =~ ^[0-9]+$ ]]; then
  CPU_CORES=$(( cores_per_socket * socket_count ))
else
  # Topology not reported as plain numbers: fall back to the logical CPU count.
  CPU_CORES=$CPU_THREADS
fi

# Round to the nearest GiB; `free -g` truncates (so <1 GiB hosts would report 0).
RAM_TOTAL_GB=$(LC_ALL=C free -m | awk '/^Mem:/ { printf "%d", ($2 + 512) / 1024 }')
# Disks

# Convert `lsblk -dn -o NAME,SIZE,TYPE,MODEL` rows read from stdin into a
# comma-separated list of JSON objects, one per device whose TYPE column is
# exactly "disk". The model keeps all of its words (it is everything after the
# third column) and has backslashes and double quotes escaped.
disks_to_json() {
  awk '
    function json_escape(text,    escaped, i, ch) {
      escaped = ""
      for (i = 1; i <= length(text); i++) {
        ch = substr(text, i, 1)
        if (ch == "\\" || ch == "\"") escaped = escaped "\\"
        escaped = escaped ch
      }
      return escaped
    }
    $3 == "disk" {
      model = $4
      for (i = 5; i <= NF; i++) model = model " " $i
      printf "%s{\"name\": \"%s\", \"size\": \"%s\", \"model\": \"%s\"}", sep, $1, $2, json_escape(model)
      sep = ","
    }'
}

# The C locale keeps the decimal separator in SIZE a dot.
DISKS=$(LC_ALL=C lsblk -dn -o NAME,SIZE,TYPE,MODEL | disks_to_json)
# GPU Presence

# Print the description of the first NVIDIA/AMD/Intel display adapter found in
# `lspci` output read from stdin; print nothing when there is none.
# The class keywords are matched only against the device-class field, so a bus
# address such as "3d:00.0" cannot be mistaken for a "3D controller", and the
# description is everything after the class, even if it contains a colon.
first_gpu_description() {
  awk '
    {
      rest = $0
      sub(/^[^ ]+ /, "", rest)      # drop the bus address
      sep = index(rest, ": ")
      if (sep == 0) next
      kind = tolower(substr(rest, 1, sep - 1))
      desc = substr(rest, sep + 2)
      if (kind !~ /vga|3d|display/) next
      if (tolower(desc) !~ /nvidia|amd|intel/) next
      print desc
      exit
    }'
}

GPU_PRESENT=false
# lspci is optional: when it is missing the pipeline simply yields nothing.
GPU_INFO=$(lspci 2> /dev/null | first_gpu_description)
if [[ -n "$GPU_INFO" ]]; then
  GPU_PRESENT=true
fi
# Virtualization
# Flag support when lscpu prints a "Virtualization:" line and record the
# second whitespace-separated word of that line as the type.
VIRT_SUPPORTED=false
if lscpu | grep -q "Virtualization:"; then
  VIRT_TYPE=$(lscpu | awk '/Virtualization:/ { print $2 }')
  VIRT_SUPPORTED=true
fi
# Network Interfaces

# Turn interface names (one per line on stdin) into a JSON array of
# {"name": ...} objects. Used when the detailed jq-based listing is
# unavailable; always prints a complete array, "[]" for empty input.
interface_names_to_json() {
  awk '
    NF { printf "%s{\"name\": \"%s\"}", (count++ ? "," : "["), $1 }
    END { print (count ? "]" : "[]") }'
}

# Preferred path: structured output from `ip -j`, reshaped by jq.
INTERFACES=$(
  ip -j addr show 2> /dev/null \
    | jq -c '[.[] | {name: .ifname, active: (if .operstate == "UP" then true else false end), ips: [.addr_info[].local]}]' 2> /dev/null
) || INTERFACES=""

# Fallback when jq is missing, `ip -j` is unsupported, or nothing was produced:
# list the interface names only, still as a valid JSON array.
if [[ -z "$INTERFACES" ]]; then
  INTERFACES=$(
    ip addr show \
      | awk '/^[0-9]/ { sub(/:$/, "", $2); print $2 }' \
      | interface_names_to_json
  )
fi
# Tailscale
# Three states: binary absent, binary present but `tailscale status` failing,
# or active (in which case the IPv4 address is recorded).
TAILSCALE_IP="null"
if ! command -v tailscale > /dev/null 2>&1; then
  TAILSCALE_STATUS="not-installed"
elif tailscale status > /dev/null 2>&1; then
  TAILSCALE_STATUS="active"
  TAILSCALE_IP=$(tailscale ip -4)
else
  TAILSCALE_STATUS="installed-inactive"
fi
# Docker
# Available only when the CLI exists and `docker info` succeeds.
DOCKER_AVAILABLE=false
if command -v docker > /dev/null 2>&1 && docker info > /dev/null 2>&1; then
  DOCKER_AVAILABLE=true
fi
# Connectivity
# Probe an external HTTPS endpoint. The timeouts bound the probe so discovery
# cannot stall on networks that silently drop outbound traffic.
CONNECTIVITY="unknown"
if curl -s --head --connect-timeout 5 --max-time 10 https://google.com > /dev/null 2>&1; then
  CONNECTIVITY="internet-access"
fi
# Output Construction (JSON by default, YAML when OUTPUT_FORMAT is "yaml")

# Escape backslashes, double quotes and control whitespace so the argument can
# be placed inside a double-quoted JSON (or YAML) scalar.
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Print the argument as a double-quoted, escaped scalar.
json_string() {
  printf '"%s"' "$(json_escape "$1")"
}

# Print the argument when it is a plain non-negative integer, `null` otherwise,
# so a failed probe cannot produce a syntactically broken document.
json_number() {
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    printf '%s' "$1"
  else
    printf 'null'
  fi
}

# Print `true` only for the literal value "true"; anything else is `false`.
json_bool() {
  if [[ "$1" == "true" ]]; then
    printf 'true'
  else
    printf 'false'
  fi
}

# Use CPU_THREADS when it holds a usable count; otherwise ask lscpu for the
# logical CPU count.
out_threads=$CPU_THREADS
if ! [[ "$out_threads" =~ ^[1-9][0-9]*$ ]]; then
  out_threads=$(lscpu | grep "^CPU(s):" | awk '{print $2}')
fi

# The interface list must be one array; wrap a bare comma-separated list.
out_interfaces=$INTERFACES
if [[ "$out_interfaces" != \[* ]]; then
  out_interfaces="[${out_interfaces}]"
fi

# An absent Tailscale address is emitted as a real null, not the string "null".
if [[ -z "$TAILSCALE_IP" || "$TAILSCALE_IP" == "null" ]]; then
  out_tailscale_ip=null
else
  out_tailscale_ip=$(json_string "$TAILSCALE_IP")
fi

out_hostname=$(json_string "$HOSTNAME")
out_distro=$(json_string "$OS_DISTRO")
out_arch=$(json_string "$ARCH")
out_cpu_model=$(json_string "$CPU_MODEL")
out_cores=$(json_number "$CPU_CORES")
out_threads=$(json_number "$out_threads")
out_memory=$(json_number "$RAM_TOTAL_GB")
out_gpu_present=$(json_bool "$GPU_PRESENT")
out_gpu_info=$(json_string "${GPU_INFO:-none}")
out_virt_supported=$(json_bool "$VIRT_SUPPORTED")
out_virt_type=$(json_string "${VIRT_TYPE:-none}")
out_tailscale_status=$(json_string "$TAILSCALE_STATUS")
out_connectivity=$(json_string "$CONNECTIVITY")
out_docker=$(json_bool "$DOCKER_AVAILABLE")

if [[ "${OUTPUT_FORMAT:-json}" == "yaml" ]]; then
  # Block-style YAML; the disk and interface lists stay in JSON flow syntax,
  # which is also valid YAML.
  cat <<EOF
hostname: $out_hostname
os:
  distro: $out_distro
  arch: $out_arch
hardware:
  cpu:
    model: $out_cpu_model
    cores: $out_cores
    threads: $out_threads
  memory:
    total_gb: $out_memory
  gpu:
    present: $out_gpu_present
    info: $out_gpu_info
  disks: [$DISKS]
virtualization:
  supported: $out_virt_supported
  type: $out_virt_type
network:
  interfaces: $out_interfaces
  tailscale:
    status: $out_tailscale_status
    ip: $out_tailscale_ip
  connectivity: $out_connectivity
docker:
  available: $out_docker
EOF
else
  cat <<EOF
{
  "hostname": $out_hostname,
  "os": {
    "distro": $out_distro,
    "arch": $out_arch
  },
  "hardware": {
    "cpu": {
      "model": $out_cpu_model,
      "cores": $out_cores,
      "threads": $out_threads
    },
    "memory": {
      "total_gb": $out_memory
    },
    "gpu": {
      "present": $out_gpu_present,
      "info": $out_gpu_info
    },
    "disks": [$DISKS]
  },
  "virtualization": {
    "supported": $out_virt_supported,
    "type": $out_virt_type
  },
  "network": {
    "interfaces": $out_interfaces,
    "tailscale": {
      "status": $out_tailscale_status,
      "ip": $out_tailscale_ip
    },
    "connectivity": $out_connectivity
  },
  "docker": {
    "available": $out_docker
  }
}
EOF
fi

View file

@ -0,0 +1,113 @@
#!/usr/bin/env python3
import json
import sys
import os
import yaml
from pathlib import Path
# lsblk reports sizes with a binary-unit suffix; each factor converts to GiB.
_SIZE_SUFFIX_TO_GB = {
    "B": 1 / 1024**3,
    "K": 1 / 1024**2,
    "M": 1 / 1024,
    "G": 1,
    "T": 1024,
    "P": 1024**2,
}


def disk_capacity_gb(disks):
    """Return the summed capacity of ``disks`` in GiB, rounded to 2 decimals.

    Each entry is a mapping whose ``size`` value is a number followed by a
    unit letter (for example ``"931.5G"`` or ``"1.8T"``). Entries with a
    missing, malformed or unknown-unit size are skipped rather than raising,
    because the figure is only a rough estimate.
    """
    total = 0.0
    for disk in disks:
        size = str(disk.get("size", "")).strip().upper()
        if len(size) < 2:
            continue
        factor = _SIZE_SUFFIX_TO_GB.get(size[-1])
        if factor is None:
            continue
        try:
            total += float(size[:-1]) * factor
        except ValueError:
            continue
    return round(total, 2)


def generate_inventory(discovery_data):
    """Write the inventory YAML files for one discovered node.

    Creates ``hosts/<hostname>/`` relative to the current working directory
    and writes ``host.yaml``, ``capabilities.yaml``, ``paths.yaml`` and
    ``networking.yaml`` into it, overwriting existing files.

    Args:
        discovery_data: Parsed discovery document (a dict). Apart from
            ``hostname``, which defaults to ``"unknown-node"``, the keys read
            below are required.

    Raises:
        KeyError: If a required key is missing. All documents are built before
            anything is written, so no partial inventory is left behind.
        ValueError: If the hostname cannot be used safely as a directory name.
    """
    hostname = discovery_data.get("hostname", "unknown-node")
    # The hostname becomes a path component, so refuse anything that could
    # point outside hosts/.
    if (
        not isinstance(hostname, str)
        or hostname in ("", ".", "..")
        or "/" in hostname
        or os.sep in hostname
    ):
        raise ValueError(f"Unsafe hostname in discovery data: {hostname!r}")

    hardware = discovery_data["hardware"]
    virtualization = discovery_data["virtualization"]
    tailscale = discovery_data["network"]["tailscale"]
    tailscale_active = tailscale["status"] == "active"
    # Discovery may encode "no address" as the string "null"; store a real
    # null in the inventory instead of a bogus IP string.
    tailscale_ip = tailscale["ip"]
    if tailscale_ip in ("", "null"):
        tailscale_ip = None

    # 1. host.yaml
    host_yaml = {
        "hostname": hostname,
        "roles": ["unassigned"],
        "network": {
            "tailscale_ip": tailscale_ip
        },
        "runtime": {
            "root": "/opt/homelab"
        },
        "deployment": {
            "mode": "pull",
            "managed_by": "saturn"
        }
    }

    # 2. capabilities.yaml
    capabilities_yaml = {
        "capabilities": {
            "hardware": {
                "cpu": {
                    "arch": discovery_data["os"]["arch"],
                    "cores": hardware["cpu"]["cores"],
                    "threads": hardware["cpu"]["threads"]
                },
                "memory": {
                    "total_gb": hardware["memory"]["total_gb"]
                },
                "acceleration": {
                    "type": "gpu" if hardware["gpu"]["present"] else "none"
                }
            },
            "virtualization": {
                "supported": virtualization["supported"],
                "type": virtualization["type"]
            },
            "storage": {
                "persistence": "persistent",
                "type": "ssd",  # Default assumption
                "capacity_gb": disk_capacity_gb(hardware["disks"])  # Rough estimate
            },
            "networking": {
                "reachability": "tailscale-only" if tailscale_active else "direct",
                "ingress_suitability": False,
                "bandwidth": "unknown"
            },
            "runtime": {
                "container_engine": "docker" if discovery_data["docker"]["available"] else "none",
                "os": discovery_data["os"]["distro"]
            }
        }
    }

    # 3. paths.yaml
    paths_yaml = {
        "host": hostname,
        "runtime_root": "/opt/homelab",
        "conventions": {
            "services": "/opt/homelab/services",
            "data": "/opt/homelab/data",
            "config": "/opt/homelab/config",
            "logs": "/opt/homelab/logs"
        }
    }

    # 4. networking.yaml
    networking_yaml = {
        "host": hostname,
        "uplink": {
            "type": "unknown",
            "connectivity": "unknown"
        },
        "tailscale": {
            "enabled": tailscale_active,
            "host_ip": tailscale_ip,
            "role": "internal-management"
        }
    }

    documents = {
        "host.yaml": host_yaml,
        "capabilities.yaml": capabilities_yaml,
        "paths.yaml": paths_yaml,
        "networking.yaml": networking_yaml,
    }

    host_dir = Path(f"hosts/{hostname}")
    host_dir.mkdir(parents=True, exist_ok=True)
    for filename, document in documents.items():
        with open(host_dir / filename, "w") as f:
            yaml.dump(document, f, sort_keys=False)

    print(f"Inventory generated for {hostname} in {host_dir}")
def main():
    """Load a discovery document and generate the inventory for it.

    The document is read from the file named by the first command-line
    argument when one is given, otherwise from standard input.
    """
    input_path = sys.argv[1] if len(sys.argv) > 1 else None
    if input_path is None:
        # Read from stdin
        data = json.load(sys.stdin)
    else:
        with open(input_path, "r") as f:
            data = json.load(f)
    generate_inventory(data)


if __name__ == "__main__":
    main()

121
scripts/bootstrap/prepare-node.sh Executable file
View file

@ -0,0 +1,121 @@
#!/bin/bash
# scripts/bootstrap/prepare-node.sh
# Real node preparation script for the homelab platform.
# Responsibilities:
# - validate Linux environment
# - create runtime directories
# - install/check dependencies (git, docker, tailscale)
# - create homelab runtime layout
# - validate Docker daemon
# - validate network access
# - support idempotent re-runs
set -e
# Configuration
RUNTIME_ROOT="/opt/homelab"
DIRECTORIES=("config" "data" "logs" "state" "backups")
# Keep the log out of world-writable /tmp: with a fixed name there, another
# local user could pre-create the path (for example as a symlink) before root
# appends to it. Non-root invocations are rejected further down, so they log
# to /dev/null and the rejection message can still be printed.
if [[ $EUID -eq 0 ]]; then
  LOG_FILE="/var/log/homelab-prepare-node.log"
else
  LOG_FILE="/dev/null"
fi
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Print an [INFO] line (backslash escapes expanded) and append it to LOG_FILE.
log() {
  printf '%b\n' "${GREEN}[INFO]${NC} $1" | tee -a "$LOG_FILE"
}
# Print a [WARN] line (backslash escapes expanded) and append it to LOG_FILE.
warn() {
  printf '%b\n' "${YELLOW}[WARN]${NC} $1" | tee -a "$LOG_FILE"
}
# Print an [ERROR] line to stderr, append it to LOG_FILE, and exit with status 1.
error() {
  printf '%b\n' "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE" >&2
  exit 1
}
log "Starting homelab node preparation..."
# 1. Validate Linux environment
# Both guards abort through error() when their condition does not hold.
[[ "$OSTYPE" == "linux-gnu"* ]] || error "This script only supports Linux."
(( EUID == 0 )) || error "This script must be run as root (use sudo)."
# 2. Create runtime directories
log "Creating runtime directories in $RUNTIME_ROOT..."
# Set the mode only on the directories this script manages. A recursive chmod
# would, on every re-run, rewrite the permissions of everything stored
# underneath (restored data, private keys, ...), making it world-readable.
mkdir -p "$RUNTIME_ROOT"
chmod 755 "$RUNTIME_ROOT"
for dir in "${DIRECTORIES[@]}"; do
  mkdir -p "$RUNTIME_ROOT/$dir"
  chmod 755 "$RUNTIME_ROOT/$dir"
done
# 3. Install/check dependencies
# Refresh the apt package index and install the base packages.
install_apt_deps() {
  local -a packages=(git curl apt-transport-https ca-certificates gnupg lsb-release)
  log "Updating apt and installing dependencies..."
  apt-get update -y
  apt-get install -y "${packages[@]}"
}
# Docker installation
if ! command -v docker &> /dev/null; then
  log "Installing Docker..."
  install_apt_deps
  # Download the installer to a private temporary file instead of a fixed name
  # in the current directory, and remove it on every path, including failure.
  docker_installer=$(mktemp) || error "Could not create a temporary file for the Docker installer."
  if ! curl -fsSL https://get.docker.com -o "$docker_installer"; then
    rm -f -- "$docker_installer"
    error "Failed to download the Docker installer."
  fi
  if ! sh "$docker_installer"; then
    rm -f -- "$docker_installer"
    error "Docker installation failed."
  fi
  rm -f -- "$docker_installer"
else
  log "Docker is already installed."
fi
# Docker Compose Plugin
# Install the plugin package only when `docker compose version` fails.
if docker compose version > /dev/null 2>&1; then
  log "Docker Compose plugin is already installed."
else
  log "Installing Docker Compose plugin..."
  apt-get update -y
  apt-get install -y docker-compose-plugin
fi
# Tailscale installation
# Run the upstream install script only when the tailscale binary is absent.
if command -v tailscale > /dev/null 2>&1; then
  log "Tailscale is already installed."
else
  log "Installing Tailscale..."
  curl -fsSL https://tailscale.com/install.sh | sh
fi
# 4. Validate Docker daemon
log "Validating Docker daemon..."
# Enable and start the service when systemd does not report it as active.
if ! systemctl is-active --quiet docker; then
  log "Starting Docker service..."
  systemctl enable --now docker
fi
# The daemon must answer `docker info`, otherwise abort.
docker info > /dev/null 2>&1 || error "Docker daemon is not responding correctly."
# 5. Validate network access
log "Validating network access..."
# Rely on curl's exit status rather than grepping for "200 OK": the endpoint
# answers with a redirect, and HTTP/2 status lines carry no "OK" text, so the
# textual match never succeeded. --fail turns HTTP errors into a non-zero
# status, --location follows the redirect, --max-time bounds the probe.
if ! curl -fs --head --location --max-time 10 -o /dev/null https://google.com; then
  warn "External network access might be limited."
fi
# 6. Prepare SSH access assumptions
log "Checking SSH access assumptions..."
# Key setup is left to the user; this step only makes sure the directory
# exists, creating it with owner-only permissions when it is missing.
ssh_dir="$HOME/.ssh"
if [[ ! -d "$ssh_dir" ]]; then
  mkdir -p "$ssh_dir"
  chmod 700 "$ssh_dir"
fi
log "Node preparation completed successfully!"
log "Runtime layout at $RUNTIME_ROOT is ready."
log "Next step: Run scripts/bootstrap/discover-node.sh to generate discovery data."