diff --git a/docs/node-onboarding.md b/docs/node-onboarding.md new file mode 100644 index 0000000..7d78665 --- /dev/null +++ b/docs/node-onboarding.md @@ -0,0 +1,82 @@ +# Node Onboarding Workflow + +This document describes the process of onboarding a new Linux machine into the homelab platform. + +## Overview + +The onboarding process consists of three main stages: +1. **Preparation**: Setting up the runtime environment and dependencies. +2. **Discovery**: Collecting hardware and software characteristics of the node. +3. **Inventory Generation**: Creating the YAML configuration files for the node in the central inventory. + +## Prerequisites + +- A fresh Linux machine (Debian/Ubuntu recommended). +- SSH access with sudo privileges. +- Tailscale account (if using Tailscale for networking). + +## Onboarding Steps + +### 1. Node Preparation + +Run the `prepare-node.sh` script on the target node. This script will install Docker, Tailscale, and create the `/opt/homelab` directory structure. + +```bash +sudo ./scripts/bootstrap/prepare-node.sh +``` + +**Manual Step**: If you are using Tailscale, you must manually authenticate it after the script runs: +```bash +sudo tailscale up +``` + +### 2. Node Discovery + +Run the `discover-node.sh` script to collect system information. It is recommended to redirect the output to a file. + +```bash +./scripts/bootstrap/discover-node.sh > discovery-$(hostname).json +``` + +### 3. Inventory Generation + +Copy the discovery JSON file to your management machine (where the homelab repository is located) and run the inventory generator. + +```bash +./scripts/bootstrap/generate-node-inventory.py discovery-node-name.json +``` + +This will create a new directory in `hosts/<node-name>/` with the following files: +- `host.yaml`: Basic host identity and roles. +- `capabilities.yaml`: Hardware and software capabilities. +- `paths.yaml`: Runtime path definitions. +- `networking.yaml`: Networking configuration. + +### 4. Finalization + +1. 
Review the generated YAML files in `hosts/<node-name>/`. +2. Assign appropriate roles to the node in `hosts/<node-name>/host.yaml`. +3. Commit the new host configuration to the repository. +4. Run the deployment script to apply the initial configuration: + ```bash + ./scripts/deploy/deploy-node.sh + ``` + +## Recovery Onboarding + +If a node needs to be re-onboarded after a failure: +1. Run `prepare-node.sh` again. It is idempotent and will ensure the environment is correct. +2. Restore any critical data to `/opt/homelab/data/` and `/opt/homelab/backups/`. +3. Re-run `discover-node.sh` if hardware has changed, or reuse the existing inventory if it hasn't. + +## Tailscale Assumptions + +- Nodes are assumed to use Tailscale for management and inter-node communication. +- The `networking.yaml` will be populated with the Tailscale IP found during discovery. +- If Tailscale is not used, manual adjustment of `networking.yaml` and `host.yaml` is required. + +## Troubleshooting + +- **Docker not starting**: Check `journalctl -u docker`. +- **Discovery fails**: Ensure all required tools (lscpu, lsblk, ip, etc.) are installed. +- **Inventory Generation error**: Ensure `PyYAML` is installed on the management machine. diff --git a/scripts/bootstrap/discover-node.sh b/scripts/bootstrap/discover-node.sh new file mode 100755 index 0000000..8990563 --- /dev/null +++ b/scripts/bootstrap/discover-node.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# scripts/bootstrap/discover-node.sh +# Node discovery script for the homelab platform. +# Collects system information and outputs it in JSON format. 
set -e

#######################################
# Print command-line usage.
# Outputs: usage text on stdout (callers redirect it when reporting errors).
#######################################
show_help() {
  echo "Usage: $0 [options]"
  echo "Options:"
  echo " --json Output in JSON format (default)"
  echo " --yaml Output in YAML format"
  echo " --help Show this help"
}

OUTPUT_FORMAT="json"

while [[ "$#" -gt 0 ]]; do
  case "$1" in
    --json) OUTPUT_FORMAT="json"; shift ;;
    --yaml) OUTPUT_FORMAT="yaml"; shift ;;
    --help) show_help; exit 0 ;;
    *)
      # Diagnostics go to stderr: stdout is normally redirected into the
      # discovery file and must contain nothing but the report itself.
      echo "Unknown parameter: $1" >&2
      show_help >&2
      exit 1
      ;;
  esac
done

# Check dependencies (hard requirements only; lspci and jq are probed
# separately below and treated as optional).
for cmd in hostnamectl lscpu free lsblk ip curl; do
  if ! command -v "$cmd" &> /dev/null; then
    echo "Error: Required command '$cmd' not found." >&2
    exit 1
  fi
done

# Collect Data
HOSTNAME=$(hostname)
OS_DISTRO=$(grep '^PRETTY_NAME=' /etc/os-release | cut -d'"' -f2)
ARCH=$(uname -m)

# Run lscpu once, in the C locale so the field labels are not translated.
# A failing lscpu degrades to empty CPU fields instead of aborting discovery.
LSCPU_OUTPUT=$(LC_ALL=C lscpu) || LSCPU_OUTPUT=""

#######################################
# Print the value of one lscpu field.
# Globals:   LSCPU_OUTPUT (read)
# Arguments: $1 - exact field label without the trailing colon
# Outputs:   the value of the first matching field, or nothing if absent
#######################################
lscpu_field() {
  # Labels are compared exactly after stripping leading blanks: recent lscpu
  # releases indent nested fields, and a substring match on "Model name"
  # would also pick up "BIOS Model name".
  awk -v key="$1" '
    {
      label = $0
      sub(/:.*/, "", label)
      sub(/^[ \t]+/, "", label)
    }
    label == key {
      value = $0
      sub(/^[^:]*:[ \t]*/, "", value)
      print value
      exit
    }
  ' <<<"$LSCPU_OUTPUT"
}

CPU_MODEL=$(lscpu_field "Model name")

# "CPU(s)" is the number of logical CPUs (hardware threads). Physical cores
# are cores-per-socket times sockets; when lscpu does not report numeric
# values for those, fall back to the logical count.
CPU_THREADS=$(lscpu_field "CPU(s)")
cores_per_socket=$(lscpu_field "Core(s) per socket")
socket_count=$(lscpu_field "Socket(s)")
if [[ "$cores_per_socket" =~ ^[0-9]+$ && "$socket_count" =~ ^[0-9]+$ ]]; then
  CPU_CORES=$(( cores_per_socket * socket_count ))
else
  CPU_CORES=$CPU_THREADS
fi

RAM_TOTAL_GB=$(LC_ALL=C free -g | awk '/^Mem:/ {print $2}')

# Disks: comma-separated JSON objects (no surrounding brackets), one per
# device whose TYPE column is exactly "disk". The model is everything from
# the fourth column onwards so multi-word models are kept intact; quotes and
# backslashes are dropped so the value cannot break the JSON string.
DISKS=$(lsblk -dno NAME,SIZE,TYPE,MODEL | awk '
  $3 == "disk" {
    model = ""
    for (i = 4; i <= NF; i++) model = model (i > 4 ? " " : "") $i
    gsub(/["\\]/, "", model)
    printf "%s{\"name\": \"%s\", \"size\": \"%s\", \"model\": \"%s\"}", sep, $1, $2, model
    sep = ","
  }
')

# GPU Presence (optional: needs lspci). GPU_INFO describes the first
# VGA/3D/display device listed, with the slot and class prefix removed.
GPU_PRESENT=false
GPU_INFO=""
if command -v lspci &> /dev/null; then
  gpu_lines=$(lspci | grep -i 'vga\|3d\|display' || true)
  if grep -qi 'nvidia\|amd\|intel' <<<"$gpu_lines"; then
    GPU_PRESENT=true
    GPU_INFO=$(head -n 1 <<<"$gpu_lines" \
      | sed -E 's/^[^[:space:]]+[[:space:]]+[^:]*:[[:space:]]*//')
  fi
fi

# Virtualization: lscpu prints a "Virtualization" field (e.g. VT-x, AMD-V)
# only when the CPU advertises hardware virtualization.
VIRT_SUPPORTED=false
VIRT_TYPE=$(lscpu_field "Virtualization")
if [[ -n "$VIRT_TYPE" ]]; then
  VIRT_SUPPORTED=true
fi

# Network Interfaces
# With jq available this is a JSON array of {name, active, ips} objects.
# NOTE(review): the fallback used when jq is missing yields a bare
# comma-separated list of quoted interface names (no brackets, no objects),
# which is a different shape - confirm the consumer accepts both.
INTERFACES=$(ip -j addr show \
  | jq -c '[.[] | {name: .ifname, active: (if .operstate == "UP" then true else false end), ips: [.addr_info[].local]}]' 2>/dev/null \
  || ip addr show \
  | grep '^[0-9]' \
  | awk '{print $2}' \
  | sed 's/://' \
  | xargs -I {} echo -n "\"{}\", " \
  | sed 's/, $//')

# Tailscale
+TAILSCALE_STATUS="not-installed" +TAILSCALE_IP="null" +if command -v tailscale &> /dev/null; then + if tailscale status &> /dev/null; then + TAILSCALE_STATUS="active" + TAILSCALE_IP=$(tailscale ip -4) + else + TAILSCALE_STATUS="installed-inactive" + fi +fi + +# Docker +DOCKER_AVAILABLE=false +if command -v docker &> /dev/null; then + if docker info &> /dev/null; then + DOCKER_AVAILABLE=true + fi +fi + +# Connectivity +CONNECTIVITY="unknown" +if curl -s --head https://google.com &> /dev/null; then + CONNECTIVITY="internet-access" +fi + +# Output Construction (JSON) +cat < 1: + with open(sys.argv[1], "r") as f: + data = json.load(f) + else: + # Read from stdin + data = json.load(sys.stdin) + + generate_inventory(data) + +if __name__ == "__main__": + main() diff --git a/scripts/bootstrap/prepare-node.sh b/scripts/bootstrap/prepare-node.sh new file mode 100755 index 0000000..7b70e19 --- /dev/null +++ b/scripts/bootstrap/prepare-node.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# scripts/bootstrap/prepare-node.sh +# Real node preparation script for the homelab platform. +# Responsibilities: +# - validate Linux environment +# - create runtime directories +# - install/check dependencies (git, docker, tailscale) +# - create homelab runtime layout +# - validate Docker daemon +# - validate network access +# - support idempotent re-runs + +set -e + +# Configuration +RUNTIME_ROOT="/opt/homelab" +DIRECTORIES=("config" "data" "logs" "state" "backups") +LOG_FILE="/tmp/homelab-prepare-node.log" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log() { + echo -e "${GREEN}[INFO]${NC} $1" | tee -a "$LOG_FILE" +} + +warn() { + echo -e "${YELLOW}[WARN]${NC} $1" | tee -a "$LOG_FILE" +} + +error() { + echo -e "${RED}[ERROR]${NC} $1" | tee -a "$LOG_FILE" >&2 + exit 1 +} + +log "Starting homelab node preparation..." + +# 1. Validate Linux environment +if [[ "$OSTYPE" != "linux-gnu"* ]]; then + error "This script only supports Linux." 
+fi + +if [[ $EUID -ne 0 ]]; then + error "This script must be run as root (use sudo)." +fi + +# 2. Create runtime directories +log "Creating runtime directories in $RUNTIME_ROOT..." +mkdir -p "$RUNTIME_ROOT" +for dir in "${DIRECTORIES[@]}"; do + mkdir -p "$RUNTIME_ROOT/$dir" +done +chmod -R 755 "$RUNTIME_ROOT" + +# 3. Install/check dependencies +install_apt_deps() { + log "Updating apt and installing dependencies..." + apt-get update -y + apt-get install -y git curl apt-transport-https ca-certificates gnupg lsb-release +} + +# Docker installation +if ! command -v docker &> /dev/null; then + log "Installing Docker..." + install_apt_deps + curl -fsSL https://get.docker.com -o get-docker.sh + sh get-docker.sh + rm get-docker.sh +else + log "Docker is already installed." +fi + +# Docker Compose Plugin +if ! docker compose version &> /dev/null; then + log "Installing Docker Compose plugin..." + apt-get update -y + apt-get install -y docker-compose-plugin +else + log "Docker Compose plugin is already installed." +fi + +# Tailscale installation +if ! command -v tailscale &> /dev/null; then + log "Installing Tailscale..." + curl -fsSL https://tailscale.com/install.sh | sh +else + log "Tailscale is already installed." +fi + +# 4. Validate Docker daemon +log "Validating Docker daemon..." +if ! systemctl is-active --quiet docker; then + log "Starting Docker service..." + systemctl enable --now docker +fi + +if ! docker info &> /dev/null; then + error "Docker daemon is not responding correctly." +fi + +# 5. Validate network access +log "Validating network access..." +if ! curl -s --head https://google.com | grep "200 OK" > /dev/null; then + warn "External network access might be limited." +fi + +# 6. Prepare SSH access assumptions +log "Checking SSH access assumptions..." +if [[ ! -d "$HOME/.ssh" ]]; then + mkdir -p "$HOME/.ssh" + chmod 700 "$HOME/.ssh" +fi +# We assume the user has already set up their keys or will do so. 
+# We just ensure the directory exists with correct permissions. + +log "Node preparation completed successfully!" +log "Runtime layout at $RUNTIME_ROOT is ready." +log "Next step: Run scripts/bootstrap/discover-node.sh to generate discovery data."