#!/usr/bin/env bash
# deploy.sh - Staged deployment framework for homelab nodes.
# Path: homelab-codex-ws/scripts/deploy/deploy.sh
#
# Usage: deploy.sh [--host NAME] [--service NAME] [--resume] [--stage STAGE | STAGE]
#   STAGE is one of: prepare, validate, deploy, verify, diagnose, complete.

# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail
# --- Configuration ---
# Runtime tree (state, logs, per-service config) and the repository checkout.
RUNTIME_PATH=/opt/homelab
STATE_DIR=$RUNTIME_PATH/state/deploy
LOG_DIR=$RUNTIME_PATH/logs/deploy
REPO_PATH=$HOME/homelab-codex-ws

# One log file per invocation, named after the start time.
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
LOG_FILE=$LOG_DIR/deploy_$TIMESTAMP.log

# --- Initialization ---
mkdir -p "$STATE_DIR" "$LOG_DIR"

# From here on stdout and stderr are both passed through tee, which appends
# everything to LOG_FILE while still writing it to the original stdout.
exec > >(tee -a "$LOG_FILE") 2>&1
# --- Helpers ---
#######################################
# Print one timestamped, level-tagged log line to stdout.
# Arguments:
#   $1   - level tag (the script uses INFO, WARN, ERROR and STRUCT)
#   $2.. - message words; joined with the first character of IFS
#          (a space by default)
# Outputs:
#   "[YYYY-MM-DD HH:MM:SS] [LEVEL] message"
#######################################
log() {
  local level=$1
  shift
  local message=$*
  # printf with a fixed format prints the message verbatim regardless of
  # its content.
  printf '[%s] [%s] %s\n' "$(date +'%Y-%m-%d %H:%M:%S')" "$level" "$message"
}
#######################################
# Structured log for machine reading: emits one key=value record through
# log() at level STRUCT (log() supplies the timestamp).
# Arguments:
#   $1 - stage
#   $2 - host
#   $3 - service
#   $4 - command result
#   $5 - free-form info (e.g. retry/diagnostic detail); emitted in quotes
# Outputs:
#   stage=... host=... service=... result=... info="..."
#######################################
struct_log() {
  local stage=$1 host=$2 service=$3 result=$4 info=$5
  log "STRUCT" "stage=$stage host=$host service=$service result=$result info=\"$info\""
}
# Record the stage name given as $1 in ${STATE_DIR}/current_stage,
# replacing whatever was stored there before.
set_state() {
  local stage_name=$1
  local state_file="${STATE_DIR}/current_stage"
  echo "$stage_name" > "$state_file"
}
# Print the stage stored in ${STATE_DIR}/current_stage, or the literal
# "none" when no stage has been recorded yet.
get_state() {
  local state_file="${STATE_DIR}/current_stage"
  if [[ ! -f "$state_file" ]]; then
    echo "none"
    return
  fi
  cat "$state_file"
}
# Record the service name given as $1 in ${STATE_DIR}/last_service.
# An empty argument clears the marker (the file then holds an empty line).
set_last_service() {
  # printf writes the value verbatim; echo would swallow names such as
  # "-n" or "-e" as options.
  printf '%s\n' "$1" > "${STATE_DIR}/last_service"
}
# Print the contents of ${STATE_DIR}/last_service, or an empty line when
# that file does not exist.
get_last_service() {
  local marker_file="${STATE_DIR}/last_service"
  if [[ ! -f "$marker_file" ]]; then
    echo ""
    return
  fi
  cat "$marker_file"
}
# --- CLI Parsing ---
# Defaults; overridden by the command line below.
TARGET_HOST=$(hostname)
TARGET_SERVICE=""
RESUME=false
REQUESTED_STAGE=""

#######################################
# Parse command-line arguments into the globals above.
# Globals (written): TARGET_HOST, TARGET_SERVICE, RESUME, REQUESTED_STAGE
# Arguments:
#   --host NAME      target host
#   --service NAME   restrict the run to one service
#   --resume         resume from the recorded state
#   --stage STAGE    stage to start from (stored as given)
#   STAGE            bare prepare|validate|deploy|verify|diagnose|complete
# Exits 1 when an option that needs a value is the last argument.
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      --host | --service | --stage)
        # Without this guard "shift 2" fails when the value is missing,
        # nothing is consumed, and the loop never terminates.
        if (( $# < 2 )); then
          log "ERROR" "Option $1 requires a value"
          exit 1
        fi
        case "$1" in
          --host) TARGET_HOST=$2 ;;
          --service) TARGET_SERVICE=$2 ;;
          --stage) REQUESTED_STAGE=$2 ;;
        esac
        shift 2
        ;;
      --resume)
        RESUME=true
        shift
        ;;
      prepare | validate | deploy | verify | diagnose | complete)
        REQUESTED_STAGE=$1
        shift
        ;;
      *)
        # Still ignored, as before, but no longer silently.
        log "WARN" "Ignoring unrecognized argument: $1"
        shift
        ;;
    esac
  done
}

parse_args "$@"
# --- Inventory Loading ---
#######################################
# Populate the global SERVICES array for TARGET_HOST.
# Globals:
#   REPO_PATH, TARGET_HOST, TARGET_SERVICE (read); SERVICES (written)
# Source of the list, in order of precedence:
#   1. TARGET_SERVICE, when set (single-element list)
#   2. hosts/<host>/services.txt  - whitespace-separated names
#   3. hosts/<host>/services.yaml - keys extracted with grep/sed
#   4. otherwise an empty list, with a warning
# Exits 1 when the host directory does not exist.
#######################################
load_inventory() {
  local host_dir="${REPO_PATH}/hosts/${TARGET_HOST}"

  log "INFO" "Loading inventory for host: $TARGET_HOST"
  if [[ ! -d "$host_dir" ]]; then
    log "ERROR" "Host directory not found: $host_dir"
    exit 1
  fi

  SERVICES=()
  if [[ -n "$TARGET_SERVICE" ]]; then
    SERVICES=("$TARGET_SERVICE")
  elif [[ -f "${host_dir}/services.txt" ]]; then
    # read -a splits on whitespace like the former $(cat ...) did, but
    # without pathname expansion, so an entry such as "*" stays literal.
    # read returns non-zero at end of input; that is expected here.
    read -r -d '' -a SERVICES < "${host_dir}/services.txt" || true
  elif [[ -f "${host_dir}/services.yaml" ]]; then
    read -r -d '' -a SERVICES < <(
      grep -A 100 "services:" "${host_dir}/services.yaml" \
        | grep "^ [a-z0-9_-]\+:" \
        | sed 's/ \(.*\):/\1/'
    ) || true
  else
    log "WARN" "No services found for $TARGET_HOST"
  fi

  log "INFO" "Services to process: ${SERVICES[*]}"
}
# --- Stages ---
#######################################
# PREPARE stage: refresh the repository checkout and make sure the
# runtime directory tree exists.
# Globals:   REPO_PATH, RUNTIME_PATH (read)
# Arguments: $1 - host name (used for logging only)
# Returns:   0 on success; 1 when the runtime directories cannot be
#            created. Exits 1 when REPO_PATH cannot be entered.
# Side effects: the working directory is changed to REPO_PATH and stays
#            there for the rest of the script.
#######################################
stage_prepare() {
  local host=$1
  local repo_status="repo_updated"

  log "INFO" "Stage: PREPARE ($host)"
  set_state "prepare"

  if ! cd "$REPO_PATH"; then
    log "ERROR" "Cannot enter repository: $REPO_PATH"
    exit 1
  fi

  log "INFO" "Pulling latest changes..."
  if ! git pull; then
    # A failed pull is tolerated, but the structured record must not
    # claim that the repository was updated.
    log "WARN" "Git pull failed, proceeding with local state (offline mode or network flap)"
    repo_status="repo_pull_failed"
  fi

  if ! mkdir -p "${RUNTIME_PATH}/config" "${RUNTIME_PATH}/data" \
    "${RUNTIME_PATH}/state" "${RUNTIME_PATH}/logs"; then
    log "ERROR" "Failed to create runtime directories under ${RUNTIME_PATH}"
    struct_log "prepare" "$host" "all" "fail" "runtime_dirs_failed"
    return 1
  fi

  struct_log "prepare" "$host" "all" "success" "$repo_status"
}
#######################################
# VALIDATE stage: check that every entry of SERVICES has a definition
# directory under ${REPO_PATH}/services.
# Globals:   REPO_PATH, SERVICES (read)
# Arguments: $1 - host name (used for logging only)
# Returns:   0 when all services are present, 1 at the first missing one.
#######################################
stage_validate() {
  local host=$1
  local service # keep the loop variable out of the global scope

  log "INFO" "Stage: VALIDATE ($host)"
  set_state "validate"

  for service in "${SERVICES[@]}"; do
    log "INFO" "Validating $service..."
    if [[ ! -d "${REPO_PATH}/services/$service" ]]; then
      log "ERROR" "Service definition not found: $service"
      struct_log "validate" "$host" "$service" "fail" "not_found"
      return 1
    fi
  done

  struct_log "validate" "$host" "all" "success" "validated"
}
#######################################
# DEPLOY stage: run "docker compose up -d --remove-orphans" for each
# entry of SERVICES, in order.
# Globals:   REPO_PATH, RUNTIME_PATH, SERVICES, RESUME (read)
# Arguments: $1 - host name (used for logging only)
# Returns:   0 when every service deployed; 1 at the first failure
#            (after collecting diagnostics for that service).
# Resume:    with RESUME=true and a recorded last service, services
#            before that one are skipped and deployment restarts at it.
# Per service: an optional docker-compose.override.yml from
#            ${RUNTIME_PATH}/config/<service> is appended, and a .env is
#            taken from that directory or, failing that, from the
#            service definition directory.
#######################################
stage_deploy() {
  local host=$1
  local service candidate last_s
  local skip=false
  local svc_dir runtime_config_dir env_file
  local -a compose_args run_cmd

  log "INFO" "Stage: DEPLOY ($host)"
  set_state "deploy"

  last_s=$(get_last_service)
  if [[ "$RESUME" == "true" && -n "$last_s" ]]; then
    # Only skip ahead when the marker names a service that is actually in
    # the list; otherwise every service would be skipped and the stage
    # would report success without deploying anything.
    for candidate in "${SERVICES[@]}"; do
      if [[ "$candidate" == "$last_s" ]]; then
        skip=true
        break
      fi
    done
    if [[ "$skip" != "true" ]]; then
      log "WARN" "Resume marker '$last_s' is not in the service list; deploying all services"
    fi
  fi

  for service in "${SERVICES[@]}"; do
    if [[ "$skip" == "true" ]]; then
      if [[ "$service" == "$last_s" ]]; then
        skip=false
        log "INFO" "Resuming from $service..."
      else
        log "INFO" "Skipping $service (already processed)"
        continue
      fi
    fi

    log "INFO" "Deploying $service..."
    # Recorded before the attempt so a failed run resumes at this service.
    set_last_service "$service"

    svc_dir="${REPO_PATH}/services/$service"
    runtime_config_dir="${RUNTIME_PATH}/config/$service"
    mkdir -p "$runtime_config_dir"

    compose_args=("-f" "${svc_dir}/docker-compose.yml")
    if [[ -f "${runtime_config_dir}/docker-compose.override.yml" ]]; then
      log "INFO" "Using override for $service"
      compose_args+=("-f" "${runtime_config_dir}/docker-compose.override.yml")
    fi

    # Determine .env: the runtime config directory wins over the repo copy.
    env_file=""
    if [[ -f "${runtime_config_dir}/.env" ]]; then
      env_file="${runtime_config_dir}/.env"
    elif [[ -f "${svc_dir}/.env" ]]; then
      env_file="${svc_dir}/.env"
    fi

    run_cmd=("docker" "compose" "${compose_args[@]}")
    if [[ -n "$env_file" ]]; then
      run_cmd+=("--env-file" "$env_file")
    fi
    run_cmd+=("up" "-d" "--remove-orphans")

    log "INFO" "Running: ${run_cmd[*]}"
    if ! "${run_cmd[@]}"; then
      log "ERROR" "Failed to deploy $service"
      struct_log "deploy" "$host" "$service" "fail" "docker_compose_failed"
      stage_diagnose "$host" "$service"
      return 1
    fi
    struct_log "deploy" "$host" "$service" "success" "deployed"
  done

  # All services done: clear the resume marker.
  set_last_service ""
}
#######################################
# VERIFY stage: confirm that every entry of SERVICES is healthy.
# A service with services/<name>/healthcheck.sh is verified by running
# that script with bash; otherwise a running container whose name matches
# the service name must exist.
# Globals:   REPO_PATH, SERVICES (read)
# Arguments: $1 - host name (used for logging only)
# Returns:   0 when all services pass; 1 at the first failure (after
#            collecting diagnostics for that service).
#######################################
stage_verify() {
  local host=$1
  local service health_script running_ids

  log "INFO" "Stage: VERIFY ($host)"
  set_state "verify"

  for service in "${SERVICES[@]}"; do
    log "INFO" "Verifying $service..."
    health_script="${REPO_PATH}/services/${service}/healthcheck.sh"

    if [[ -f "$health_script" ]]; then
      if ! bash "$health_script"; then
        log "ERROR" "Healthcheck failed for $service"
        struct_log "verify" "$host" "$service" "fail" "healthcheck_failed"
        stage_diagnose "$host" "$service"
        return 1
      fi
    else
      # Capture the matching container IDs instead of piping into
      # "grep -q": under pipefail an early-exiting grep can make the
      # pipeline fail even though a container is running, and the table
      # header could match the pattern. A failing "docker ps" yields an
      # empty result and therefore counts as "not running".
      running_ids=$(docker ps --quiet --filter "name=$service" --filter "status=running")
      if [[ -z "$running_ids" ]]; then
        log "ERROR" "Container $service is not running"
        struct_log "verify" "$host" "$service" "fail" "container_not_running"
        stage_diagnose "$host" "$service"
        return 1
      fi
    fi

    struct_log "verify" "$host" "$service" "success" "verified"
  done
}
#######################################
# DIAGNOSE stage: print container and compose status for one service, or
# a plain container listing when no service is given.
# Globals:   REPO_PATH (read)
# Arguments: $1 - host name (used for logging only)
#            $2 - service name; optional, empty means "all"
# Outputs:   diagnostics on stdout between DIAGNOSTICS/END markers.
# Notes:     best-effort; the caller's working directory is left untouched.
#######################################
stage_diagnose() {
  local host=$1
  local service=${2:-}
  local svc_dir
  local -a ps_args=("ps")

  log "INFO" "Stage: DIAGNOSE ($host - ${service:-all})"
  echo "--- DIAGNOSTICS FOR ${service:-all} ---"

  # Only filter by name when a service was actually requested.
  if [[ -n "$service" ]]; then
    ps_args+=("--filter" "name=${service}")
  fi
  docker "${ps_args[@]}"

  if [[ -n "$service" ]]; then
    svc_dir="${REPO_PATH}/services/$service"
    if [[ -d "$svc_dir" ]]; then
      # Run the compose commands in a subshell: the cd does not leak to
      # the caller, and a failure here cannot terminate the whole script.
      (
        cd "$svc_dir" || exit 1
        docker compose ps
        docker compose logs --tail=50
      ) || log "WARN" "Compose diagnostics incomplete for $service"
    fi
  fi

  echo "--- END DIAGNOSTICS ---"
  struct_log "diagnose" "$host" "${service:-all}" "done" "diagnostics_collected"
}
#######################################
# COMPLETE stage: record that the deployment finished.
# Arguments: $1 - host name (used for logging only)
# Side effects: sets the persisted stage to "complete" and emits a
#            structured success record.
#######################################
stage_complete() {
  local host=$1
  log "INFO" "Stage: COMPLETE ($host)"
  set_state "complete"
  struct_log "complete" "$host" "all" "success" "deployment_finished"
}
# --- Execution Logic ---
#######################################
# Run the deployment pipeline from a given stage through to "complete".
# Stage order: prepare -> validate -> deploy -> verify -> complete.
# Globals:   TARGET_HOST (read; passed to every stage function)
# Arguments: $1 - stage to start from
# Returns:   0 when every stage from $1 onward succeeded; 1 when a stage
#            failed (later stages are not run) or $1 is not a pipeline
#            stage.
#######################################
run_deployment() {
  local start_stage=$1
  local -a pipeline=(prepare validate deploy verify complete)
  local stage
  local started=false

  for stage in "${pipeline[@]}"; do
    if [[ "$stage" == "$start_stage" ]]; then
      started=true
    fi
    # Once the start stage is reached, run it and everything after it.
    if [[ "$started" == "true" ]]; then
      "stage_${stage}" "$TARGET_HOST" || return 1
    fi
  done

  if [[ "$started" != "true" ]]; then
    log "ERROR" "Invalid stage: $start_stage"
    return 1
  fi
}
# --- Main ---
# Selects the starting stage from the command line, runs the pipeline and
# exits with its status so callers (cron, CI, wrappers) can detect failure.
log "INFO" "--- Homelab Deployment Started (Host: $TARGET_HOST, Service: ${TARGET_SERVICE:-all}) ---"

load_inventory

deploy_rc=0
if [[ "$RESUME" == "true" ]]; then
  # --resume takes precedence over any requested stage.
  CURRENT=$(get_state)
  log "INFO" "Resuming from state: $CURRENT"
  case "$CURRENT" in
    prepare | validate | deploy | verify)
      run_deployment "$CURRENT" || deploy_rc=$?
      ;;
    complete)
      log "INFO" "Last deployment was complete. Nothing to resume."
      ;;
    *)
      log "INFO" "No valid state to resume. Starting from prepare..."
      run_deployment "prepare" || deploy_rc=$?
      ;;
  esac
elif [[ -n "$REQUESTED_STAGE" ]]; then
  if [[ "$REQUESTED_STAGE" == "diagnose" ]]; then
    # diagnose is a stand-alone action, not part of the pipeline.
    stage_diagnose "$TARGET_HOST" "$TARGET_SERVICE" || deploy_rc=$?
  else
    run_deployment "$REQUESTED_STAGE" || deploy_rc=$?
  fi
else
  run_deployment "prepare" || deploy_rc=$?
fi

if (( deploy_rc != 0 )); then
  log "ERROR" "--- Homelab Deployment Failed (exit code ${deploy_rc}) ---"
else
  log "INFO" "--- Homelab Deployment Finished ---"
fi
exit "$deploy_rc"