Harden deployment runtime framework #5
|
|
@ -12,7 +12,17 @@ This document describes the GitOps-lite deployment process for the homelab.
|
|||
|
||||
## Staged Deployment Framework
|
||||
|
||||
The homelab uses a staged deployment framework located at `scripts/deploy/deploy.sh`. This script is designed to be resumable, stage-aware, and observable.
|
||||
The homelab uses a modularized staged deployment framework located at `scripts/deploy/deploy.sh`. This script is designed to be resumable, stage-aware, and observable, with core logic split into maintainable libraries in `scripts/lib/`.
|
||||
|
||||
### Runtime Architecture
|
||||
|
||||
The runtime consists of:
|
||||
- `deploy.sh`: Orchestration entrypoint.
|
||||
- `lib/log.sh`: Logging and structured output.
|
||||
- `lib/state.sh`: Deployment state tracking and stage persistence.
|
||||
- `lib/inventory.sh`: Reliable host and service discovery (Python-based YAML parsing).
|
||||
- `lib/compose.sh`: Docker Compose operations.
|
||||
- `lib/diagnostics.sh`: Post-failure analysis and summary generation.
|
||||
|
||||
### Deployment Stages
|
||||
|
||||
|
|
@ -32,8 +42,16 @@ The homelab uses a staged deployment framework located at `scripts/deploy/deploy
|
|||
|
||||
If a deployment is interrupted (e.g., due to LTE disconnect on CHELSTY):
|
||||
1. Rerun the script with the `--resume` flag: `scripts/deploy/deploy.sh --resume`.
|
||||
2. The script reads the last incomplete stage and continues from there.
|
||||
3. In the `deploy` stage, it specifically resumes from the first service that was not successfully completed.
|
||||
2. The script identifies the last incomplete stage using deterministic markers (`/opt/homelab/state/deploy/stage_<name>_complete`) and continues from the exact failure point.
|
||||
3. In the `deploy` stage, it specifically resumes from the first service that was not successfully completed, skipping those already up.
|
||||
4. Repeated runs with `--resume` are safe and idempotent: stages already marked complete are skipped. Running with neither `--resume` nor `--stage` clears the saved state and starts a fresh deployment.
|
||||
|
||||
### Diagnostics and Troubleshooting
|
||||
|
||||
The runtime is designed to fail predictably and provide immediate feedback:
|
||||
- **Automatic Diagnostics**: If any stage fails, `collect_diagnostics` is triggered to capture system state and container logs into `/opt/homelab/logs/deploy/diagnostics_<timestamp>.txt`.
|
||||
- **Deployment Summary**: Every run concludes with a concise summary showing the host status, last stage reached, and log locations.
|
||||
- **Offline Resilience**: If `git pull` fails, the `prepare` stage logs a warning and continues with the existing local checkout, so a deployment can still proceed during network instability.
|
||||
|
||||
### Operational Semantics
|
||||
|
||||
|
|
|
|||
|
|
@ -13,11 +13,11 @@ This document defines the lifecycle of a service in the homelab and the procedur
|
|||
- Ensure `/opt/homelab/config/<service>` exists and contains required secrets/configs.
|
||||
- Setup environment variables from `env.example` into `/opt/homelab/config/<service>/.env`.
|
||||
3. **Deployment**:
|
||||
- `scripts/deploy/deploy.sh prepare`
|
||||
- `scripts/deploy/deploy.sh deploy`
|
||||
- `scripts/deploy/deploy.sh` (Starts fresh)
|
||||
- `scripts/deploy/deploy.sh --resume` (Continues after interruption)
|
||||
4. **Verification**:
|
||||
- `scripts/deploy/deploy.sh verify`
|
||||
- Healthchecks are automated within the verify stage.
|
||||
- Automatic as part of the `deploy.sh` pipeline (`verify` stage).
|
||||
- Manual: `scripts/deploy/deploy.sh --stage verify`.
|
||||
5. **Maintenance**:
|
||||
- Periodic updates via `docker compose pull`.
|
||||
- Log monitoring via `docker compose logs -f`.
|
||||
|
|
|
|||
|
|
@ -4,12 +4,12 @@
|
|||
set -o pipefail
|
||||
|
||||
# --- Configuration ---
|
||||
RUNTIME_PATH="/opt/homelab"
|
||||
STATE_DIR="${RUNTIME_PATH}/state/deploy"
|
||||
LOG_DIR="${RUNTIME_PATH}/logs/deploy"
|
||||
REPO_PATH="${HOME}/homelab-codex-ws"
|
||||
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
LOG_FILE="${LOG_DIR}/deploy_${TIMESTAMP}.log"
|
||||
export RUNTIME_PATH="/opt/homelab"
|
||||
export STATE_DIR="${RUNTIME_PATH}/state/deploy"
|
||||
export LOG_DIR="${RUNTIME_PATH}/logs/deploy"
|
||||
export REPO_PATH="${HOME}/homelab-codex-ws"
|
||||
export TIMESTAMP=$(date +%Y%m%d_%H%M%S)
|
||||
export LOG_FILE="${LOG_DIR}/deploy_${TIMESTAMP}.log"
|
||||
|
||||
# --- Initialization ---
|
||||
mkdir -p "$STATE_DIR" "$LOG_DIR"
|
||||
|
|
@ -17,52 +17,15 @@ mkdir -p "$STATE_DIR" "$LOG_DIR"
|
|||
# Redirection for logging
|
||||
exec > >(tee -a "$LOG_FILE") 2>&1
|
||||
|
||||
# --- Helpers ---
|
||||
|
||||
log() {
|
||||
local level=$1
|
||||
shift
|
||||
local message=$*
|
||||
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [$level] $message"
|
||||
}
|
||||
|
||||
# Structured log for machine reading
|
||||
# timestamp, stage, host, service, command_result, retry_info
|
||||
struct_log() {
|
||||
local stage=$1
|
||||
local host=$2
|
||||
local service=$3
|
||||
local result=$4
|
||||
local info=$5
|
||||
log "STRUCT" "stage=$stage host=$host service=$service result=$result info=\"$info\""
|
||||
}
|
||||
|
||||
set_state() {
|
||||
echo "$1" > "${STATE_DIR}/current_stage"
|
||||
}
|
||||
|
||||
get_state() {
|
||||
if [ -f "${STATE_DIR}/current_stage" ]; then
|
||||
cat "${STATE_DIR}/current_stage"
|
||||
else
|
||||
echo "none"
|
||||
fi
|
||||
}
|
||||
|
||||
set_last_service() {
|
||||
echo "$1" > "${STATE_DIR}/last_service"
|
||||
}
|
||||
|
||||
get_last_service() {
|
||||
if [ -f "${STATE_DIR}/last_service" ]; then
|
||||
cat "${STATE_DIR}/last_service"
|
||||
else
|
||||
echo ""
|
||||
fi
|
||||
}
|
||||
# --- Load Libraries ---
|
||||
LIB_PATH="${REPO_PATH}/scripts/lib"
|
||||
source "${LIB_PATH}/log.sh"
|
||||
source "${LIB_PATH}/state.sh"
|
||||
source "${LIB_PATH}/inventory.sh"
|
||||
source "${LIB_PATH}/compose.sh"
|
||||
source "${LIB_PATH}/diagnostics.sh"
|
||||
|
||||
# --- CLI Parsing ---
|
||||
|
||||
TARGET_HOST=$(hostname)
|
||||
TARGET_SERVICE=""
|
||||
RESUME=false
|
||||
|
|
@ -95,37 +58,17 @@ while [[ $# -gt 0 ]]; do
|
|||
esac
|
||||
done
|
||||
|
||||
# --- Inventory Loading ---
|
||||
|
||||
load_inventory() {
|
||||
log "INFO" "Loading inventory for host: $TARGET_HOST"
|
||||
|
||||
if [[ ! -d "${REPO_PATH}/hosts/${TARGET_HOST}" ]]; then
|
||||
log "ERROR" "Host directory not found: ${REPO_PATH}/hosts/${TARGET_HOST}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -n "$TARGET_SERVICE" ]]; then
|
||||
SERVICES=("$TARGET_SERVICE")
|
||||
else
|
||||
if [[ -f "${REPO_PATH}/hosts/${TARGET_HOST}/services.txt" ]]; then
|
||||
SERVICES=($(cat "${REPO_PATH}/hosts/${TARGET_HOST}/services.txt"))
|
||||
elif [[ -f "${REPO_PATH}/hosts/${TARGET_HOST}/services.yaml" ]]; then
|
||||
SERVICES=($(grep -A 100 "services:" "${REPO_PATH}/hosts/${TARGET_HOST}/services.yaml" | grep "^ [a-z0-9_-]\+:" | sed 's/ \(.*\):/\1/'))
|
||||
else
|
||||
log "WARN" "No services found for $TARGET_HOST"
|
||||
SERVICES=()
|
||||
fi
|
||||
fi
|
||||
log "INFO" "Services to process: ${SERVICES[*]}"
|
||||
}
|
||||
|
||||
# --- Stages ---
|
||||
|
||||
stage_prepare() {
|
||||
local host=$1
|
||||
if is_stage_complete "prepare" && [[ "$RESUME" == "true" ]]; then
|
||||
log "INFO" "Skipping PREPARE (already complete)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "INFO" "Stage: PREPARE ($host)"
|
||||
set_state "prepare"
|
||||
set_stage "prepare"
|
||||
|
||||
cd "$REPO_PATH" || exit 1
|
||||
log "INFO" "Pulling latest changes..."
|
||||
|
|
@ -133,15 +76,22 @@ stage_prepare() {
|
|||
log "WARN" "Git pull failed, proceeding with local state (offline mode or network flap)"
|
||||
fi
|
||||
|
||||
# Ensure runtime directories exist
|
||||
mkdir -p "${RUNTIME_PATH}/config" "${RUNTIME_PATH}/data" "${RUNTIME_PATH}/state" "${RUNTIME_PATH}/logs"
|
||||
|
||||
struct_log "prepare" "$host" "all" "success" "repo_updated"
|
||||
mark_stage_complete "prepare"
|
||||
}
|
||||
|
||||
stage_validate() {
|
||||
local host=$1
|
||||
if is_stage_complete "validate" && [[ "$RESUME" == "true" ]]; then
|
||||
log "INFO" "Skipping VALIDATE (already complete)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "INFO" "Stage: VALIDATE ($host)"
|
||||
set_state "validate"
|
||||
set_stage "validate"
|
||||
|
||||
for service in "${SERVICES[@]}"; do
|
||||
log "INFO" "Validating $service..."
|
||||
|
|
@ -153,12 +103,18 @@ stage_validate() {
|
|||
done
|
||||
|
||||
struct_log "validate" "$host" "all" "success" "validated"
|
||||
mark_stage_complete "validate"
|
||||
}
|
||||
|
||||
stage_deploy() {
|
||||
local host=$1
|
||||
if is_stage_complete "deploy" && [[ "$RESUME" == "true" ]]; then
|
||||
log "INFO" "Skipping DEPLOY (already complete)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "INFO" "Stage: DEPLOY ($host)"
|
||||
set_state "deploy"
|
||||
set_stage "deploy"
|
||||
|
||||
local last_s=$(get_last_service)
|
||||
local skip=false
|
||||
|
|
@ -180,48 +136,28 @@ stage_deploy() {
|
|||
log "INFO" "Deploying $service..."
|
||||
set_last_service "$service"
|
||||
|
||||
local svc_dir="${REPO_PATH}/services/$service"
|
||||
local runtime_config_dir="${RUNTIME_PATH}/config/$service"
|
||||
mkdir -p "$runtime_config_dir"
|
||||
|
||||
local compose_args=("-f" "${svc_dir}/docker-compose.yml")
|
||||
if [[ -f "${runtime_config_dir}/docker-compose.override.yml" ]]; then
|
||||
log "INFO" "Using override for $service"
|
||||
compose_args+=("-f" "${runtime_config_dir}/docker-compose.override.yml")
|
||||
fi
|
||||
|
||||
# Determine .env
|
||||
local env_file=""
|
||||
if [[ -f "${runtime_config_dir}/.env" ]]; then
|
||||
env_file="${runtime_config_dir}/.env"
|
||||
elif [[ -f "${svc_dir}/.env" ]]; then
|
||||
env_file="${svc_dir}/.env"
|
||||
fi
|
||||
|
||||
local run_cmd=("docker" "compose")
|
||||
run_cmd+=("${compose_args[@]}")
|
||||
if [[ -n "$env_file" ]]; then
|
||||
run_cmd+=("--env-file" "$env_file")
|
||||
fi
|
||||
run_cmd+=("up" "-d" "--remove-orphans")
|
||||
|
||||
log "INFO" "Running: ${run_cmd[*]}"
|
||||
if ! "${run_cmd[@]}"; then
|
||||
log "ERROR" "Failed to deploy $service"
|
||||
if ! run_compose_up "$service"; then
|
||||
struct_log "deploy" "$host" "$service" "fail" "docker_compose_failed"
|
||||
stage_diagnose "$host" "$service"
|
||||
collect_diagnostics "$host" "$service"
|
||||
return 1
|
||||
fi
|
||||
|
||||
struct_log "deploy" "$host" "$service" "success" "deployed"
|
||||
done
|
||||
|
||||
set_last_service ""
|
||||
mark_stage_complete "deploy"
|
||||
}
|
||||
|
||||
stage_verify() {
|
||||
local host=$1
|
||||
if is_stage_complete "verify" && [[ "$RESUME" == "true" ]]; then
|
||||
log "INFO" "Skipping VERIFY (already complete)"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log "INFO" "Stage: VERIFY ($host)"
|
||||
set_state "verify"
|
||||
set_stage "verify"
|
||||
|
||||
for service in "${SERVICES[@]}"; do
|
||||
log "INFO" "Verifying $service..."
|
||||
|
|
@ -230,46 +166,29 @@ stage_verify() {
|
|||
if ! bash "$health_script"; then
|
||||
log "ERROR" "Healthcheck failed for $service"
|
||||
struct_log "verify" "$host" "$service" "fail" "healthcheck_failed"
|
||||
stage_diagnose "$host" "$service"
|
||||
collect_diagnostics "$host" "$service"
|
||||
return 1
|
||||
fi
|
||||
else
|
||||
# Generic check if container is running
|
||||
if ! docker ps --filter "name=$service" --filter "status=running" | grep -q "$service"; then
|
||||
log "ERROR" "Container $service is not running"
|
||||
struct_log "verify" "$host" "$service" "fail" "container_not_running"
|
||||
stage_diagnose "$host" "$service"
|
||||
collect_diagnostics "$host" "$service"
|
||||
return 1
|
||||
fi
|
||||
fi
|
||||
struct_log "verify" "$host" "$service" "success" "verified"
|
||||
done
|
||||
}
|
||||
|
||||
stage_diagnose() {
|
||||
local host=$1
|
||||
local service=$2
|
||||
log "INFO" "Stage: DIAGNOSE ($host - ${service:-all})"
|
||||
|
||||
echo "--- DIAGNOSTICS FOR ${service:-all} ---"
|
||||
docker ps --filter "name=${service:-}"
|
||||
|
||||
if [[ -n "$service" ]]; then
|
||||
local svc_dir="${REPO_PATH}/services/$service"
|
||||
if [[ -d "$svc_dir" ]]; then
|
||||
cd "$svc_dir" || exit 1
|
||||
docker compose ps
|
||||
docker compose logs --tail=50
|
||||
fi
|
||||
fi
|
||||
echo "--- END DIAGNOSTICS ---"
|
||||
struct_log "diagnose" "$host" "${service:-all}" "done" "diagnostics_collected"
|
||||
mark_stage_complete "verify"
|
||||
}
|
||||
|
||||
stage_complete() {
|
||||
local host=$1
|
||||
log "INFO" "Stage: COMPLETE ($host)"
|
||||
set_state "complete"
|
||||
set_stage "complete"
|
||||
struct_log "complete" "$host" "all" "success" "deployment_finished"
|
||||
clear_deployment_state
|
||||
}
|
||||
|
||||
# --- Execution Logic ---
|
||||
|
|
@ -277,6 +196,7 @@ stage_complete() {
|
|||
run_deployment() {
|
||||
local start_stage=$1
|
||||
|
||||
# Sequential execution from start_stage
|
||||
case "$start_stage" in
|
||||
prepare)
|
||||
stage_prepare "$TARGET_HOST" || return 1
|
||||
|
|
@ -304,31 +224,45 @@ run_deployment() {
|
|||
|
||||
log "INFO" "--- Homelab Deployment Started (Host: $TARGET_HOST, Service: ${TARGET_SERVICE:-all}) ---"
|
||||
|
||||
load_inventory
|
||||
if ! load_inventory "$TARGET_HOST" "$TARGET_SERVICE"; then
|
||||
log "ERROR" "Failed to load inventory"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
EXIT_STATUS=0
|
||||
if [[ "$RESUME" == "true" ]]; then
|
||||
CURRENT=$(get_state)
|
||||
CURRENT=$(get_stage)
|
||||
log "INFO" "Resuming from state: $CURRENT"
|
||||
case "$CURRENT" in
|
||||
prepare|validate|deploy|verify)
|
||||
run_deployment "$CURRENT"
|
||||
run_deployment "$CURRENT" || EXIT_STATUS=1
|
||||
;;
|
||||
complete)
|
||||
log "INFO" "Last deployment was complete. Nothing to resume."
|
||||
complete|none)
|
||||
log "INFO" "No interrupted deployment found. Starting from scratch..."
|
||||
run_deployment "prepare" || EXIT_STATUS=1
|
||||
;;
|
||||
*)
|
||||
log "INFO" "No valid state to resume. Starting from prepare..."
|
||||
run_deployment "prepare"
|
||||
log "INFO" "Unknown state. Starting from prepare..."
|
||||
run_deployment "prepare" || EXIT_STATUS=1
|
||||
;;
|
||||
esac
|
||||
elif [[ -n "$REQUESTED_STAGE" ]]; then
|
||||
if [[ "$REQUESTED_STAGE" == "diagnose" ]]; then
|
||||
stage_diagnose "$TARGET_HOST" "$TARGET_SERVICE"
|
||||
collect_diagnostics "$TARGET_HOST" "$TARGET_SERVICE"
|
||||
else
|
||||
run_deployment "$REQUESTED_STAGE"
|
||||
run_deployment "$REQUESTED_STAGE" || EXIT_STATUS=1
|
||||
fi
|
||||
else
|
||||
run_deployment "prepare"
|
||||
# New deployment - clear previous state
|
||||
clear_deployment_state
|
||||
run_deployment "prepare" || EXIT_STATUS=1
|
||||
fi
|
||||
|
||||
log "INFO" "--- Homelab Deployment Finished ---"
|
||||
if [[ $EXIT_STATUS -eq 0 ]]; then
|
||||
print_summary "$TARGET_HOST" "SUCCESS"
|
||||
log "INFO" "--- Homelab Deployment Finished Successfully ---"
|
||||
else
|
||||
print_summary "$TARGET_HOST" "FAILED"
|
||||
log "ERROR" "--- Homelab Deployment Failed ---"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
45
scripts/lib/compose.sh
Normal file
45
scripts/lib/compose.sh
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env bash
|
||||
# compose.sh - Docker Compose operations
|
||||
|
||||
#######################################
# Bring one service's Docker Compose project up in detached mode.
# Globals:   REPO_PATH (read), RUNTIME_PATH (read)
# Arguments: $1 - service name (a directory under ${REPO_PATH}/services)
# Outputs:   progress and errors through log()
# Returns:   0 on success; 1 if the name is empty, the service directory
#            is missing, or `docker compose up` fails
#######################################
run_compose_up() {
  local service=${1:-}

  # An empty name would resolve to the services/ root itself and slip
  # through the directory check below.
  if [[ -z "$service" ]]; then
    log "ERROR" "No service name given"
    return 1
  fi

  local svc_dir="${REPO_PATH}/services/$service"
  local runtime_config_dir="${RUNTIME_PATH}/config/$service"

  if [[ ! -d "$svc_dir" ]]; then
    log "ERROR" "Service directory not found: $svc_dir"
    return 1
  fi

  # The runtime config directory only holds optional overrides, so a
  # failure to create it is reported but does not abort the deployment.
  if ! mkdir -p "$runtime_config_dir"; then
    log "WARN" "Could not create runtime config directory: $runtime_config_dir"
  fi

  local compose_args=("-f" "${svc_dir}/docker-compose.yml")
  if [[ -f "${runtime_config_dir}/docker-compose.override.yml" ]]; then
    log "INFO" "Using override for $service"
    compose_args+=("-f" "${runtime_config_dir}/docker-compose.override.yml")
  fi

  # Determine .env: the runtime config copy wins over the one in the repo.
  local env_file=""
  if [[ -f "${runtime_config_dir}/.env" ]]; then
    env_file="${runtime_config_dir}/.env"
  elif [[ -f "${svc_dir}/.env" ]]; then
    env_file="${svc_dir}/.env"
  fi

  local run_cmd=("docker" "compose")
  run_cmd+=("${compose_args[@]}")
  if [[ -n "$env_file" ]]; then
    run_cmd+=("--env-file" "$env_file")
  fi
  run_cmd+=("up" "-d" "--remove-orphans")

  log "INFO" "Running: ${run_cmd[*]}"
  if ! "${run_cmd[@]}"; then
    log "ERROR" "Docker compose failed for $service"
    return 1
  fi
  return 0
}
|
||||
|
||||
export -f run_compose_up
|
||||
53
scripts/lib/diagnostics.sh
Normal file
53
scripts/lib/diagnostics.sh
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
#!/usr/bin/env bash
|
||||
# diagnostics.sh - Deployment failure diagnostics
|
||||
|
||||
#######################################
# Capture host and container state after a failure into a diagnostics
# file and echo that file to the console.
# Globals:   LOG_DIR (read), TIMESTAMP (read), REPO_PATH (read)
# Arguments: $1 - host name (used for labelling only)
#            $2 - service name; empty means "all"
# Outputs:   writes ${LOG_DIR}/diagnostics_${TIMESTAMP}.txt and prints
#            its contents to stdout
#######################################
collect_diagnostics() {
  local host=${1:-}
  local service=${2:-}
  log "INFO" "Stage: DIAGNOSE ($host - ${service:-all})"

  local diag_file="${LOG_DIR}/diagnostics_${TIMESTAMP}.txt"
  local svc_dir="${REPO_PATH}/services/$service"
  {
    echo "--- DIAGNOSTICS FOR ${service:-all} (Host: $host, Time: $(date)) ---"
    echo "Uptime: $(uptime)"
    echo "Memory: $(free -h)"
    echo "Disk: $(df -h /)"
    echo "--- Docker Status ---"
    docker ps --filter "name=${service:-}"

    if [[ -n "$service" && -d "$svc_dir" ]]; then
      echo "--- $service Logs ---"
      # Run in a subshell: a bare `cd` inside this brace group would
      # change the working directory of the calling script.
      (cd "$svc_dir" && docker compose logs --tail=50)
    fi
    echo "--- END DIAGNOSTICS ---"
  } > "$diag_file" 2>&1

  # Also output to console for immediate visibility
  cat "$diag_file"
  log "INFO" "Diagnostics stored in $diag_file"
}
|
||||
|
||||
#######################################
# Print a short end-of-run summary block.
# Globals:   LOG_FILE (read)
# Arguments: $1 - host name
#            $2 - status label to display (e.g. SUCCESS / FAILED)
# Outputs:   summary to stdout; the "Last Service" line appears only
#            when get_last_service returns a non-empty value
#######################################
print_summary() {
  local host=${1:-}
  local status=${2:-}
  # Declared separately from the assignments so a failing getter is not
  # masked by the exit status of `local`.
  local last_stage last_service
  last_stage=$(get_stage)
  last_service=$(get_last_service)

  echo ""
  echo "=========================================="
  echo " DEPLOYMENT SUMMARY"
  echo "=========================================="
  echo "Host: $host"
  echo "Status: $status"
  echo "Last Stage: $last_stage"
  if [[ -n "$last_service" ]]; then
    echo "Last Service: $last_service"
  fi
  echo "Log File: $LOG_FILE"
  echo "=========================================="
  echo ""
}
|
||||
|
||||
export -f collect_diagnostics
|
||||
export -f print_summary
|
||||
45
scripts/lib/inventory.sh
Normal file
45
scripts/lib/inventory.sh
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env bash
|
||||
# inventory.sh - Host and service discovery
|
||||
|
||||
#######################################
# Populate the global SERVICES array for a host.
# Lookup order: explicit override, then hosts/<host>/services.txt, then
# hosts/<host>/services.yaml (parsed with python3 + PyYAML).
# Globals:   REPO_PATH (read), SERVICES (written)
# Arguments: $1 - host name (a directory under ${REPO_PATH}/hosts)
#            $2 - optional single service that replaces the inventory
# Outputs:   progress and errors through log()
# Returns:   0 on success (SERVICES may be empty); 1 if the host
#            directory is missing or services.yaml cannot be parsed
#######################################
load_inventory() {
  local host=${1:-}
  local service_override=${2:-}
  local host_dir="${REPO_PATH}/hosts/${host}"

  log "INFO" "Loading inventory for host: $host"

  if [[ ! -d "$host_dir" ]]; then
    log "ERROR" "Host directory not found: $host_dir"
    return 1
  fi

  SERVICES=()
  if [[ -n "$service_override" ]]; then
    SERVICES=("$service_override")
  elif [[ -f "${host_dir}/services.txt" ]]; then
    # One service per line; comments and blank lines are ignored, and
    # surrounding whitespace / CRs are stripped so names match directories.
    local line
    while IFS= read -r line || [[ -n "$line" ]]; do
      line=${line%$'\r'}
      line="${line#"${line%%[![:space:]]*}"}"
      line="${line%"${line##*[![:space:]]}"}"
      if [[ -z "$line" || "$line" == \#* ]]; then
        continue
      fi
      SERVICES+=("$line")
    done < "${host_dir}/services.txt"
  elif [[ -f "${host_dir}/services.yaml" ]]; then
    # The file path is passed as an argument instead of being spliced
    # into the Python source, and names come back one per line.
    local yaml_out
    if ! yaml_out=$(python3 -c '
import sys
import yaml

try:
    with open(sys.argv[1], "r") as f:
        data = yaml.safe_load(f)
    services = data.get("services") if isinstance(data, dict) else None
    if isinstance(services, dict):
        names = list(services.keys())
    elif isinstance(services, list):
        names = services
    else:
        names = []
    for name in names:
        print(name)
except Exception as e:
    print(f"Error parsing YAML: {e}", file=sys.stderr)
    sys.exit(1)
' "${host_dir}/services.yaml"); then
      # Previously a parser failure was swallowed and looked like an
      # empty inventory.
      log "ERROR" "Failed to parse ${host_dir}/services.yaml"
      return 1
    fi
    if [[ -n "$yaml_out" ]]; then
      mapfile -t SERVICES <<< "$yaml_out"
    fi
  else
    log "WARN" "No services found for $host"
  fi

  log "INFO" "Services to process: ${SERVICES[*]}"
  return 0
}
|
||||
|
||||
export -f load_inventory
|
||||
23
scripts/lib/log.sh
Normal file
23
scripts/lib/log.sh
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
#!/usr/bin/env bash
|
||||
# log.sh - Logging utilities for homelab deployment
|
||||
|
||||
log() {
|
||||
local level=$1
|
||||
shift
|
||||
local message=$*
|
||||
echo "[$(date +'%Y-%m-%d %H:%M:%S')] [$level] $message"
|
||||
}
|
||||
|
||||
#######################################
# Structured log line for machine reading, emitted through log() with
# the STRUCT level.
# Arguments: $1 - stage
#            $2 - host
#            $3 - service
#            $4 - command result
#            $5 - free-form info (wrapped in double quotes in the output)
# Outputs:   stage=... host=... service=... result=... info="..."
#######################################
struct_log() {
  local stage=${1:-}
  local host=${2:-}
  local service=${3:-}
  local result=${4:-}
  local info=${5:-}
  log "STRUCT" "stage=$stage host=$host service=$service result=$result info=\"$info\""
}
|
||||
|
||||
export -f log
|
||||
export -f struct_log
|
||||
51
scripts/lib/state.sh
Normal file
51
scripts/lib/state.sh
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
#!/usr/bin/env bash
|
||||
# state.sh - Deployment state management
|
||||
|
||||
#######################################
# Record the stage currently being executed.
# Globals:   STATE_DIR (read)
# Arguments: $1 - stage name
# Outputs:   overwrites ${STATE_DIR}/current_stage
#######################################
set_stage() {
  local stage=${1:-}
  printf '%s\n' "$stage" > "${STATE_DIR}/current_stage"
}
|
||||
|
||||
#######################################
# Print the last recorded stage.
# Globals:   STATE_DIR (read)
# Outputs:   contents of ${STATE_DIR}/current_stage, or "none" when the
#            file is missing or empty
#######################################
get_stage() {
  local stage_file="${STATE_DIR}/current_stage"
  # -s also rejects a zero-length file (e.g. left by an interrupted
  # write), which would otherwise be reported as an empty stage name.
  if [[ -f "$stage_file" && -s "$stage_file" ]]; then
    cat -- "$stage_file"
  else
    echo "none"
  fi
}
|
||||
|
||||
#######################################
# Create the completion marker for a stage.
# Globals:   STATE_DIR (read)
# Arguments: $1 - stage name
# Outputs:   touches ${STATE_DIR}/stage_<name>_complete
# Returns:   1 if no stage name is given; otherwise the status of touch
#######################################
mark_stage_complete() {
  local stage=${1:-}
  # Without a name this would create a meaningless "stage__complete".
  if [[ -z "$stage" ]]; then
    printf 'mark_stage_complete: stage name is required\n' >&2
    return 1
  fi
  touch -- "${STATE_DIR}/stage_${stage}_complete"
}
|
||||
|
||||
#######################################
# Test whether a stage has a completion marker.
# Globals:   STATE_DIR (read)
# Arguments: $1 - stage name
# Returns:   0 if ${STATE_DIR}/stage_<name>_complete is a regular file;
#            non-zero otherwise, including when no name is given
#######################################
is_stage_complete() {
  local stage=${1:-}
  if [[ -z "$stage" ]]; then
    return 1
  fi
  [[ -f "${STATE_DIR}/stage_${stage}_complete" ]]
}
|
||||
|
||||
#######################################
# Remove all persisted deployment state: stage completion markers, the
# current-stage file and the last-service file.
# Globals:   STATE_DIR (read)
# Returns:   1 if STATE_DIR is unset or empty; otherwise the status of rm
#######################################
clear_deployment_state() {
  # With an empty STATE_DIR the paths below would point at the
  # filesystem root (/stage_*_complete, /current_stage, ...).
  if [[ -z "${STATE_DIR:-}" ]]; then
    printf 'clear_deployment_state: STATE_DIR is not set\n' >&2
    return 1
  fi
  rm -f -- "${STATE_DIR}"/stage_*_complete \
    "${STATE_DIR}/current_stage" \
    "${STATE_DIR}/last_service"
}
|
||||
|
||||
#######################################
# Record the service currently being deployed.
# Globals:   STATE_DIR (read)
# Arguments: $1 - service name; an empty value records "no service"
# Outputs:   overwrites ${STATE_DIR}/last_service
#######################################
set_last_service() {
  printf '%s\n' "${1:-}" > "${STATE_DIR}/last_service"
}
|
||||
|
||||
#######################################
# Print the last recorded service name.
# Globals:   STATE_DIR (read)
# Outputs:   contents of ${STATE_DIR}/last_service, or an empty line
#            when the file does not exist
#######################################
get_last_service() {
  local service_file="${STATE_DIR}/last_service"
  if [[ -f "$service_file" ]]; then
    cat -- "$service_file"
  else
    echo ""
  fi
}
|
||||
|
||||
export -f set_stage
|
||||
export -f get_stage
|
||||
export -f mark_stage_complete
|
||||
export -f is_stage_complete
|
||||
export -f clear_deployment_state
|
||||
export -f set_last_service
|
||||
export -f get_last_service
|
||||
Loading…
Reference in a new issue