diff --git a/docs/stability-agent-rollout.md b/docs/stability-agent-rollout.md
index 412a2a6..8facc6f 100644
--- a/docs/stability-agent-rollout.md
+++ b/docs/stability-agent-rollout.md
@@ -11,51 +11,35 @@ The `stability-agent` is a lightweight Python service that monitors node health
 ## Why UI only showed CHELSTY
 Previously, the `stability-agent` had `NODE_NAME` defaulted to `chelsty` and was only deployed there. The Agent System UI materializer on PIHA filters nodes based on the Redis keys `homelab:nodes:`. Without other agents publishing their specific `NODE_NAME`, the UI remained limited to the single active node.
 
-## Deployment Commands
+## Deployment
 
-Use the helper script to generate commands:
+Use the helper script to deploy or generate commands:
 ```bash
+# Print commands
 ./scripts/deploy/deploy-stability-agent.sh <node>
+
+# Deploy via SSH (requires SSH access to the node)
+./scripts/deploy/deploy-stability-agent.sh <node> --ssh
 ```
 
-### PIHA
+### Manual Steps per Node
+The manual steps are encapsulated in `services/stability-agent/deploy-local.sh`. On the target node (`NODE_NAME` defaults to the output of `hostname`; set it explicitly if that differs from the node name):
 ```bash
 cd ~/homelab-codex-ws
 git pull
 cd services/stability-agent
-NODE_NAME=piha REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker compose up -d --build --force-recreate
+./deploy-local.sh
 ```
 
-### CHELSTY
+## Verification
+
+### Fleet Overview
+Run the verification script from any node with `redis-cli` access:
 ```bash
-cd ~/homelab-codex-ws
-git pull
-cd services/stability-agent
-NODE_NAME=chelsty REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker compose up -d --build --force-recreate
+./scripts/deploy/verify-agent-fleet.sh
 ```
 
-### SOLARIA
-```bash
-cd ~/homelab-codex-ws
-git pull
-cd services/stability-agent
-NODE_NAME=solaria REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker compose up -d --build --force-recreate
-```
-
-### VPS
-```bash
-cd ~/homelab-codex-ws
-git pull
-cd services/stability-agent
-NODE_NAME=vps REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker compose up -d --build --force-recreate
-```
-
-### SATURN (Optional)
-Saturn is the orchestrator and can optionally run the stability-agent. If deployed, follow the same pattern with `NODE_NAME=saturn`.
-
-## Verification (on PIHA)
-
-Verify Redis keys:
+### Redis Inspection (on PIHA)
 ```bash
 docker exec agent-system-redis redis-cli KEYS 'homelab:nodes:*'
 docker exec agent-system-redis redis-cli HGETALL homelab:nodes:<node>
diff --git a/scripts/deploy/deploy-stability-agent.sh b/scripts/deploy/deploy-stability-agent.sh
index b0a82ec..b97528b 100755
--- a/scripts/deploy/deploy-stability-agent.sh
+++ b/scripts/deploy/deploy-stability-agent.sh
@@ -1,38 +1,47 @@
 #!/usr/bin/env bash
-# deploy-stability-agent.sh - Helper to print deployment commands for stability-agent
+# deploy-stability-agent.sh - Helper to deploy stability-agent (print or SSH)
+#
+# Usage: deploy-stability-agent.sh <node> [--ssh]
+#   <node>  required; one of: chelsty, piha, solaria, vps
+#   --ssh   must be the second argument; deploy over SSH instead of printing the commands
 
-NODE=$1
+TARGET=$1
+MODE="print"
 REPO_PATH="~/homelab-codex-ws"
 
-if [[ -z "$NODE" ]]; then
-  echo "Usage: $0 <node>"
+if [[ "$2" == "--ssh" ]]; then
+  MODE="ssh"
+fi
+
+if [[ -z "$TARGET" ]]; then
+  echo "Usage: $0 <node> [--ssh]"
   echo "Supported nodes: chelsty, piha, solaria, vps"
   exit 1
 fi
 
-case "$NODE" in
+case "$TARGET" in
   chelsty|piha|solaria|vps)
     ;;
   *)
-    echo "Error: Unknown node '$NODE'"
+    echo "Error: Unknown node '$TARGET'"
     echo "Supported nodes: chelsty, piha, solaria, vps"
     exit 1
     ;;
 esac
 
-echo "# --- Deployment commands for $NODE ---"
-echo "cd $REPO_PATH"
-echo "git fetch origin"
-echo "git checkout master"
-echo "git pull"
-echo "cd services/stability-agent"
-echo ""
-echo "# Command (Docker Compose V2):"
-echo "NODE_NAME=$NODE REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker compose up -d --build --force-recreate"
-echo ""
-echo "# Command (Docker Compose V1):"
-echo "NODE_NAME=$NODE REDIS_HOST=100.108.208.3 REDIS_PORT=6379 REDIS_ENABLED=true docker-compose up -d --build --force-recreate"
-echo ""
-echo "# Notes:"
-echo "# - If using host-specific overrides: add '-f ../../hosts/$NODE/runtime/stability-agent/docker-compose.override.yml'"
-echo "# - Ensure /opt/homelab/state and /opt/homelab/events exist on the host."
+if [[ "$MODE" == "ssh" ]]; then # NODE_NAME is pinned to the validated node below; deploy-local.sh alone would default to $(hostname)
+  echo "--- Deploying to $TARGET via SSH ---"
+  ssh "$TARGET" "cd $REPO_PATH && git fetch origin && git checkout master && git pull && cd services/stability-agent && NODE_NAME=$TARGET ./deploy-local.sh"
+else
+  echo "# --- Deployment commands for $TARGET ---"
+  echo "cd $REPO_PATH"
+  echo "git fetch origin"
+  echo "git checkout master"
+  echo "git pull"
+  echo "cd services/stability-agent"
+  echo "NODE_NAME=$TARGET ./deploy-local.sh"
+  echo ""
+  echo "# Notes:"
+  echo "# - Run './deploy-local.sh' on the target host."
+  echo "# - Ensure /opt/homelab/state and /opt/homelab/events exist on the host."
+fi
diff --git a/scripts/deploy/verify-agent-fleet.sh b/scripts/deploy/verify-agent-fleet.sh
new file mode 100755
index 0000000..c483067
--- /dev/null
+++ b/scripts/deploy/verify-agent-fleet.sh
@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+# verify-agent-fleet.sh - Check the status of stability agents across the fleet
+#
+# Usage: verify-agent-fleet.sh
+# Env:   REDIS_HOST, REDIS_PORT - Redis endpoint to query (defaults below).
+# Needs: redis-cli; jq is used to decode the docker check status.
+
+REDIS_HOST="${REDIS_HOST:-100.108.208.3}"
+REDIS_PORT="${REDIS_PORT:-6379}"
+
+echo "--- Homelab Agent Fleet Status ---"
+
+# Check if redis-cli is available
+if ! command -v redis-cli &> /dev/null; then
+  echo "Error: redis-cli not found. Please install it or run this on a node with Redis access." >&2
+  echo "Expected Redis: $REDIS_HOST:$REDIS_PORT" >&2
+  exit 1
+fi
+
+# Fail early when Redis is unreachable; otherwise an outage would be reported as an empty fleet.
+if ! redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" PING > /dev/null 2>&1; then
+  echo "Error: cannot reach Redis at $REDIS_HOST:$REDIS_PORT" >&2
+  exit 1
+fi
+
+# SCAN iterates the keyspace incrementally (KEYS blocks the server); sort -u drops keys SCAN may return twice.
+NODES=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" --scan --pattern 'homelab:nodes:*' | sed 's/^homelab:nodes://' | sort -u)
+
+if [[ -z "$NODES" ]]; then
+  echo "No nodes found in Redis."
+  exit 0
+fi
+
+printf "%-15s | %-10s | %-20s | %-10s\n" "NODE" "STATUS" "LAST HEARTBEAT" "DOCKER"
+printf "%s\n" "--------------------------------------------------------------------------------"
+
+for NODE in $NODES; do
+  KEY="homelab:nodes:$NODE"
+
+  # HGET per field (assumes fields are named exactly status/timestamp/checks - TODO confirm); grepping HGETALL also matched "status" inside the checks JSON.
+  STATUS=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" HGET "$KEY" status)
+  HEARTBEAT=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" HGET "$KEY" timestamp)
+  CHECKS=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" HGET "$KEY" checks)
+
+  DOCKER_STATUS="unknown"
+  if [[ "$CHECKS" == *"docker"* ]]; then
+    DOCKER_STATUS=$(printf '%s' "$CHECKS" | jq -r '.docker.status' 2>/dev/null || echo "error")
+  fi
+
+  printf "%-15s | %-10s | %-20s | %-10s\n" "$NODE" "$STATUS" "$HEARTBEAT" "$DOCKER_STATUS"
+done
+
+echo ""
+echo "Events (last 5):"
+redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" XREVRANGE homelab:events + - COUNT 5
diff --git a/services/stability-agent/deploy-local.sh b/services/stability-agent/deploy-local.sh
new file mode 100755
index 0000000..09d5117
--- /dev/null
+++ b/services/stability-agent/deploy-local.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+# deploy-local.sh - Local deployment script for stability-agent
+# This script is intended to be run on the target node.
+#
+# Env (all optional): NODE_NAME (default: output of hostname), REDIS_HOST, REDIS_PORT, REDIS_ENABLED.
+
+set -euo pipefail
+cd -- "$(dirname -- "${BASH_SOURCE[0]}")" # docker-compose.yml and ../../hosts are resolved relative to this script
+# Default values (each can be overridden from the environment)
+NODE_NAME=${NODE_NAME:-$(hostname)}
+REDIS_HOST=${REDIS_HOST:-100.108.208.3}
+REDIS_PORT=${REDIS_PORT:-6379}
+REDIS_ENABLED=${REDIS_ENABLED:-true}
+
+echo "--- Deploying stability-agent on $NODE_NAME ---"
+
+# Prefer the "docker compose" plugin; fall back to the standalone docker-compose binary
+if docker compose version >/dev/null 2>&1; then
+  DOCKER_COMPOSE=(docker compose)
+else
+  DOCKER_COMPOSE=(docker-compose)
+fi
+
+# Use host-specific override if it exists
+OVERRIDE_FILE="../../hosts/$NODE_NAME/runtime/stability-agent/docker-compose.override.yml"
+COMPOSE_ARGS=(-f docker-compose.yml)
+
+if [[ -f "$OVERRIDE_FILE" ]]; then
+  echo "Using override file: $OVERRIDE_FILE"
+  COMPOSE_ARGS+=(-f "$OVERRIDE_FILE")
+fi
+
+# Run deployment (arrays keep every argument intact: no word splitting or globbing)
+NODE_NAME="$NODE_NAME" \
+REDIS_HOST="$REDIS_HOST" \
+REDIS_PORT="$REDIS_PORT" \
+REDIS_ENABLED="$REDIS_ENABLED" \
+"${DOCKER_COMPOSE[@]}" "${COMPOSE_ARGS[@]}" up -d --build --force-recreate
+
+echo "Deployment finished."