Fix agent fleet verification via Redis container

This commit is contained in:
oskar 2026-05-17 23:00:51 +02:00
parent b129f03837
commit c299a2cb85

View file

@@ -1,44 +1,68 @@
#!/usr/bin/env bash #!/usr/bin/env bash
# verify-agent-fleet.sh - Check the status of stability agents across the fleet # verify-agent-fleet.sh - Check the status of stability agents across the fleet
REDIS_HOST="100.108.208.3" REDIS_CMD="docker exec agent-system-redis redis-cli --raw"
REDIS_PORT="6379"
echo "--- Homelab Agent Fleet Status ---" # Check if docker is available
if ! command -v docker &> /dev/null; then
# Check if redis-cli is available echo "Error: docker command not found."
if ! command -v redis-cli &> /dev/null; then
echo "Error: redis-cli not found. Please install it or run this on a node with Redis access."
echo "Expected Redis: $REDIS_HOST:$REDIS_PORT"
exit 1 exit 1
fi fi
NODES=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" --raw KEYS 'homelab:nodes:*' | sed 's/homelab:nodes://') # Check if container is running
if ! docker ps --filter "name=agent-system-redis" --format "{{.Names}}" | grep -q "agent-system-redis"; then
if [[ -z "$NODES" ]]; then echo "Error: agent-system-redis container not found or not running."
echo "No nodes found in Redis." echo "This script must be run on PIHA (the node hosting the Redis container)."
exit 0 exit 1
fi fi
printf "%-15s | %-10s | %-20s | %-10s\n" "NODE" "STATUS" "LAST HEARTBEAT" "DOCKER" REQUIRED_NODES=("piha" "chelsty" "solaria" "vps")
MISSING_NODES=0
echo "--- Homelab Agent Fleet Status ---"
printf "%-10s %-15s %-10s %-10s %-30s\n" "NODE" "HOSTNAME" "HEALTH" "STATUS" "LAST_SEEN"
printf "%s\n" "--------------------------------------------------------------------------------" printf "%s\n" "--------------------------------------------------------------------------------"
for NODE in $NODES; do for NODE in "${REQUIRED_NODES[@]}"; do
DATA=$(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" HGETALL "homelab:nodes:$NODE") KEY="homelab:nodes:$NODE"
# Simple parser for HGETALL output (alternating key/value) # Check if key exists
STATUS=$(echo "$DATA" | grep -A 1 "status" | tail -n 1) EXISTS=$($REDIS_CMD EXISTS "$KEY" 2>/dev/null | tr -d '\r\n')
HEARTBEAT=$(echo "$DATA" | grep -A 1 "timestamp" | tail -n 1)
CHECKS=$(echo "$DATA" | grep -A 1 "checks" | tail -n 1) if [[ "$EXISTS" != "1" ]]; then
printf "%-10s %-15s %-10s %-10s %-30s\n" "$NODE" "MISSING" "N/A" "N/A" "N/A"
DOCKER_STATUS="unknown" MISSING_NODES=$((MISSING_NODES + 1))
if [[ "$CHECKS" == *"docker"* ]]; then continue
DOCKER_STATUS=$(echo "$CHECKS" | jq -r '.docker.status' 2>/dev/null || echo "error")
fi fi
printf "%-15s | %-10s | %-20s | %-10s\n" "$NODE" "$STATUS" "$HEARTBEAT" "$DOCKER_STATUS" HOSTNAME=$($REDIS_CMD HGET "$KEY" hostname 2>/dev/null | tr -d '\r\n')
HEALTH=$($REDIS_CMD HGET "$KEY" health 2>/dev/null | tr -d '\r\n')
STATUS=$($REDIS_CMD HGET "$KEY" status 2>/dev/null | tr -d '\r\n')
LAST_SEEN=$($REDIS_CMD HGET "$KEY" last_seen 2>/dev/null | tr -d '\r\n')
printf "%-10s %-15s %-10s %-10s %-30s\n" "$NODE" "$HOSTNAME" "$HEALTH" "$STATUS" "$LAST_SEEN"
done done
echo "" echo ""
echo "Events (last 5):" echo "--- Control Plane Summary ---"
redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" XREVRANGE homelab:events + - COUNT 5 if command -v jq >/dev/null; then
curl -s http://127.0.0.1:18180/summary | jq .
else
curl -s http://127.0.0.1:18180/summary
fi
echo ""
echo "--- Control Plane Nodes ---"
if command -v jq >/dev/null; then
curl -s http://127.0.0.1:18180/nodes | jq .
else
curl -s http://127.0.0.1:18180/nodes
fi
if [[ $MISSING_NODES -gt 0 ]]; then
echo ""
echo "Error: $MISSING_NODES required nodes are missing from Redis."
exit 1
fi
exit 0