Finish repo-first implementation of Agent System UI pipeline
Co-authored-by: Junie <junie@jetbrains.com>
This commit is contained in:
parent
41c05f42b5
commit
12a775c834
37
services/agent-system/README.md
Normal file
37
services/agent-system/README.md
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
### Agent System
|
||||
Central runtime materializer and Operator Control Plane UI.
|
||||
|
||||
#### Components
|
||||
- **Redis**: Central state store (on PIHA).
|
||||
- **Runtime Materializer**: Converts Redis state to JSON files in `/opt/homelab/world`.
|
||||
- **Web UI**: Exposes API endpoints and serves the Operator UI.
|
||||
|
||||
#### Deployment (on PIHA)
|
||||
```bash
|
||||
cd services/agent-system
|
||||
./deploy.sh
|
||||
```
|
||||
|
||||
#### Deployment (on CHELSTY)
|
||||
```bash
|
||||
cd services/stability-agent
|
||||
docker compose up -d --build
|
||||
```
|
||||
|
||||
#### Verification
|
||||
The `deploy.sh` script automatically verifies the local endpoints.
|
||||
You can also manually check:
|
||||
```bash
|
||||
# Check runtime summary
|
||||
curl http://localhost:18180/summary
|
||||
|
||||
# Check discovered nodes
|
||||
curl http://localhost:18180/nodes
|
||||
|
||||
# Check discovered services
|
||||
curl http://localhost:18180/services
|
||||
```
|
||||
|
||||
#### Directory Structure
|
||||
- `/opt/homelab/world`: Contains materialized JSON state.
|
||||
- `/opt/homelab/state`: Contains operator configuration and local heartbeats.
|
||||
22
services/agent-system/deploy.sh
Executable file
22
services/agent-system/deploy.sh
Executable file
|
|
@ -0,0 +1,22 @@
|
|||
#!/bin/bash
# Deploy the Agent System stack with Docker Compose and smoke-test its API.
#
# Steps: validate the compose file, build and start the services, list the
# matching containers, then probe each API endpoint on localhost:18180.
# Exits non-zero if any command fails or any endpoint probe fails.
set -e

echo ">>> Validating docker-compose configuration..."
docker compose config

echo ">>> Building and starting Agent System services..."
docker compose up -d --build

echo ">>> Services status:"
docker ps --filter "name=agent-system"

echo ">>> Verifying API endpoints..."
sleep 5 # Give it a moment to start

endpoints=("summary" "nodes" "services")
failed=0
for ep in "${endpoints[@]}"; do
    echo "Checking /$ep..."
    # Probing inside an `if` keeps `set -e` from aborting on the first bad
    # endpoint, so every endpoint is still reported before we decide the
    # overall result.
    if curl -s -f "http://localhost:18180/$ep" > /dev/null; then
        echo " OK"
    else
        echo " FAILED"
        failed=1
    fi
done

# Surface verification failures through the exit status instead of only
# printing them, so callers and CI can detect a broken deployment.
if [ "$failed" -ne 0 ]; then
    echo ">>> Deployment finished, but endpoint verification failed." >&2
    exit 1
fi

echo ">>> Deployment complete."
|
||||
|
|
@ -30,6 +30,17 @@ def safe_json_loads(data, default=None):
|
|||
except (json.JSONDecodeError, TypeError):
|
||||
return data
|
||||
|
||||
def normalize_health(health):
    """Map a raw health value onto the UI's three-state vocabulary.

    Falsy input (``None``, empty string, ``0``, ...) is reported as
    ``"nominal"``. Any other value is stringified and lower-cased before
    being looked up; a value that matches no known label yields ``"error"``.
    """
    if not health:
        return "nominal"

    # Known labels and the UI state each one collapses to.
    ui_state_by_label = {
        "healthy": "nominal",
        "ok": "nominal",
        "running": "nominal",
        "nominal": "nominal",
        "degraded": "degraded",
        "warning": "degraded",
    }
    label = str(health).lower()
    return ui_state_by_label.get(label, "error")
|
||||
|
||||
def materialize():
|
||||
"""Reads state from Redis and writes JSON files to the world directory."""
|
||||
print(f"[{datetime.now().isoformat()}] Materializing world state...")
|
||||
|
|
@ -42,6 +53,9 @@ def materialize():
|
|||
for key in node_keys:
|
||||
node_data = r.hgetall(key)
|
||||
if node_data:
|
||||
# Normalize health
|
||||
if "health" in node_data:
|
||||
node_data["health"] = normalize_health(node_data["health"])
|
||||
# Parse JSON fields if they exist
|
||||
if "capabilities" in node_data:
|
||||
node_data["capabilities"] = safe_json_loads(node_data["capabilities"], [])
|
||||
|
|
@ -55,6 +69,9 @@ def materialize():
|
|||
for key in service_keys:
|
||||
svc_data = r.hgetall(key)
|
||||
if svc_data:
|
||||
# Normalize health
|
||||
if "health" in svc_data:
|
||||
svc_data["health"] = normalize_health(svc_data["health"])
|
||||
if "dependencies" in svc_data:
|
||||
svc_data["dependencies"] = safe_json_loads(svc_data["dependencies"], [])
|
||||
if "recommendations" in svc_data:
|
||||
|
|
@ -82,6 +99,9 @@ def materialize():
|
|||
for key in incident_keys:
|
||||
incident_data = r.hgetall(key)
|
||||
if incident_data:
|
||||
# Normalize health if present
|
||||
if "health" in incident_data:
|
||||
incident_data["health"] = normalize_health(incident_data["health"])
|
||||
incidents.append(incident_data)
|
||||
|
||||
# 5. Deployments (Hash)
|
||||
|
|
@ -101,13 +121,26 @@ def materialize():
|
|||
recommendations.append(rec_data)
|
||||
|
||||
# 7. Runtime Summary
|
||||
unhealthy_services = [s for s in services if s.get("health") != "nominal"]
|
||||
active_incidents = [i for i in incidents if i.get("status") not in ["resolved", "closed"]]
|
||||
|
||||
status = "nominal"
|
||||
if len(active_incidents) > 0 or len(unhealthy_services) > 5:
|
||||
status = "error"
|
||||
elif len(unhealthy_services) > 0:
|
||||
status = "degraded"
|
||||
|
||||
summary = {
|
||||
"status": status,
|
||||
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||
"last_update": int(time.time()),
|
||||
"node_count": len(nodes),
|
||||
"service_count": len(services),
|
||||
"unhealthy_services_count": len([s for s in services if s.get("health") != "healthy"]),
|
||||
"active_incidents_count": len(active_incidents),
|
||||
"unhealthy_services_count": len(unhealthy_services),
|
||||
"incident_count": len(incidents),
|
||||
"recent_events_count": len(events)
|
||||
"recent_events_count": len(events),
|
||||
"stale": False
|
||||
}
|
||||
|
||||
# Ensure directory exists
|
||||
|
|
|
|||
|
|
@ -47,45 +47,37 @@ def save_config(config):
|
|||
|
||||
|
||||
def current_nodes():
|
||||
return read_json_file(STATE_DIR / "nodes.json")
|
||||
return read_json_file(WORLD_DIR / "nodes.json")
|
||||
|
||||
|
||||
def current_services():
|
||||
return read_json_file(STATE_DIR / "services.json")
|
||||
return read_json_file(WORLD_DIR / "services.json")
|
||||
|
||||
|
||||
def current_deployments():
|
||||
return read_json_file(STATE_DIR / "deployments.json")
|
||||
return read_json_file(WORLD_DIR / "deployments.json")
|
||||
|
||||
|
||||
def current_incidents():
|
||||
return read_json_file(STATE_DIR / "incidents.json")
|
||||
return read_json_file(WORLD_DIR / "incidents.json")
|
||||
|
||||
|
||||
def current_recommendations():
|
||||
return read_json_file(STATE_DIR / "recommendations.json")
|
||||
return read_json_file(WORLD_DIR / "recommendations.json")
|
||||
|
||||
|
||||
def current_summary():
|
||||
summary = read_json_file(STATE_DIR / "runtime-summary.json", default={})
|
||||
summary = read_json_file(WORLD_DIR / "runtime-summary.json", default={})
|
||||
if summary:
|
||||
# Check for staleness
|
||||
mtime = os.path.getmtime(STATE_DIR / "runtime-summary.json")
|
||||
mtime = os.path.getmtime(WORLD_DIR / "runtime-summary.json")
|
||||
summary["last_update"] = mtime
|
||||
summary["stale"] = (time.time() - mtime) > 60 # Stale if older than 60s
|
||||
return summary
|
||||
|
||||
|
||||
def current_events():
|
||||
events = []
|
||||
if EVENTS_DIR.exists():
|
||||
for f in EVENTS_DIR.glob("*.json"):
|
||||
data = read_json_file(f)
|
||||
if data:
|
||||
# Add source file for traceability
|
||||
data["_source"] = f.name
|
||||
events.append(data)
|
||||
return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
|
||||
return read_json_file(WORLD_DIR / "events.json", default=[])
|
||||
|
||||
|
||||
def current_actions():
|
||||
|
|
|
|||
|
|
@ -293,7 +293,7 @@ def main():
|
|||
|
||||
redis_client.hset(f"homelab:nodes:{NODE_NAME}", {
|
||||
"id": NODE_NAME,
|
||||
"hostname": socket.gethostname(),
|
||||
"hostname": NODE_NAME,
|
||||
"health": node_health,
|
||||
"status": "online",
|
||||
"last_seen": status["timestamp"],
|
||||
|
|
|
|||
Loading…
Reference in a new issue