From 9c883c2d69507a1e725944874c2d3f72ec8675ec Mon Sep 17 00:00:00 2001 From: Oskar Kapala Date: Tue, 12 May 2026 18:48:35 +0200 Subject: [PATCH] Integrate operator control plane with live homelab runtime --- .output.txt | 1040 ------------------------ compose/monitor-agent.yml | 9 - docker-compose.yml | 34 +- docs/operator/approval-workflow.md | 20 +- docs/operator/stale-state-semantics.md | 25 + monitor-agent/Dockerfile | 6 - monitor-agent/main.py | 117 --- monitor-agent/requirements.txt | 0 node-agent/Dockerfile | 14 - node-agent/main.py | 116 --- node-agent/requirements.txt | 1 - orchestrator/Dockerfile | 10 - orchestrator/diagnosis.py | 80 -- orchestrator/events.py | 35 - orchestrator/main.py | 408 ---------- orchestrator/redis_client.py | 8 - orchestrator/requirements.txt | 1 - orchestrator/result_listener.py | 567 ------------- orchestrator/task_builder.py | 11 - scripts/executor/executor.py | 225 ----- scripts/executor/test_actions.sh | 74 -- scripts/supervisor/supervisor.py | 363 --------- scripts/supervisor/test_scenarios.sh | 79 -- webui/index.html | 42 +- webui/web.py | 241 ++---- 25 files changed, 136 insertions(+), 3390 deletions(-) delete mode 100644 .output.txt delete mode 100644 compose/monitor-agent.yml create mode 100644 docs/operator/stale-state-semantics.md delete mode 100644 monitor-agent/Dockerfile delete mode 100644 monitor-agent/main.py delete mode 100644 monitor-agent/requirements.txt delete mode 100644 node-agent/Dockerfile delete mode 100644 node-agent/main.py delete mode 100644 node-agent/requirements.txt delete mode 100644 orchestrator/Dockerfile delete mode 100644 orchestrator/diagnosis.py delete mode 100644 orchestrator/events.py delete mode 100644 orchestrator/main.py delete mode 100644 orchestrator/redis_client.py delete mode 100644 orchestrator/requirements.txt delete mode 100644 orchestrator/result_listener.py delete mode 100644 orchestrator/task_builder.py delete mode 100644 scripts/executor/executor.py delete mode 100644 scripts/executor/test_actions.sh delete mode 100644 scripts/supervisor/supervisor.py delete mode 100644 scripts/supervisor/test_scenarios.sh diff --git a/.output.txt b/.output.txt deleted file mode 100644 index 419b487..0000000 --- a/.output.txt +++ /dev/null @@ -1,1040 +0,0 @@ -Command finished with exit code 0. -Command output: -diff --git a/.gitignore b/.gitignore -index 569bec1..ad8f693 100644 ---- a/.gitignore -+++ b/.gitignore -@@ -4,4 +4,3 @@ __pycache__/ - - .idea/ - .vscode/ --tmp/ -diff --git a/docs/action-queue-system.md b/docs/action-queue-system.md -deleted file mode 100644 -index 614ae1b..0000000 ---- a/docs/action-queue-system.md -+++ /dev/null -@@ -1,75 +0,0 @@ --# Action Queue System -- --The Action Queue System provides a safe, filesystem-first lifecycle for operational actions in the homelab platform. It enables controlled execution with mandatory approval for high-risk operations. -- --## Action Lifecycle -- --Actions move through various states, represented by directories under `/opt/homelab/actions/`: -- --1. **Pending** (`pending/`): Actions proposed by the Supervisor or other agents. --2. **Approved** (`approved/`): Actions that have been reviewed and approved for execution. --3. **Running** (`running/`): Actions currently being processed by the Executor. --4. **Completed** (`completed/`): Successfully executed actions. --5. **Failed** (`failed/`): Actions that encountered errors during execution. --6. **Rejected** (`rejected/`): Proposed actions that were explicitly denied. -- --## Action Schema -- --Actions are stored as JSON documents with the following structure: -- --```json --{ -- "action_id": "uuid", -- "created_at": 1620000000.0, -- "proposed_by": "supervisor", -- "correlation_id": "uuid", -- "node": "node-name", -- "service": "service-name", -- "action_type": "redeploy_service", -- "risk_level": "guarded", -- "confidence": 0.9, -- "approval_required": true, -- "autonomous_eligible": false, -- "status": "pending", -- "payload": { ... }, -- "rollback_reference": null --} --``` -- --## Safety Model -- --Actions are categorized into safety classes: -- --- **Safe**: Low-risk actions that may be eligible for autonomous execution in the future (e.g., `collect_diagnostics`, `rerun_healthcheck`). --- **Guarded**: Actions that default to requiring approval but could be automated under strict conditions (e.g., `redeploy_service`, `rerun_deployment_stage`). --- **Dangerous**: High-risk actions that ALWAYS require manual approval. -- --Currently, the platform operates in a **Recommendation-Only** mode where even `safe` actions require explicit approval. -- --## Initial Action Types -- --- `redeploy_service`: Restarts or redeploys a service container. --- `rerun_healthcheck`: Triggers an immediate health check. --- `rerun_deployment_stage`: Retries a specific stage of a failed deployment. --- `collect_diagnostics`: Gathers logs and metrics for troubleshooting. -- --## Executor -- --The Executor (`scripts/executor/executor.py`) is responsible for processing approved actions. It features: -- --- **Process Approved Only**: Only actions in the `approved/` directory are processed. --- **Recommendation-Safe**: Simulation-based execution that logs intended mutations without side effects. --- **Idempotency**: Designed to be safe to run multiple times. --- **Resumable State**: If interrupted, it will pick up actions in the `running/` state. --- **Append-Only History**: Maintains a `history.log` of all action transitions. -- --## Rollback Concepts -- --Every action schema includes a `rollback_reference`. In future iterations, this will point to the previous stable state or a reverse action that can be triggered if the current action fails or causes further instability. -- --## Future Autonomous Execution -- --The system is designed to transition to autonomous execution by: --1. Identifying `safe` actions with high `confidence` scores. --2. Matching them against a `policy-engine`. --3. Automatically moving them from `pending/` to `approved/` based on allowed safety guardrails. -diff --git a/docs/operator/approval-workflow.md b/docs/operator/approval-workflow.md -deleted file mode 100644 -index 9312ae1..0000000 ---- a/docs/operator/approval-workflow.md -+++ /dev/null -@@ -1,27 +0,0 @@ --# Operator Approval Workflow -- --This document describes the process of reviewing and approving actions generated by the reconciliation supervisor. -- --## Workflow Stages -- --### 1. Action Identification --When the supervisor identifies a delta between desired and actual state, it generates a pending action in `/opt/homelab/actions/pending/`. -- --### 2. Risk Assessment --Actions are categorized by risk level: --- **Safe**: Low impact, high confidence. Can be auto-approved in autonomous mode. --- **Guarded**: Moderate impact. Requires explicit operator approval. --- **Dangerous**: High impact (e.g., node redeploy). Requires multi-step approval or senior operator override. -- --### 3. Review Process --1. Navigate to the **Action Queue** view. --2. Review the **Confidence Score** and **Correlation Chain** to understand why the action was proposed. --3. Check the **Rollback Availability**. -- --### 4. Decision --- **Approve**: Moves action to `approved` state. --- **Reject**: Moves action to `rejected` state and suppresses similar recommendations for a cooldown period. --- **Execute**: Transitions an approved action to `running` status. -- --## Mobile Approvals --Approval requests can be acknowledged via the Telegram bot integration, allowing for remote operational control. -diff --git a/docs/operator/incident-remediation.md b/docs/operator/incident-remediation.md -deleted file mode 100644 -index e11ca9d..0000000 ---- a/docs/operator/incident-remediation.md -+++ /dev/null -@@ -1,24 +0,0 @@ --# Incident Remediation Guide -- --Guide for operators responding to system incidents using the Control Plane. -- --## Remediation Flow -- --### 1. Detection --Incidents appear in the **Active Incidents** card on the Dashboard and in the **Events** timeline. -- --### 2. Correlation --Use the **Correlation** view to see: --- The event chain leading to the incident. --- Automated recommendations generated in response. --- Any manual actions already taken. -- --### 3. Intervention --1. Review the recommended actions in the **Action Queue**. --2. If the automated recommendation is not sufficient, use the **Nodes** or **Services** view to manually trigger commands. --3. Observe the **Runtime Topology** to ensure no cascading failures occur during remediation. -- --### 4. Verification --Once actions are completed, verify the system state: --- Health badges should transition back to **Nominal**. --- The **System Status** in the sidebar should reflect a healthy state. -diff --git a/docs/operator/reconcile-review.md b/docs/operator/reconcile-review.md -index e56c333..8b99c6f 100644 ---- a/docs/operator/reconcile-review.md -+++ b/docs/operator/reconcile-review.md -@@ -2,13 +2,11 @@ - - The system continuously monitors for drift between desired and actual state. - --1. **Drift Detection**: When drift is detected, the supervisor generates a recommendation and a corresponding pending action. --2. **Review**: Navigate to the **Recommendations** view for a high-level summary, or the **Action Queue** for the specific execution plan. --3. **Approval**: For 'guarded' or 'dangerous' actions, click **Approve** in the Action Queue. --4. **Execution**: Once approved, the action can be triggered manually by clicking **Execute**, or it will be picked up by the autonomous executor if the system is in `AUTONOMOUS` mode. --5. **Observation**: Monitor the **Deployments** and **Topology** views to watch the reconciliation in real-time. -- --Risk Levels: --- **Safe**: Minimal impact, high success rate. --- **Guarded**: Potential brief service interruption. --- **Dangerous**: Significant impact, potential data loss, or node-level disruption. -+1. If a service is in RECONCILING state, check the Services view. -+2. Review the Recommendations view for automated or guarded actions. -+3. For 'safe' actions with high confidence, the system may act autonomously if enabled. -+4. For 'guarded' or 'dangerous' actions, an operator must manually approve the action. -+5. Risk Levels: -+ - **Safe**: Minimal impact, high success rate. -+ - **Guarded**: Potential brief service interruption. -+ - **Dangerous**: Significant impact, potential data loss, or hardware interaction required. -diff --git a/scripts/executor/executor.py b/scripts/executor/executor.py -deleted file mode 100644 -index 12bb98f..0000000 ---- a/scripts/executor/executor.py -+++ /dev/null -@@ -1,225 +0,0 @@ --#!/usr/bin/env python3 --import os --import json --import time --import sys --import shutil --import uuid --from pathlib import Path -- --# Configuration --ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions")) --EVENT_LOG = Path("/tmp/agent-events.log") --HISTORY_LOG = ACTIONS_ROOT / "history.log" -- --def emit_event(event_type, message, details=None): -- """Emit action lifecycle events.""" -- event = { -- "type": event_type, -- "message": message, -- "timestamp": time.time(), -- "details": details or {} -- } -- line = json.dumps(event) -- print(line) -- try: -- with open(EVENT_LOG, "a") as f: -- f.write(line + "\n") -- f.flush() -- except Exception as e: -- print(f"Error writing to event log: {e}", file=sys.stderr) -- --def log_history(action_id, status, message): -- """Append-only execution history.""" -- entry = { -- "timestamp": time.time(), -- "action_id": action_id, -- "status": status, -- "message": message -- } -- try: -- with open(HISTORY_LOG, "a") as f: -- f.write(json.dumps(entry) + "\n") -- f.flush() -- except Exception as e: -- print(f"Error writing history: {e}", file=sys.stderr) -- --def ensure_dirs(): -- for d in ["pending", "approved", "running", "completed", "failed", "rejected"]: -- (ACTIONS_ROOT / d).mkdir(parents=True, exist_ok=True) -- --def approve_action(action_id): -- ensure_dirs() -- if not action_id.endswith(".json"): -- filename = f"{action_id}.json" -- else: -- filename = action_id -- -- pending_path = ACTIONS_ROOT / "pending" / filename -- if not pending_path.exists(): -- print(f"Action {filename} not found in pending.") -- return False -- -- approved_path = ACTIONS_ROOT / "approved" / filename -- -- try: -- with open(pending_path, "r") as f: -- action = json.load(f) -- -- action["status"] = "approved" -- action["approved_at"] = time.time() -- -- with open(pending_path, "w") as f: -- json.dump(action, f, indent=2) -- -- shutil.move(pending_path, approved_path) -- -- emit_event("action_approved", f"Action approved: {action['action_id']}", {"action_id": action['action_id']}) -- log_history(action['action_id'], "approved", "Manual approval received") -- print(f"Action {action['action_id']} approved.") -- return True -- except Exception as e: -- print(f"Error approving action: {e}") -- return False -- --def reject_action(action_id): -- ensure_dirs() -- if not action_id.endswith(".json"): -- filename = f"{action_id}.json" -- else: -- filename = action_id -- -- pending_path = ACTIONS_ROOT / "pending" / filename -- if not pending_path.exists(): -- print(f"Action {filename} not found in pending.") -- return False -- -- rejected_path = ACTIONS_ROOT / "rejected" / filename -- -- try: -- with open(pending_path, "r") as f: -- action = json.load(f) -- -- action["status"] = "rejected" -- action["rejected_at"] = time.time() -- -- with open(pending_path, "w") as f: -- json.dump(action, f, indent=2) -- -- shutil.move(pending_path, rejected_path) -- -- emit_event("action_rejected", f"Action rejected: {action['action_id']}", {"action_id": action['action_id']}) -- log_history(action['action_id'], "rejected", "Manual rejection received") -- print(f"Action {action['action_id']} rejected.") -- return True -- except Exception as e: -- print(f"Error rejecting action: {e}") -- return False -- --def process_action(action_path, dry_run=False): -- """Process a single approved action.""" -- try: -- with open(action_path, "r") as f: -- action = json.load(f) -- except Exception as e: -- print(f"Error reading action {action_path}: {e}") -- return -- -- action_id = action["action_id"] -- action_type = action["action_type"] -- -- # Move to running (Resumable execution state) -- running_path = ACTIONS_ROOT / "running" / action_path.name -- shutil.move(action_path, running_path) -- -- action["status"] = "running" -- action["started_at"] = time.time() -- with open(running_path, "w") as f: -- json.dump(action, f, indent=2) -- -- emit_event("action_started", f"Started action {action_id} ({action_type})", {"action_id": action_id}) -- log_history(action_id, "running", f"Execution started (dry_run={dry_run})") -- -- # Simulation logic (Recommendation-safe execution model) -- print(f"Executing {action_type} for {action.get('service') or action.get('node')}...") -- -- # Idempotent simulation: in a real world, we'd check if it's already done -- time.sleep(0.5) -- -- success = True -- if dry_run: -- print(f"[DRY-RUN] Would execute {action_type} logic here.") -- else: -- # Initial action types implementation (Simulation) -- if action_type == "redeploy_service": -- print(f"DEBUG: Triggering container restart/redeploy for {action.get('service')}") -- elif action_type == "rerun_healthcheck": -- print(f"DEBUG: Running healthcheck for {action.get('service')}") -- elif action_type == "rerun_deployment_stage": -- print(f"DEBUG: Retrying deployment stage for {action.get('service')}") -- elif action_type == "collect_diagnostics": -- print(f"DEBUG: Collecting logs and metrics for {action.get('service') or action.get('node')}") -- else: -- print(f"DEBUG: Executing unknown action type: {action_type}") -- -- # Finalize -- if success: -- final_status = "completed" -- target_dir = ACTIONS_ROOT / "completed" -- else: -- final_status = "failed" -- target_dir = ACTIONS_ROOT / "failed" -- -- final_path = target_dir / action_path.name -- action["status"] = final_status -- action["finished_at"] = time.time() -- -- with open(running_path, "w") as f: -- json.dump(action, f, indent=2) -- -- shutil.move(running_path, final_path) -- -- emit_event(f"action_{final_status}", f"Action {action_id} {final_status}", {"action_id": action_id}) -- log_history(action_id, final_status, "Execution finished") -- --def run_executor(dry_run=False): -- ensure_dirs() -- print(f"--- Executor Run: {time.ctime()} (dry_run={dry_run}) ---") -- -- # 1. Resume running actions -- running_actions = list((ACTIONS_ROOT / "running").glob("*.json")) -- for action_file in running_actions: -- print(f"Resuming action: {action_file.name}") -- process_action(action_file, dry_run=dry_run) -- -- # 2. Process approved actions -- approved_actions = list((ACTIONS_ROOT / "approved").glob("*.json")) -- if not approved_actions: -- print("No approved actions found.") -- else: -- for action_file in approved_actions: -- process_action(action_file, dry_run=dry_run) -- -- print("Run complete.") -- --if __name__ == "__main__": -- import argparse -- parser = argparse.ArgumentParser(description="Homelab Action Executor") -- parser.add_argument("command", choices=["run", "approve", "reject"], nargs="?", default="run") -- parser.add_argument("action_id", nargs="?") -- parser.add_argument("--dry-run", action="store_true") -- -- args = parser.parse_args() -- -- if args.command == "run": -- run_executor(dry_run=args.dry_run) -- elif args.command == "approve": -- if not args.action_id: -- print("Error: action_id required for approve") -- sys.exit(1) -- approve_action(args.action_id) -- elif args.command == "reject": -- if not args.action_id: -- print("Error: action_id required for reject") -- sys.exit(1) -- reject_action(args.action_id) -diff --git a/scripts/executor/test_actions.sh b/scripts/executor/test_actions.sh -deleted file mode 100644 -index 6cf7271..0000000 ---- a/scripts/executor/test_actions.sh -+++ /dev/null -@@ -1,74 +0,0 @@ --#!/bin/bash --# Validation script for Homelab Action Queue System -- --set -e -- --BASE_DIR=$(pwd) --export HOMELAB_WORLD_ROOT="$BASE_DIR/tmp/homelab/world" --export HOMELAB_ACTIONS_ROOT="$BASE_DIR/tmp/homelab/actions" --EVENT_LOG="/tmp/agent-events.log" -- --echo "=== Starting Action Queue Validation ===" -- --# 1. Setup drift scenarios --echo "Setting up drift scenarios..." --bash scripts/supervisor/test_scenarios.sh -- --# 2. Run supervisor to generate action proposals --echo "Running supervisor..." --python3 scripts/supervisor/supervisor.py -- --# 3. Check for pending actions --echo "Checking pending actions..." --ls -l "$HOMELAB_ACTIONS_ROOT/pending/" -- --# Get an action ID from pending --ACTION_FILE=$(ls "$HOMELAB_ACTIONS_ROOT/pending/" | head -n 1) --if [ -z "$ACTION_FILE" ]; then -- echo "Error: No pending actions found!" -- exit 1 --fi --ACTION_ID="${ACTION_FILE%.json}" --echo "Found action: $ACTION_ID" -- --# 4. Approve the action --echo "Approving action $ACTION_ID..." --python3 scripts/executor/executor.py approve "$ACTION_ID" -- --# 5. Run executor --echo "Running executor..." --python3 scripts/executor/executor.py run -- --# 6. Verify completion --if [ -f "$HOMELAB_ACTIONS_ROOT/completed/$ACTION_FILE" ]; then -- echo "SUCCESS: Action $ACTION_ID moved to completed." --else -- echo "FAILURE: Action $ACTION_ID NOT found in completed." -- exit 1 --fi -- --# 7. Test rejection --echo "Testing rejection..." --NEXT_ACTION_FILE=$(ls "$HOMELAB_ACTIONS_ROOT/pending/" | head -n 1) --if [ -n "$NEXT_ACTION_FILE" ]; then -- NEXT_ACTION_ID="${NEXT_ACTION_FILE%.json}" -- echo "Rejecting action $NEXT_ACTION_ID..." -- python3 scripts/executor/executor.py reject "$NEXT_ACTION_ID" -- -- if [ -f "$HOMELAB_ACTIONS_ROOT/rejected/$NEXT_ACTION_FILE" ]; then -- echo "SUCCESS: Action $NEXT_ACTION_ID moved to rejected." -- else -- echo "FAILURE: Action $NEXT_ACTION_ID NOT found in rejected." -- exit 1 -- fi --fi -- --# 8. Verify events --echo "Verifying events in $EVENT_LOG..." --grep "action_created" "$EVENT_LOG" | tail -n 1 --grep "action_approved" "$EVENT_LOG" | tail -n 1 --grep "action_started" "$EVENT_LOG" | tail -n 1 --grep "action_completed" "$EVENT_LOG" | tail -n 1 --grep "action_rejected" "$EVENT_LOG" | tail -n 1 -- --echo "=== Validation Complete ===" -diff --git a/scripts/supervisor/supervisor.py b/scripts/supervisor/supervisor.py -index ce5d162..e58027b 100644 ---- a/scripts/supervisor/supervisor.py -+++ b/scripts/supervisor/supervisor.py -@@ -5,19 +5,14 @@ import yaml - import json - import time - import glob --import uuid - from pathlib import Path - - # Configuration - WORLD_STATE_PATH = Path(os.getenv("HOMELAB_WORLD_ROOT", "/opt/homelab/world")) --ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions")) - INVENTORY_PATH = Path("hosts") - EVENT_LOG = Path("/tmp/agent-events.log") - CHECKPOINT_FILE = Path("/tmp/supervisor-checkpoint.json") - --# Action Queue Layout --ACTION_DIRS = ["pending", "approved", "running", "completed", "failed", "rejected"] -- - # Reconcile event types - RECONCILE_REQUIRED = "reconcile_required" - RECONCILE_RECOMMENDED = "reconcile_recommended" -@@ -29,70 +24,6 @@ STATE_DEGRADED = "degraded" - STATE_UNSTABLE = "unstable" - STATE_RECONCILING = "reconciling" - --def ensure_action_dirs(): -- """Ensure action queue directories exist.""" -- for d in ACTION_DIRS: -- (ACTIONS_ROOT / d).mkdir(parents=True, exist_ok=True) -- --def emit_action_proposal(recommendation): -- """Convert recommendation to action proposal and save to pending/.""" -- ensure_action_dirs() -- -- action_type_map = { -- "redeploy": "redeploy_service", -- "deploy": "redeploy_service", -- "diagnostics": "collect_diagnostics", -- "failover_review": "collect_diagnostics", -- "review": "collect_diagnostics", -- "delayed_deployment": "rerun_deployment_stage" -- } -- -- action_type = action_type_map.get(recommendation["action"], "collect_diagnostics") -- -- risk_level_map = { -- "redeploy_service": "guarded", -- "rerun_healthcheck": "safe", -- "rerun_deployment_stage": "guarded", -- "collect_diagnostics": "safe" -- } -- risk_level = risk_level_map.get(action_type, "dangerous") -- -- # Dangerous always requires approval -- # Guarded defaults to approval -- approval_required = risk_level in ["dangerous", "guarded"] -- -- action_id = str(uuid.uuid4()) -- action = { -- "action_id": action_id, -- "created_at": time.time(), -- "proposed_by": "supervisor", -- "correlation_id": str(uuid.uuid4()), # In a real system, link to drift ID -- "node": recommendation["drift"].get("node"), -- "service": recommendation["drift"].get("service"), -- "action_type": action_type, -- "risk_level": risk_level, -- "confidence": 0.9, # Default confidence -- "approval_required": approval_required, -- "autonomous_eligible": False, # No autonomy yet -- "status": "pending", -- "payload": recommendation["drift"], -- "rollback_reference": None -- } -- -- file_path = ACTIONS_ROOT / "pending" / f"{action_id}.json" -- try: -- with open(file_path, "w") as f: -- json.dump(action, f, indent=2) -- -- emit_event("action_created", f"Action proposed: {action_type} for {action.get('service') or action.get('node')}", { -- "action_id": action_id, -- "action_type": action_type, -- "node": action.get("node"), -- "service": action.get("service") -- }) -- except Exception as e: -- print(f"Error emitting action proposal: {e}", file=sys.stderr) -- - def emit_event(event_type, message, details=None): - """Emit reconciliation events using existing event system (append-only file).""" - event = { -@@ -347,8 +278,6 @@ def main(): - # Emit reconciliation events - for rec in recommendations: - emit_event(rec["type"], rec["message"], rec["drift"]) -- # Proposed: Emit action proposals to action queue -- emit_action_proposal(rec) - - # 6. Save checkpoint - save_checkpoint({ -diff --git a/tmp/homelab/world/deployments/dep-001.json b/tmp/homelab/world/deployments/dep-001.json -new file mode 100644 -index 0000000..02db067 ---- /dev/null -+++ b/tmp/homelab/world/deployments/dep-001.json -@@ -0,0 +1 @@ -+{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778597957} -diff --git a/tmp/homelab/world/deployments/dep-002.json b/tmp/homelab/world/deployments/dep-002.json -new file mode 100644 -index 0000000..e977aa0 ---- /dev/null -+++ b/tmp/homelab/world/deployments/dep-002.json -@@ -0,0 +1 @@ -+{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778597657} -diff --git a/tmp/homelab/world/deployments/dep-003.json b/tmp/homelab/world/deployments/dep-003.json -new file mode 100644 -index 0000000..66f10c9 ---- /dev/null -+++ b/tmp/homelab/world/deployments/dep-003.json -@@ -0,0 +1 @@ -+{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778597357} -diff --git a/tmp/homelab/world/incidents/inc-99.json b/tmp/homelab/world/incidents/inc-99.json -new file mode 100644 -index 0000000..4f28449 ---- /dev/null -+++ b/tmp/homelab/world/incidents/inc-99.json -@@ -0,0 +1 @@ -+{"id": "inc-99", "description": "High memory usage on node1", "status": "investigating"} -diff --git a/tmp/homelab/world/nodes/node1.json b/tmp/homelab/world/nodes/node1.json -new file mode 100644 -index 0000000..d246df0 ---- /dev/null -+++ b/tmp/homelab/world/nodes/node1.json -@@ -0,0 +1 @@ -+{"name": "node1", "status": "online"} -diff --git a/tmp/homelab/world/nodes/node2.json b/tmp/homelab/world/nodes/node2.json -new file mode 100644 -index 0000000..bcc0d43 ---- /dev/null -+++ b/tmp/homelab/world/nodes/node2.json -@@ -0,0 +1 @@ -+{"name": "node2", "status": "offline"} -diff --git a/tmp/homelab/world/services/database.json b/tmp/homelab/world/services/database.json -new file mode 100644 -index 0000000..4395a11 ---- /dev/null -+++ b/tmp/homelab/world/services/database.json -@@ -0,0 +1 @@ -+{"name": "database", "status": "error", "node": "node2"} -diff --git a/tmp/homelab/world/services/homeassistant.json b/tmp/homelab/world/services/homeassistant.json -new file mode 100644 -index 0000000..50e31b7 ---- /dev/null -+++ b/tmp/homelab/world/services/homeassistant.json -@@ -0,0 +1 @@ -+{"name": "homeassistant", "status": "unhealthy", "node": "node1"} -diff --git a/webui/index.html b/webui/index.html -index 5c049c1..d720307 100644 ---- a/webui/index.html -+++ b/webui/index.html -@@ -216,9 +216,9 @@ - .label { color: var(--text-muted); font-size: 12px; margin-bottom: 4px; } - .value { font-weight: 500; margin-bottom: 12px; } - -- .risk-safe { background: rgba(62, 175, 124, 0.1); color: var(--safe); } -- .risk-guarded { background: rgba(230, 126, 34, 0.1); color: var(--guarded); } -- .risk-dangerous { background: rgba(192, 57, 43, 0.1); color: var(--dangerous); } -+ .risk-safe { color: var(--safe); } -+ .risk-guarded { color: var(--guarded); } -+ .risk-dangerous { color: var(--dangerous); } - - - -@@ -229,9 +229,6 @@ - -- - -@@ -241,15 +238,9 @@ - -- - -- - -@@ -264,16 +255,7 @@ - -
-
--
--
Dashboard
-- --
-+
Dashboard
-
- -
-@@ -287,10 +269,6 @@ -
System Overview
-
- --
--
Pending Actions
--
--
-
-
Active Incidents
-
-@@ -298,20 +276,6 @@ -
- - -- -- -- - - - -- -- -- - - - -- -- -- - -