Dashboard
++
++
+ Dashboard
++
++
+
+
+@@ -269,6 +287,10 @@
+ System Overview
+
+
++
++
+ Pending Actions
++
++
+
+
+
++
++ Active Incidents
+
+@@ -276,6 +298,20 @@
+
++
++
+
+
++
++
++
++ Pending Approval
++ ++
++
++ Active / History
++ ++
+
+@@ -291,11 +327,24 @@
+
+
+
++
++
++
++
+
+
++
++ Runtime Topology
++
++
+
+
+
++
++
++
++
++
+
+
+
+@@ -335,6 +384,34 @@
+ }
+ }
+
++ async function postData(endpoint, data) {
++ try {
++ const res = await fetch(endpoint, {
++ method: 'POST',
++ headers: {'Content-Type': 'application/json'},
++ body: JSON.stringify(data)
++ });
++ return await res.json();
++ } catch (e) {
++ console.error('Post error:', endpoint, e);
++ return null;
++ }
++ }
++
++ async function mutateAction(id, status) {
++ const res = await postData('/action/mutate', {id, status});
++ if (res && res.status === 'ok') {
++ refreshData();
++ } else {
++ alert('Mutation failed');
++ }
++ }
++
++ function setOperatorMode(mode) {
++ console.log('Operator mode set to:', mode);
++ // In real system, this would call backend
++ }
++
+ function formatTime(ts) {
+ if (!ts) return 'N/A';
+ return new Date(ts * 1000).toLocaleString();
+@@ -368,6 +445,53 @@
+ }
+ }
+
++ if (currentView === 'dashboard' || currentView === 'actions') {
++ const actions = await fetchData('/actions');
++ if (actions) {
++ if (currentView === 'dashboard') {
++ const dashActions = document.getElementById('dashboard-actions-summary');
++ const pendingCount = actions.pending.length;
++ dashActions.innerHTML = `
++
Dashboard
+
+ Action Queue
+
Nodes
@@ -238,9 +241,15 @@
Deployments
+
+ Topology
+
Events
+
+ Correlation
+
Recommendations
@@ -255,7 +264,16 @@
-
@@ -269,6 +287,10 @@
+ Pending
${pendingCount}
++ Running
${actions.running.length}
++ `;
++ }
++ if (currentView === 'actions') {
++ const pendingEl = document.getElementById('actions-pending');
++ const historyEl = document.getElementById('actions-history');
++
++ pendingEl.innerHTML = actions.pending.map(a => `
++
++
++ `).join('') || 'No pending actions.';
++
++ const history = [...actions.approved, ...actions.running, ...actions.completed, ...actions.failed];
++ historyEl.innerHTML = history.sort((a,b) => b.timestamp - a.timestamp).map(a => `
++
++
++ ${a.type.toUpperCase()}
++ ${a.risk_level}
++ ${a.description}
++Target
${a.target.node} ${a.target.service || ''}
++ Confidence
${Math.round(a.confidence*100)}%
++
++
++
++
++
++
++ `).join('') || 'No history.';
++ }
++ }
++ }
++
+ if (currentView === 'dashboard' || currentView === 'events') {
+ const incidents = await fetchData('/incidents');
+ if (currentView === 'dashboard') {
+@@ -474,6 +598,64 @@
+ `).join('');
+ }
+
++ if (currentView === 'topology') {
++ const nodes = await fetchData('/nodes');
++ const services = await fetchData('/services');
++ const topMap = document.getElementById('topology-map');
++ if (nodes && services) {
++ topMap.innerHTML = nodes.map(node => {
++ const nodeServices = services.filter(s => s.node === node.hostname || s.node === node.id);
++ return `
++
++ ${a.type.toUpperCase()}
++ ${a.status}
++
++ ${a.description}
++ ${formatTime(a.timestamp)} | Target: ${a.target.node}
++ ${a.status === 'approved' ? `` : ''}
++
++
++ `;
++ }).join('');
++ }
++ }
++
++ if (currentView === 'correlation') {
++ const incidents = await fetchData('/incidents');
++ const actions = await fetchData('/actions');
++ const list = document.getElementById('correlation-chains');
++ if (incidents && actions) {
++ const allActions = Object.values(actions).flat();
++ list.innerHTML = incidents.map(inc => {
++ const related = allActions.filter(a => a.correlation_chain && a.correlation_chain.includes(inc.id));
++ return `
++
++
++ ${node.hostname}
++ ${node.health}
++ Capabilities
++ ${node.capabilities.join(', ')}
++ Services
++
++ ${nodeServices.length > 0 ? nodeServices.map(s => `
++
++
++ ${s.name}
++ ${s.health}
++
++ ${s.dependencies.length > 0 ? `dep: ${s.dependencies.join(', ')}
` : ''}
++ `).join('') : 'None
'}
++
++
++ `;
++ }).join('');
++ }
++ }
+ if (currentView === 'settings') {
+ const config = await fetchData('/config');
+ const content = document.getElementById('settings-content');
+@@ -482,6 +664,8 @@
+
++
++ Incident: ${inc.id || 'INC-001'}
++ Active
++ ${inc.message}
++Related Actions
++ ${related.map(a => `
++
++ ${a.type} (${a.status})
++ ${a.description} ++
++ `).join('') || '++ ${a.description} ++
No actions yet
'}
++ ${config.auto_mode ? 'Enabled' : 'Disabled'}
+ Action Thresholds
+ ${JSON.stringify(config.action_thresholds, null, 2)}
++ Telegram Integration
++ Ready for mobile approval flows. Hook: /api/v1/telegram/webhook
+
+ `;
+ }
+diff --git a/webui/web.py b/webui/web.py
+index 053ac1a..4727274 100644
+--- a/webui/web.py
++++ b/webui/web.py
+@@ -8,6 +8,7 @@ from pathlib import Path
+ STATE_DIR = Path("/opt/homelab/state")
+ EVENTS_DIR = Path("/opt/homelab/events")
+ WORLD_DIR = Path("/opt/homelab/world")
++ACTIONS_DIR = Path("/opt/homelab/actions")
+ EVENT_LOG = Path("/tmp/agent-events.log")
+ STATIC_DIR = Path(__file__).parent
+ REDIS_HOST = os.getenv("REDIS_HOST", "redis")
+@@ -164,6 +165,55 @@ def current_events():
+ return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
+
+
++def current_actions():
++ actions = {}
++ statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
++ for status in statuses:
++ actions[status] = []
++ status_dir = ACTIONS_DIR / status
++ if status_dir.exists():
++ for f in status_dir.glob("*.json"):
++ data = read_json_file(f)
++ if data:
++ actions[status].append(data)
++ return actions
++
++
++def mutate_action(action_id, target_status):
++ statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
++ if target_status not in statuses:
++ return False, f"Invalid target status: {target_status}"
++
++ # Find where the action is
++ source_path = None
++ for status in statuses:
++ p = ACTIONS_DIR / status / f"{action_id}.json"
++ if p.exists():
++ source_path = p
++ break
++
++ if not source_path:
++ return False, f"Action {action_id} not found"
++
++ target_dir = ACTIONS_DIR / target_status
++ target_dir.mkdir(parents=True, exist_ok=True)
++ target_path = target_dir / f"{action_id}.json"
++
++ try:
++ data = json.loads(source_path.read_text())
++ data["status"] = target_status
++ data["last_mutation"] = os.path.getmtime(source_path) # or current time
++ import time
++ data["last_mutation"] = time.time()
++
++ target_path.write_text(json.dumps(data, indent=2))
++ if source_path != target_path:
++ source_path.unlink()
++ return True, "Success"
++ except Exception as e:
++ return False, str(e)
++
++
+ def send_json(status, payload, handler):
+ body = (json.dumps(payload) + "\n").encode("utf-8")
+ handler.send_response(status)
+@@ -207,6 +257,10 @@ class Handler(BaseHTTPRequestHandler):
+ send_json(200, current_events(), self)
+ return
+
++ if self.path == "/actions":
++ send_json(200, current_actions(), self)
++ return
++
+ if self.path == "/logs":
+ print("LOGS endpoint called", flush=True)
+ body = ("\n".join(tail_lines(EVENT_LOG, 200)) + "\n").encode("utf-8")
+@@ -236,6 +290,7 @@ class Handler(BaseHTTPRequestHandler):
+ "/auto-mode",
+ "/config",
+ "/events",
++ "/action/mutate",
+ ):
+ self.send_error(404)
+ return
+@@ -291,6 +346,19 @@ class Handler(BaseHTTPRequestHandler):
+ send_json(200, {"status": "sent"}, self)
+ return
+
++ if self.path == "/action/mutate":
++ action_id = payload.get("id")
++ target = payload.get("status")
++ if not action_id or not target:
++ self.send_error(400, "id and status are required")
++ return
++ success, msg = mutate_action(action_id, target)
++ if success:
++ send_json(200, {"status": "ok"}, self)
++ else:
++ self.send_error(500, msg)
++ return
++
+ if not command:
+ self.send_error(400, "command is required")
+ return
\ No newline at end of file
diff --git a/docs/action-queue-system.md b/docs/action-queue-system.md
new file mode 100644
index 0000000..614ae1b
--- /dev/null
+++ b/docs/action-queue-system.md
@@ -0,0 +1,75 @@
+# Action Queue System
+
+The Action Queue System provides a safe, filesystem-first lifecycle for operational actions in the homelab platform. It enables controlled execution with mandatory approval for high-risk operations.
+
+## Action Lifecycle
+
+Actions move through various states, represented by directories under `/opt/homelab/actions/`:
+
+1. **Pending** (`pending/`): Actions proposed by the Supervisor or other agents.
+2. **Approved** (`approved/`): Actions that have been reviewed and approved for execution.
+3. **Running** (`running/`): Actions currently being processed by the Executor.
+4. **Completed** (`completed/`): Successfully executed actions.
+5. **Failed** (`failed/`): Actions that encountered errors during execution.
+6. **Rejected** (`rejected/`): Proposed actions that were explicitly denied.
+
+## Action Schema
+
+Actions are stored as JSON documents with the following structure:
+
+```json
+{
+ "action_id": "uuid",
+ "created_at": 1620000000.0,
+ "proposed_by": "supervisor",
+ "correlation_id": "uuid",
+ "node": "node-name",
+ "service": "service-name",
+ "action_type": "redeploy_service",
+ "risk_level": "guarded",
+ "confidence": 0.9,
+ "approval_required": true,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": { ... },
+ "rollback_reference": null
+}
+```
+
+## Safety Model
+
+Actions are categorized into safety classes:
+
+- **Safe**: Low-risk actions that may be eligible for autonomous execution in the future (e.g., `collect_diagnostics`, `rerun_healthcheck`).
+- **Guarded**: Actions that default to requiring approval but could be automated under strict conditions (e.g., `redeploy_service`, `rerun_deployment_stage`).
+- **Dangerous**: High-risk actions that ALWAYS require manual approval.
+
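+Currently, the platform operates in a **Recommendation-Only** mode: the Executor only processes actions found in `approved/`, so even `safe` actions (whose `approval_required` flag is `false`) must be explicitly approved before they run.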
+
+## Initial Action Types
+
+- `redeploy_service`: Restarts or redeploys a service container.
+- `rerun_healthcheck`: Triggers an immediate health check.
+- `rerun_deployment_stage`: Retries a specific stage of a failed deployment.
+- `collect_diagnostics`: Gathers logs and metrics for troubleshooting.
+
+## Executor
+
+The Executor (`scripts/executor/executor.py`) is responsible for processing approved actions. It features:
+
+- **Process Approved Only**: New work is taken only from the `approved/` directory; the only other actions it touches are interrupted ones resumed from `running/`.
+- **Recommendation-Safe**: Simulation-based execution that logs intended mutations without side effects.
+- **Idempotency**: Designed to be safe to run multiple times.
+- **Resumable State**: If interrupted, it will pick up actions in the `running/` state.
+- **Append-Only History**: Maintains a `history.log` of all action transitions.
+
+## Rollback Concepts
+
+Every action includes a `rollback_reference` field (currently always `null`). In future iterations, this will point to the previous stable state or a reverse action that can be triggered if the current action fails or causes further instability.
+
+## Future Autonomous Execution
+
+The system is designed to transition to autonomous execution by:
+1. Identifying `safe` actions with high `confidence` scores.
+2. Matching them against a `policy-engine`.
+3. Automatically moving them from `pending/` to `approved/` based on allowed safety guardrails.
diff --git a/docs/operator/approval-workflow.md b/docs/operator/approval-workflow.md
new file mode 100644
index 0000000..9312ae1
--- /dev/null
+++ b/docs/operator/approval-workflow.md
@@ -0,0 +1,27 @@
+# Operator Approval Workflow
+
+This document describes the process of reviewing and approving actions generated by the reconciliation supervisor.
+
+## Workflow Stages
+
+### 1. Action Identification
+When the supervisor identifies a delta between desired and actual state, it generates a pending action in `/opt/homelab/actions/pending/`.
+
+### 2. Risk Assessment
+Actions are categorized by risk level:
+- **Safe**: Low impact, high confidence. Intended to be auto-approved once autonomous mode is available; for now it still needs explicit approval.
+- **Guarded**: Moderate impact. Requires explicit operator approval.
+- **Dangerous**: High impact (e.g., node redeploy). Requires multi-step approval or senior operator override.
+
+### 3. Review Process
+1. Navigate to the **Action Queue** view.
+2. Review the **Confidence Score** and **Correlation Chain** to understand why the action was proposed.
+3. Check the **Rollback Availability**.
+
+### 4. Decision
+- **Approve**: Moves action to `approved` state.
+- **Reject**: Moves action to `rejected` state and suppresses similar recommendations for a cooldown period.
+- **Execute**: Transitions an approved action to `running` status.
+
+## Mobile Approvals
+Approval requests can be acknowledged via the Telegram bot integration, allowing for remote operational control.
diff --git a/docs/operator/incident-remediation.md b/docs/operator/incident-remediation.md
new file mode 100644
index 0000000..e11ca9d
--- /dev/null
+++ b/docs/operator/incident-remediation.md
@@ -0,0 +1,24 @@
+# Incident Remediation Guide
+
+Guide for operators responding to system incidents using the Control Plane.
+
+## Remediation Flow
+
+### 1. Detection
+Incidents appear in the **Active Incidents** card on the Dashboard and in the **Events** timeline.
+
+### 2. Correlation
+Use the **Correlation** view to see:
+- The event chain leading to the incident.
+- Automated recommendations generated in response.
+- Any manual actions already taken.
+
+### 3. Intervention
+1. Review the recommended actions in the **Action Queue**.
+2. If the automated recommendation is not sufficient, use the **Nodes** or **Services** view to manually trigger commands.
+3. Observe the **Runtime Topology** to ensure no cascading failures occur during remediation.
+
+### 4. Verification
+Once actions are completed, verify the system state:
+- Health badges should transition back to **Nominal**.
+- The **System Status** in the sidebar should reflect a healthy state.
diff --git a/docs/operator/reconcile-review.md b/docs/operator/reconcile-review.md
index 8b99c6f..e56c333 100644
--- a/docs/operator/reconcile-review.md
+++ b/docs/operator/reconcile-review.md
@@ -2,11 +2,13 @@
The system continuously monitors for drift between desired and actual state.
-1. If a service is in RECONCILING state, check the Services view.
-2. Review the Recommendations view for automated or guarded actions.
-3. For 'safe' actions with high confidence, the system may act autonomously if enabled.
-4. For 'guarded' or 'dangerous' actions, an operator must manually approve the action.
-5. Risk Levels:
- - **Safe**: Minimal impact, high success rate.
- - **Guarded**: Potential brief service interruption.
- - **Dangerous**: Significant impact, potential data loss, or hardware interaction required.
+1. **Drift Detection**: When drift is detected, the supervisor generates a recommendation and a corresponding pending action.
+2. **Review**: Navigate to the **Recommendations** view for a high-level summary, or the **Action Queue** for the specific execution plan.
+3. **Approval**: For 'guarded' or 'dangerous' actions, click **Approve** in the Action Queue.
+4. **Execution**: Once approved, the action can be triggered manually by clicking **Execute**, or it will be picked up by the autonomous executor if the system is in `AUTONOMOUS` mode.
+5. **Observation**: Monitor the **Deployments** and **Topology** views to watch the reconciliation in real-time.
+
+Risk Levels:
+- **Safe**: Minimal impact, high success rate.
+- **Guarded**: Potential brief service interruption.
+- **Dangerous**: Significant impact, potential data loss, or node-level disruption.
diff --git a/scripts/executor/executor.py b/scripts/executor/executor.py
new file mode 100644
index 0000000..12bb98f
--- /dev/null
+++ b/scripts/executor/executor.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+import os
+import json
+import time
+import sys
+import shutil
+import uuid
+from pathlib import Path
+
+# Configuration
+ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions"))
+EVENT_LOG = Path("/tmp/agent-events.log")
+HISTORY_LOG = ACTIONS_ROOT / "history.log"
+
+def emit_event(event_type, message, details=None):
+ """Emit action lifecycle events."""
+ event = {
+ "type": event_type,
+ "message": message,
+ "timestamp": time.time(),
+ "details": details or {}
+ }
+ line = json.dumps(event)
+ print(line)
+ try:
+ with open(EVENT_LOG, "a") as f:
+ f.write(line + "\n")
+ f.flush()
+ except Exception as e:
+ print(f"Error writing to event log: {e}", file=sys.stderr)
+
+def log_history(action_id, status, message):
+ """Append-only execution history."""
+ entry = {
+ "timestamp": time.time(),
+ "action_id": action_id,
+ "status": status,
+ "message": message
+ }
+ try:
+ with open(HISTORY_LOG, "a") as f:
+ f.write(json.dumps(entry) + "\n")
+ f.flush()
+ except Exception as e:
+ print(f"Error writing history: {e}", file=sys.stderr)
+
+def ensure_dirs():
+ for d in ["pending", "approved", "running", "completed", "failed", "rejected"]:
+ (ACTIONS_ROOT / d).mkdir(parents=True, exist_ok=True)
+
+def approve_action(action_id):
+ ensure_dirs()
+ if not action_id.endswith(".json"):
+ filename = f"{action_id}.json"
+ else:
+ filename = action_id
+
+ pending_path = ACTIONS_ROOT / "pending" / filename
+ if not pending_path.exists():
+ print(f"Action {filename} not found in pending.")
+ return False
+
+ approved_path = ACTIONS_ROOT / "approved" / filename
+
+ try:
+ with open(pending_path, "r") as f:
+ action = json.load(f)
+
+ action["status"] = "approved"
+ action["approved_at"] = time.time()
+
+ with open(pending_path, "w") as f:
+ json.dump(action, f, indent=2)
+
+ shutil.move(pending_path, approved_path)
+
+ emit_event("action_approved", f"Action approved: {action['action_id']}", {"action_id": action['action_id']})
+ log_history(action['action_id'], "approved", "Manual approval received")
+ print(f"Action {action['action_id']} approved.")
+ return True
+ except Exception as e:
+ print(f"Error approving action: {e}")
+ return False
+
+def reject_action(action_id):
+ ensure_dirs()
+ if not action_id.endswith(".json"):
+ filename = f"{action_id}.json"
+ else:
+ filename = action_id
+
+ pending_path = ACTIONS_ROOT / "pending" / filename
+ if not pending_path.exists():
+ print(f"Action {filename} not found in pending.")
+ return False
+
+ rejected_path = ACTIONS_ROOT / "rejected" / filename
+
+ try:
+ with open(pending_path, "r") as f:
+ action = json.load(f)
+
+ action["status"] = "rejected"
+ action["rejected_at"] = time.time()
+
+ with open(pending_path, "w") as f:
+ json.dump(action, f, indent=2)
+
+ shutil.move(pending_path, rejected_path)
+
+ emit_event("action_rejected", f"Action rejected: {action['action_id']}", {"action_id": action['action_id']})
+ log_history(action['action_id'], "rejected", "Manual rejection received")
+ print(f"Action {action['action_id']} rejected.")
+ return True
+ except Exception as e:
+ print(f"Error rejecting action: {e}")
+ return False
+
+def process_action(action_path, dry_run=False):
+ """Process a single approved action."""
+ try:
+ with open(action_path, "r") as f:
+ action = json.load(f)
+ except Exception as e:
+ print(f"Error reading action {action_path}: {e}")
+ return
+
+ action_id = action["action_id"]
+ action_type = action["action_type"]
+
+ # Move to running (Resumable execution state)
+ running_path = ACTIONS_ROOT / "running" / action_path.name
+ shutil.move(action_path, running_path)
+
+ action["status"] = "running"
+ action["started_at"] = time.time()
+ with open(running_path, "w") as f:
+ json.dump(action, f, indent=2)
+
+ emit_event("action_started", f"Started action {action_id} ({action_type})", {"action_id": action_id})
+ log_history(action_id, "running", f"Execution started (dry_run={dry_run})")
+
+ # Simulation logic (Recommendation-safe execution model)
+ print(f"Executing {action_type} for {action.get('service') or action.get('node')}...")
+
+ # Idempotent simulation: in a real world, we'd check if it's already done
+ time.sleep(0.5)
+
+ success = True
+ if dry_run:
+ print(f"[DRY-RUN] Would execute {action_type} logic here.")
+ else:
+ # Initial action types implementation (Simulation)
+ if action_type == "redeploy_service":
+ print(f"DEBUG: Triggering container restart/redeploy for {action.get('service')}")
+ elif action_type == "rerun_healthcheck":
+ print(f"DEBUG: Running healthcheck for {action.get('service')}")
+ elif action_type == "rerun_deployment_stage":
+ print(f"DEBUG: Retrying deployment stage for {action.get('service')}")
+ elif action_type == "collect_diagnostics":
+ print(f"DEBUG: Collecting logs and metrics for {action.get('service') or action.get('node')}")
+ else:
+ print(f"DEBUG: Executing unknown action type: {action_type}")
+
+ # Finalize
+ if success:
+ final_status = "completed"
+ target_dir = ACTIONS_ROOT / "completed"
+ else:
+ final_status = "failed"
+ target_dir = ACTIONS_ROOT / "failed"
+
+ final_path = target_dir / action_path.name
+ action["status"] = final_status
+ action["finished_at"] = time.time()
+
+ with open(running_path, "w") as f:
+ json.dump(action, f, indent=2)
+
+ shutil.move(running_path, final_path)
+
+ emit_event(f"action_{final_status}", f"Action {action_id} {final_status}", {"action_id": action_id})
+ log_history(action_id, final_status, "Execution finished")
+
+def run_executor(dry_run=False):
+ ensure_dirs()
+ print(f"--- Executor Run: {time.ctime()} (dry_run={dry_run}) ---")
+
+ # 1. Resume running actions
+ running_actions = list((ACTIONS_ROOT / "running").glob("*.json"))
+ for action_file in running_actions:
+ print(f"Resuming action: {action_file.name}")
+ process_action(action_file, dry_run=dry_run)
+
+ # 2. Process approved actions
+ approved_actions = list((ACTIONS_ROOT / "approved").glob("*.json"))
+ if not approved_actions:
+ print("No approved actions found.")
+ else:
+ for action_file in approved_actions:
+ process_action(action_file, dry_run=dry_run)
+
+ print("Run complete.")
+
+if __name__ == "__main__":
+ import argparse
+ parser = argparse.ArgumentParser(description="Homelab Action Executor")
+ parser.add_argument("command", choices=["run", "approve", "reject"], nargs="?", default="run")
+ parser.add_argument("action_id", nargs="?")
+ parser.add_argument("--dry-run", action="store_true")
+
+ args = parser.parse_args()
+
+ if args.command == "run":
+ run_executor(dry_run=args.dry_run)
+ elif args.command == "approve":
+ if not args.action_id:
+ print("Error: action_id required for approve")
+ sys.exit(1)
+ approve_action(args.action_id)
+ elif args.command == "reject":
+ if not args.action_id:
+ print("Error: action_id required for reject")
+ sys.exit(1)
+ reject_action(args.action_id)
diff --git a/scripts/executor/test_actions.sh b/scripts/executor/test_actions.sh
new file mode 100644
index 0000000..6cf7271
--- /dev/null
+++ b/scripts/executor/test_actions.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Validation script for Homelab Action Queue System
+#
+# Drives one action through propose -> approve -> execute -> completed and,
+# when another pending action exists, one more through reject. Run it from
+# the repository root: the scripts/ paths below are relative.
+
+set -e
+
+BASE_DIR=$(pwd)
+export HOMELAB_WORLD_ROOT="$BASE_DIR/tmp/homelab/world"
+export HOMELAB_ACTIONS_ROOT="$BASE_DIR/tmp/homelab/actions"
+EVENT_LOG="/tmp/agent-events.log"
+
+# The event log is shared and append-only. Remember its current length so
+# step 8 only inspects events written during this run.
+if [ -f "$EVENT_LOG" ]; then
+    EVENTS_BEFORE=$(wc -l < "$EVENT_LOG")
+else
+    EVENTS_BEFORE=0
+fi
+REJECTED_ONE=0
+
+echo "=== Starting Action Queue Validation ==="
+
+# 1. Setup drift scenarios
+echo "Setting up drift scenarios..."
+bash scripts/supervisor/test_scenarios.sh
+
+# 2. Run supervisor to generate action proposals
+echo "Running supervisor..."
+python3 scripts/supervisor/supervisor.py
+
+# 3. Check for pending actions
+echo "Checking pending actions..."
+ls -l "$HOMELAB_ACTIONS_ROOT/pending/"
+
+# Get an action ID from pending
+ACTION_FILE=$(ls "$HOMELAB_ACTIONS_ROOT/pending/" | head -n 1)
+if [ -z "$ACTION_FILE" ]; then
+    echo "Error: No pending actions found!"
+    exit 1
+fi
+ACTION_ID="${ACTION_FILE%.json}"
+echo "Found action: $ACTION_ID"
+
+# 4. Approve the action
+echo "Approving action $ACTION_ID..."
+python3 scripts/executor/executor.py approve "$ACTION_ID"
+
+# 5. Run executor
+echo "Running executor..."
+python3 scripts/executor/executor.py run
+
+# 6. Verify completion
+if [ -f "$HOMELAB_ACTIONS_ROOT/completed/$ACTION_FILE" ]; then
+    echo "SUCCESS: Action $ACTION_ID moved to completed."
+else
+    echo "FAILURE: Action $ACTION_ID NOT found in completed."
+    exit 1
+fi
+
+# 7. Test rejection
+echo "Testing rejection..."
+NEXT_ACTION_FILE=$(ls "$HOMELAB_ACTIONS_ROOT/pending/" | head -n 1)
+if [ -n "$NEXT_ACTION_FILE" ]; then
+    NEXT_ACTION_ID="${NEXT_ACTION_FILE%.json}"
+    echo "Rejecting action $NEXT_ACTION_ID..."
+    python3 scripts/executor/executor.py reject "$NEXT_ACTION_ID"
+
+    if [ -f "$HOMELAB_ACTIONS_ROOT/rejected/$NEXT_ACTION_FILE" ]; then
+        echo "SUCCESS: Action $NEXT_ACTION_ID moved to rejected."
+        REJECTED_ONE=1
+    else
+        echo "FAILURE: Action $NEXT_ACTION_ID NOT found in rejected."
+        exit 1
+    fi
+fi
+
+# 8. Verify events
+echo "Verifying events in $EVENT_LOG..."
+EXPECTED_EVENTS="action_created action_approved action_started action_completed"
+if [ "$REJECTED_ONE" -eq 1 ]; then
+    # A rejection event can only be expected when step 7 rejected something.
+    EXPECTED_EVENTS="$EXPECTED_EVENTS action_rejected"
+fi
+for EVENT in $EXPECTED_EVENTS; do
+    # A bare `grep | tail` pipeline exits with tail's status, so a missing
+    # event would go unnoticed; capture the match and test it explicitly.
+    MATCH=$(tail -n "+$((EVENTS_BEFORE + 1))" "$EVENT_LOG" | grep "$EVENT" | tail -n 1) || true
+    if [ -z "$MATCH" ]; then
+        echo "FAILURE: No $EVENT event found in $EVENT_LOG."
+        exit 1
+    fi
+    echo "$MATCH"
+done
+
+echo "=== Validation Complete ==="
diff --git a/scripts/supervisor/supervisor.py b/scripts/supervisor/supervisor.py
index e58027b..ce5d162 100644
--- a/scripts/supervisor/supervisor.py
+++ b/scripts/supervisor/supervisor.py
@@ -5,14 +5,19 @@ import yaml
import json
import time
import glob
+import uuid
from pathlib import Path
# Configuration
WORLD_STATE_PATH = Path(os.getenv("HOMELAB_WORLD_ROOT", "/opt/homelab/world"))
+ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions"))
INVENTORY_PATH = Path("hosts")
EVENT_LOG = Path("/tmp/agent-events.log")
CHECKPOINT_FILE = Path("/tmp/supervisor-checkpoint.json")
+# Action Queue Layout
+ACTION_DIRS = ["pending", "approved", "running", "completed", "failed", "rejected"]
+
# Reconcile event types
RECONCILE_REQUIRED = "reconcile_required"
RECONCILE_RECOMMENDED = "reconcile_recommended"
@@ -24,6 +29,70 @@ STATE_DEGRADED = "degraded"
STATE_UNSTABLE = "unstable"
STATE_RECONCILING = "reconciling"
+def ensure_action_dirs():
+ """Ensure action queue directories exist."""
+ for d in ACTION_DIRS:
+ (ACTIONS_ROOT / d).mkdir(parents=True, exist_ok=True)
+
+def emit_action_proposal(recommendation):
+ """Convert recommendation to action proposal and save to pending/."""
+ ensure_action_dirs()
+
+ action_type_map = {
+ "redeploy": "redeploy_service",
+ "deploy": "redeploy_service",
+ "diagnostics": "collect_diagnostics",
+ "failover_review": "collect_diagnostics",
+ "review": "collect_diagnostics",
+ "delayed_deployment": "rerun_deployment_stage"
+ }
+
+ action_type = action_type_map.get(recommendation["action"], "collect_diagnostics")
+
+ risk_level_map = {
+ "redeploy_service": "guarded",
+ "rerun_healthcheck": "safe",
+ "rerun_deployment_stage": "guarded",
+ "collect_diagnostics": "safe"
+ }
+ risk_level = risk_level_map.get(action_type, "dangerous")
+
+ # Dangerous always requires approval
+ # Guarded defaults to approval
+ approval_required = risk_level in ["dangerous", "guarded"]
+
+ action_id = str(uuid.uuid4())
+ action = {
+ "action_id": action_id,
+ "created_at": time.time(),
+ "proposed_by": "supervisor",
+ "correlation_id": str(uuid.uuid4()), # In a real system, link to drift ID
+ "node": recommendation["drift"].get("node"),
+ "service": recommendation["drift"].get("service"),
+ "action_type": action_type,
+ "risk_level": risk_level,
+ "confidence": 0.9, # Default confidence
+ "approval_required": approval_required,
+ "autonomous_eligible": False, # No autonomy yet
+ "status": "pending",
+ "payload": recommendation["drift"],
+ "rollback_reference": None
+ }
+
+ file_path = ACTIONS_ROOT / "pending" / f"{action_id}.json"
+ try:
+ with open(file_path, "w") as f:
+ json.dump(action, f, indent=2)
+
+ emit_event("action_created", f"Action proposed: {action_type} for {action.get('service') or action.get('node')}", {
+ "action_id": action_id,
+ "action_type": action_type,
+ "node": action.get("node"),
+ "service": action.get("service")
+ })
+ except Exception as e:
+ print(f"Error emitting action proposal: {e}", file=sys.stderr)
+
def emit_event(event_type, message, details=None):
"""Emit reconciliation events using existing event system (append-only file)."""
event = {
@@ -278,6 +347,8 @@ def main():
# Emit reconciliation events
for rec in recommendations:
emit_event(rec["type"], rec["message"], rec["drift"])
+ # Proposed: Emit action proposals to action queue
+ emit_action_proposal(rec)
# 6. Save checkpoint
save_checkpoint({
diff --git a/tmp/homelab/actions/completed/0083f8ad-1f2b-47a4-81a8-81e59740879e.json b/tmp/homelab/actions/completed/0083f8ad-1f2b-47a4-81a8-81e59740879e.json
new file mode 100644
index 0000000..eca129a
--- /dev/null
+++ b/tmp/homelab/actions/completed/0083f8ad-1f2b-47a4-81a8-81e59740879e.json
@@ -0,0 +1,24 @@
+{
+ "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e",
+ "created_at": 1778600485.050643,
+ "proposed_by": "supervisor",
+ "correlation_id": "6d88755b-ca89-45eb-bf2d-506fca631144",
+ "node": "node1",
+ "service": "homeassistant",
+ "action_type": "redeploy_service",
+ "risk_level": "guarded",
+ "confidence": 0.9,
+ "approval_required": true,
+ "autonomous_eligible": false,
+ "status": "completed",
+ "payload": {
+ "type": "unhealthy_service",
+ "service": "homeassistant",
+ "status": "unhealthy",
+ "node": "node1"
+ },
+ "rollback_reference": null,
+ "approved_at": 1778600485.1278665,
+ "started_at": 1778600485.1792338,
+ "finished_at": 1778600485.6797137
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/completed/050add79-3265-4e35-bb88-41c368bbccda.json b/tmp/homelab/actions/completed/050add79-3265-4e35-bb88-41c368bbccda.json
new file mode 100644
index 0000000..d7f9318
--- /dev/null
+++ b/tmp/homelab/actions/completed/050add79-3265-4e35-bb88-41c368bbccda.json
@@ -0,0 +1,23 @@
+{
+ "action_id": "050add79-3265-4e35-bb88-41c368bbccda",
+ "created_at": 1778600510.7529757,
+ "proposed_by": "supervisor",
+ "correlation_id": "d8ba7d84-74dd-46c8-a085-5ed8ba186770",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "completed",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-001",
+ "service": "webapp"
+ },
+ "rollback_reference": null,
+ "approved_at": 1778600510.8252015,
+ "started_at": 1778600510.8744874,
+ "finished_at": 1778600511.3750403
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/completed/resumable-task.json b/tmp/homelab/actions/completed/resumable-task.json
new file mode 100644
index 0000000..f53ce2f
--- /dev/null
+++ b/tmp/homelab/actions/completed/resumable-task.json
@@ -0,0 +1,7 @@
+{
+ "action_id": "resumable-task",
+ "action_type": "rerun_healthcheck",
+ "status": "completed",
+ "started_at": 1778600488.5642526,
+ "finished_at": 1778600489.0646975
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/history.log b/tmp/homelab/actions/history.log
new file mode 100644
index 0000000..fa16b1f
--- /dev/null
+++ b/tmp/homelab/actions/history.log
@@ -0,0 +1,10 @@
+{"timestamp": 1778600485.1282582, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "approved", "message": "Manual approval received"}
+{"timestamp": 1778600485.179484, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "running", "message": "Execution started (dry_run=False)"}
+{"timestamp": 1778600485.680433, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "completed", "message": "Execution finished"}
+{"timestamp": 1778600485.7410686, "action_id": "2143ae5b-bcc6-410b-b925-e7def70fc013", "status": "rejected", "message": "Manual rejection received"}
+{"timestamp": 1778600488.5644836, "action_id": "resumable-task", "status": "running", "message": "Execution started (dry_run=False)"}
+{"timestamp": 1778600489.0652084, "action_id": "resumable-task", "status": "completed", "message": "Execution finished"}
+{"timestamp": 1778600510.825529, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "approved", "message": "Manual approval received"}
+{"timestamp": 1778600510.8747966, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "running", "message": "Execution started (dry_run=False)"}
+{"timestamp": 1778600511.3755214, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "completed", "message": "Execution finished"}
+{"timestamp": 1778600511.4307747, "action_id": "240cbbc0-891e-4032-bf73-1fa40ff850b4", "status": "rejected", "message": "Manual rejection received"}
diff --git a/tmp/homelab/actions/pending/50d7cdab-2f12-449f-965a-0383e32babaa.json b/tmp/homelab/actions/pending/50d7cdab-2f12-449f-965a-0383e32babaa.json
new file mode 100644
index 0000000..f3b3850
--- /dev/null
+++ b/tmp/homelab/actions/pending/50d7cdab-2f12-449f-965a-0383e32babaa.json
@@ -0,0 +1,21 @@
+{
+ "action_id": "50d7cdab-2f12-449f-965a-0383e32babaa",
+ "created_at": 1778600485.053174,
+ "proposed_by": "supervisor",
+ "correlation_id": "a2899a7f-548f-455d-a8dd-4e208be58e00",
+ "node": null,
+ "service": null,
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "unresolved_incident",
+ "incident_id": "inc-99",
+ "description": "High memory usage on node1",
+ "status": "investigating"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/5e239d96-ff3f-48a3-a71a-ad5aa6b7ff88.json b/tmp/homelab/actions/pending/5e239d96-ff3f-48a3-a71a-ad5aa6b7ff88.json
new file mode 100644
index 0000000..2da2527
--- /dev/null
+++ b/tmp/homelab/actions/pending/5e239d96-ff3f-48a3-a71a-ad5aa6b7ff88.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "5e239d96-ff3f-48a3-a71a-ad5aa6b7ff88",
+ "created_at": 1778600485.05199,
+ "proposed_by": "supervisor",
+ "correlation_id": "c5fa628e-35a1-44f9-9119-07d93f20af80",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-002",
+ "service": "webapp"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/7cde5093-3394-43af-9391-321c50ac5362.json b/tmp/homelab/actions/pending/7cde5093-3394-43af-9391-321c50ac5362.json
new file mode 100644
index 0000000..43c8aab
--- /dev/null
+++ b/tmp/homelab/actions/pending/7cde5093-3394-43af-9391-321c50ac5362.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "7cde5093-3394-43af-9391-321c50ac5362",
+ "created_at": 1778600510.7521193,
+ "proposed_by": "supervisor",
+ "correlation_id": "2a91f58e-e10d-4de5-abd7-5f4fe6fdc325",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-002",
+ "service": "webapp"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/a42e2183-ca22-4a50-97a7-eb53ab0e039a.json b/tmp/homelab/actions/pending/a42e2183-ca22-4a50-97a7-eb53ab0e039a.json
new file mode 100644
index 0000000..3010be7
--- /dev/null
+++ b/tmp/homelab/actions/pending/a42e2183-ca22-4a50-97a7-eb53ab0e039a.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "a42e2183-ca22-4a50-97a7-eb53ab0e039a",
+ "created_at": 1778600510.75163,
+ "proposed_by": "supervisor",
+ "correlation_id": "ec2a1960-5baa-453a-8380-65fc9376cc82",
+ "node": "node2",
+ "service": null,
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "offline_node",
+ "node": "node2",
+ "status": "offline"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/aae83bcd-455f-4b59-bab0-7c7994116468.json b/tmp/homelab/actions/pending/aae83bcd-455f-4b59-bab0-7c7994116468.json
new file mode 100644
index 0000000..8a8ac4d
--- /dev/null
+++ b/tmp/homelab/actions/pending/aae83bcd-455f-4b59-bab0-7c7994116468.json
@@ -0,0 +1,21 @@
+{
+ "action_id": "aae83bcd-455f-4b59-bab0-7c7994116468",
+ "created_at": 1778600510.7506568,
+ "proposed_by": "supervisor",
+ "correlation_id": "0a786305-46cb-4837-8725-53d99203f39e",
+ "node": "node1",
+ "service": "homeassistant",
+ "action_type": "redeploy_service",
+ "risk_level": "guarded",
+ "confidence": 0.9,
+ "approval_required": true,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "unhealthy_service",
+ "service": "homeassistant",
+ "status": "unhealthy",
+ "node": "node1"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/c2e6c844-6d96-4ea7-b924-5e33764e5493.json b/tmp/homelab/actions/pending/c2e6c844-6d96-4ea7-b924-5e33764e5493.json
new file mode 100644
index 0000000..dd3b02f
--- /dev/null
+++ b/tmp/homelab/actions/pending/c2e6c844-6d96-4ea7-b924-5e33764e5493.json
@@ -0,0 +1,21 @@
+{
+ "action_id": "c2e6c844-6d96-4ea7-b924-5e33764e5493",
+ "created_at": 1778600510.7533653,
+ "proposed_by": "supervisor",
+ "correlation_id": "6ffc0579-71ac-417f-8ea1-fc46e54527c6",
+ "node": null,
+ "service": null,
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "unresolved_incident",
+ "incident_id": "inc-99",
+ "description": "High memory usage on node1",
+ "status": "investigating"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/c91a4171-e636-4194-a146-6e003d2f2586.json b/tmp/homelab/actions/pending/c91a4171-e636-4194-a146-6e003d2f2586.json
new file mode 100644
index 0000000..b86e4c6
--- /dev/null
+++ b/tmp/homelab/actions/pending/c91a4171-e636-4194-a146-6e003d2f2586.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "c91a4171-e636-4194-a146-6e003d2f2586",
+ "created_at": 1778600510.7511823,
+ "proposed_by": "supervisor",
+ "correlation_id": "966a62ee-f81b-497d-96cb-7749f4da0c6f",
+ "node": "node2",
+ "service": "webapp",
+ "action_type": "rerun_deployment_stage",
+ "risk_level": "guarded",
+ "confidence": 0.9,
+ "approval_required": true,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "missing_service",
+ "service": "webapp",
+ "node": "node2"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/e6d3f0d6-c294-4282-b9f4-a730f9cec9dc.json b/tmp/homelab/actions/pending/e6d3f0d6-c294-4282-b9f4-a730f9cec9dc.json
new file mode 100644
index 0000000..6f2ac57
--- /dev/null
+++ b/tmp/homelab/actions/pending/e6d3f0d6-c294-4282-b9f4-a730f9cec9dc.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "e6d3f0d6-c294-4282-b9f4-a730f9cec9dc",
+ "created_at": 1778600485.0515254,
+ "proposed_by": "supervisor",
+ "correlation_id": "bf51852b-0b34-4b4b-98c9-fffff38f77ce",
+ "node": "node2",
+ "service": null,
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "offline_node",
+ "node": "node2",
+ "status": "offline"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/f4c56df2-6775-484b-806e-cdecdcc19584.json b/tmp/homelab/actions/pending/f4c56df2-6775-484b-806e-cdecdcc19584.json
new file mode 100644
index 0000000..554805b
--- /dev/null
+++ b/tmp/homelab/actions/pending/f4c56df2-6775-484b-806e-cdecdcc19584.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "f4c56df2-6775-484b-806e-cdecdcc19584",
+ "created_at": 1778600485.0527768,
+ "proposed_by": "supervisor",
+ "correlation_id": "f974d640-d0fb-4a85-bf8a-eda100182181",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-001",
+ "service": "webapp"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/pending/ff3da03c-fffa-49a7-985d-ed4589ab6856.json b/tmp/homelab/actions/pending/ff3da03c-fffa-49a7-985d-ed4589ab6856.json
new file mode 100644
index 0000000..060f995
--- /dev/null
+++ b/tmp/homelab/actions/pending/ff3da03c-fffa-49a7-985d-ed4589ab6856.json
@@ -0,0 +1,20 @@
+{
+ "action_id": "ff3da03c-fffa-49a7-985d-ed4589ab6856",
+ "created_at": 1778600485.0510974,
+ "proposed_by": "supervisor",
+ "correlation_id": "37da2d5b-3ecd-4a29-97c2-7e9461b1792e",
+ "node": "node2",
+ "service": "webapp",
+ "action_type": "rerun_deployment_stage",
+ "risk_level": "guarded",
+ "confidence": 0.9,
+ "approval_required": true,
+ "autonomous_eligible": false,
+ "status": "pending",
+ "payload": {
+ "type": "missing_service",
+ "service": "webapp",
+ "node": "node2"
+ },
+ "rollback_reference": null
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/rejected/2143ae5b-bcc6-410b-b925-e7def70fc013.json b/tmp/homelab/actions/rejected/2143ae5b-bcc6-410b-b925-e7def70fc013.json
new file mode 100644
index 0000000..2c670bf
--- /dev/null
+++ b/tmp/homelab/actions/rejected/2143ae5b-bcc6-410b-b925-e7def70fc013.json
@@ -0,0 +1,21 @@
+{
+ "action_id": "2143ae5b-bcc6-410b-b925-e7def70fc013",
+ "created_at": 1778600485.0523734,
+ "proposed_by": "supervisor",
+ "correlation_id": "dc23556c-68d2-41a3-a5d2-9ad66705f989",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "rejected",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-003",
+ "service": "webapp"
+ },
+ "rollback_reference": null,
+ "rejected_at": 1778600485.740686
+}
\ No newline at end of file
diff --git a/tmp/homelab/actions/rejected/240cbbc0-891e-4032-bf73-1fa40ff850b4.json b/tmp/homelab/actions/rejected/240cbbc0-891e-4032-bf73-1fa40ff850b4.json
new file mode 100644
index 0000000..e0d738d
--- /dev/null
+++ b/tmp/homelab/actions/rejected/240cbbc0-891e-4032-bf73-1fa40ff850b4.json
@@ -0,0 +1,21 @@
+{
+ "action_id": "240cbbc0-891e-4032-bf73-1fa40ff850b4",
+ "created_at": 1778600510.7525399,
+ "proposed_by": "supervisor",
+ "correlation_id": "fd234809-82aa-459d-858b-18bc3205a6c5",
+ "node": null,
+ "service": "webapp",
+ "action_type": "collect_diagnostics",
+ "risk_level": "safe",
+ "confidence": 0.9,
+ "approval_required": false,
+ "autonomous_eligible": false,
+ "status": "rejected",
+ "payload": {
+ "type": "failed_deployment",
+ "deployment_id": "dep-003",
+ "service": "webapp"
+ },
+ "rollback_reference": null,
+ "rejected_at": 1778600511.4303465
+}
\ No newline at end of file
diff --git a/tmp/homelab/world/deployments/dep-001.json b/tmp/homelab/world/deployments/dep-001.json
index 02db067..f70d7a8 100644
--- a/tmp/homelab/world/deployments/dep-001.json
+++ b/tmp/homelab/world/deployments/dep-001.json
@@ -1 +1 @@
-{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778597957}
+{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778600510}
diff --git a/tmp/homelab/world/deployments/dep-002.json b/tmp/homelab/world/deployments/dep-002.json
index e977aa0..1ee5a29 100644
--- a/tmp/homelab/world/deployments/dep-002.json
+++ b/tmp/homelab/world/deployments/dep-002.json
@@ -1 +1 @@
-{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778597657}
+{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778600210}
diff --git a/tmp/homelab/world/deployments/dep-003.json b/tmp/homelab/world/deployments/dep-003.json
index 66f10c9..f44385b 100644
--- a/tmp/homelab/world/deployments/dep-003.json
+++ b/tmp/homelab/world/deployments/dep-003.json
@@ -1 +1 @@
-{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778597357}
+{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778599910}
diff --git a/webui/index.html b/webui/index.html
index d720307..5c049c1 100644
--- a/webui/index.html
+++ b/webui/index.html
@@ -216,9 +216,9 @@
.label { color: var(--text-muted); font-size: 12px; margin-bottom: 4px; }
.value { font-weight: 500; margin-bottom: 12px; }
- .risk-safe { color: var(--safe); }
- .risk-guarded { color: var(--guarded); }
- .risk-dangerous { color: var(--dangerous); }
+ .risk-safe { background: rgba(62, 175, 124, 0.1); color: var(--safe); }
+ .risk-guarded { background: rgba(230, 126, 34, 0.1); color: var(--guarded); }
+ .risk-dangerous { background: rgba(192, 57, 43, 0.1); color: var(--dangerous); }
@@ -229,6 +229,9 @@
Dashboard
+
+
Dashboard
+
+ System Overview
+
Pending Actions
+
+ Active Incidents
@@ -276,6 +298,20 @@
+
+
+
+
+
+ Pending Approval
+ +
+
+ Active / History
+ +
@@ -291,11 +327,24 @@
+
+
+
+
+
+ Runtime Topology
+
+
+
+
+
@@ -335,6 +384,34 @@
}
}
+ /**
+  * POST `data` as a JSON body to `endpoint` and return the parsed JSON reply.
+  * Any failure while building the request, sending it, or parsing the
+  * response is logged to the console and reported to the caller as null.
+  */
+ async function postData(endpoint, data) {
+     try {
+         // Built inside the try so a serialization error also yields null.
+         const request = {
+             method: 'POST',
+             headers: {'Content-Type': 'application/json'},
+             body: JSON.stringify(data)
+         };
+         const response = await fetch(endpoint, request);
+         const parsed = await response.json();
+         return parsed;
+     } catch (err) {
+         console.error('Post error:', endpoint, err);
+         return null;
+     }
+ }
+
+ /**
+  * Ask the backend to move action `id` to `status` via /action/mutate.
+  * Refreshes the view when the reply's status is 'ok'; otherwise alerts
+  * the operator that the mutation failed.
+  */
+ async function mutateAction(id, status) {
+     const reply = await postData('/action/mutate', {id, status});
+     const succeeded = Boolean(reply) && reply.status === 'ok';
+     if (!succeeded) {
+         alert('Mutation failed');
+         return;
+     }
+     refreshData();
+ }
+
+ /**
+  * Handle an operator-mode selection. Currently a stub: it only logs the
+  * requested mode to the console and changes no state.
+  */
+ function setOperatorMode(mode) {
+ console.log('Operator mode set to:', mode);
+ // Placeholder: in a real system, this would send the mode to the backend.
+ }
+
function formatTime(ts) {
if (!ts) return 'N/A';
return new Date(ts * 1000).toLocaleString();
@@ -368,6 +445,53 @@
}
}
+ if (currentView === 'dashboard' || currentView === 'actions') {
+ const actions = await fetchData('/actions');
+ if (actions) {
+ if (currentView === 'dashboard') {
+ const dashActions = document.getElementById('dashboard-actions-summary');
+ const pendingCount = actions.pending.length;
+ dashActions.innerHTML = `
+
Pending
${pendingCount}
+ Running
${actions.running.length}
+ `;
+ }
+ if (currentView === 'actions') {
+ const pendingEl = document.getElementById('actions-pending');
+ const historyEl = document.getElementById('actions-history');
+
+ pendingEl.innerHTML = actions.pending.map(a => `
+
+
+ `).join('') || 'No pending actions.';
+
+ const history = [...actions.approved, ...actions.running, ...actions.completed, ...actions.failed];
+ historyEl.innerHTML = history.sort((a,b) => b.timestamp - a.timestamp).map(a => `
+
+
+ ${a.type.toUpperCase()}
+ ${a.risk_level}
+ ${a.description}
+Target
${a.target.node} ${a.target.service || ''}
+ Confidence
${Math.round(a.confidence*100)}%
+
+
+
+
+
+
+ `).join('') || 'No history.';
+ }
+ }
+ }
+
if (currentView === 'dashboard' || currentView === 'events') {
const incidents = await fetchData('/incidents');
if (currentView === 'dashboard') {
@@ -474,6 +598,64 @@
`).join('');
}
+ if (currentView === 'topology') {
+ const nodes = await fetchData('/nodes');
+ const services = await fetchData('/services');
+ const topMap = document.getElementById('topology-map');
+ if (nodes && services) {
+ topMap.innerHTML = nodes.map(node => {
+ const nodeServices = services.filter(s => s.node === node.hostname || s.node === node.id);
+ return `
+
+ ${a.type.toUpperCase()}
+ ${a.status}
+
+ ${a.description}
+ ${formatTime(a.timestamp)} | Target: ${a.target.node}
+ ${a.status === 'approved' ? `` : ''}
+
+
+ `;
+ }).join('');
+ }
+ }
+
+ if (currentView === 'correlation') {
+ const incidents = await fetchData('/incidents');
+ const actions = await fetchData('/actions');
+ const list = document.getElementById('correlation-chains');
+ if (incidents && actions) {
+ const allActions = Object.values(actions).flat();
+ list.innerHTML = incidents.map(inc => {
+ const related = allActions.filter(a => a.correlation_chain && a.correlation_chain.includes(inc.id));
+ return `
+
+
+ ${node.hostname}
+ ${node.health}
+ Capabilities
+ ${node.capabilities.join(', ')}
+ Services
+
+ ${nodeServices.length > 0 ? nodeServices.map(s => `
+
+
+ ${s.name}
+ ${s.health}
+
+ ${s.dependencies.length > 0 ? `dep: ${s.dependencies.join(', ')}
` : ''}
+ `).join('') : 'None
'}
+
+
+ `;
+ }).join('');
+ }
+ }
if (currentView === 'settings') {
const config = await fetchData('/config');
const content = document.getElementById('settings-content');
@@ -482,6 +664,8 @@
+
+ Incident: ${inc.id || 'INC-001'}
+ Active
+ ${inc.message}
+Related Actions
+ ${related.map(a => `
+
+ ${a.type} (${a.status})
+ ${a.description} +
+ `).join('') || '+ ${a.description} +
No actions yet
'}
+ ${config.auto_mode ? 'Enabled' : 'Disabled'}
Action Thresholds
${JSON.stringify(config.action_thresholds, null, 2)}
+ Telegram Integration
+ Ready for mobile approval flows. Hook: /api/v1/telegram/webhook
`;
}
diff --git a/webui/web.py b/webui/web.py
index 053ac1a..4727274 100644
--- a/webui/web.py
+++ b/webui/web.py
@@ -8,6 +8,7 @@ from pathlib import Path
STATE_DIR = Path("/opt/homelab/state")
EVENTS_DIR = Path("/opt/homelab/events")
WORLD_DIR = Path("/opt/homelab/world")
+ACTIONS_DIR = Path("/opt/homelab/actions")
EVENT_LOG = Path("/tmp/agent-events.log")
STATIC_DIR = Path(__file__).parent
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
@@ -164,6 +165,55 @@ def current_events():
return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
+def current_actions():
+    """Return all action records on disk, grouped by status.
+
+    The result always has one key per known status, in a fixed order; a
+    status whose directory under ``ACTIONS_DIR`` does not exist maps to an
+    empty list. Files for which ``read_json_file`` returns a falsy value
+    are left out.
+    """
+    grouped = {}
+    for state in ("pending", "approved", "running", "completed", "failed", "rejected"):
+        folder = ACTIONS_DIR / state
+        records = []
+        if folder.exists():
+            records = [
+                record
+                for record in map(read_json_file, folder.glob("*.json"))
+                if record
+            ]
+        grouped[state] = records
+    return grouped
+
+
+def mutate_action(action_id, target_status):
+    """Move an action record to ``target_status`` and stamp the change.
+
+    The record is located by probing
+    ``ACTIONS_DIR/<status>/<action_id>.json`` for each known status, in
+    order. Its JSON is rewritten with ``status`` set to ``target_status``
+    and ``last_mutation`` set to the current time, stored under the target
+    status directory, and removed from its previous location.
+
+    Args:
+        action_id: Identifier of the action. It is used to build file
+            paths, so it must be a single plain file-name component.
+        target_status: One of the known action statuses.
+
+    Returns:
+        A ``(success, message)`` tuple. ``message`` is ``"Success"`` or a
+        description of why the mutation was refused or failed.
+    """
+    import time
+
+    statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
+    if target_status not in statuses:
+        return False, f"Invalid target status: {target_status}"
+
+    # action_id is supplied by the /action/mutate request handler, so refuse
+    # anything that could resolve outside ACTIONS_DIR (separators, "..",
+    # NUL bytes) before it is turned into a path.
+    if (
+        not isinstance(action_id, str)
+        or not action_id
+        or action_id in (".", "..")
+        or "\x00" in action_id
+        or Path(action_id).name != action_id
+    ):
+        return False, f"Invalid action id: {action_id!r}"
+
+    # Find the status directory that currently holds the action.
+    source_path = None
+    for status in statuses:
+        candidate = ACTIONS_DIR / status / f"{action_id}.json"
+        if candidate.exists():
+            source_path = candidate
+            break
+
+    if source_path is None:
+        return False, f"Action {action_id} not found"
+
+    target_dir = ACTIONS_DIR / target_status
+    target_dir.mkdir(parents=True, exist_ok=True)
+    target_path = target_dir / f"{action_id}.json"
+    # The staging name does not end in ".json", so "*.json" globs skip it.
+    staging_path = target_dir / f"{action_id}.json.tmp"
+
+    try:
+        data = json.loads(source_path.read_text())
+        data["status"] = target_status
+        data["last_mutation"] = time.time()
+
+        # Write to a staging file and rename it into place so a failed or
+        # interrupted write cannot leave truncated JSON at target_path.
+        staging_path.write_text(json.dumps(data, indent=2))
+        os.replace(staging_path, target_path)
+        if source_path != target_path:
+            source_path.unlink()
+    except (OSError, ValueError, TypeError) as e:
+        # OSError: filesystem failures; ValueError: malformed JSON;
+        # TypeError: the stored JSON is not an object.
+        return False, str(e)
+    return True, "Success"
+
+
def send_json(status, payload, handler):
body = (json.dumps(payload) + "\n").encode("utf-8")
handler.send_response(status)
@@ -207,6 +257,10 @@ class Handler(BaseHTTPRequestHandler):
send_json(200, current_events(), self)
return
+ if self.path == "/actions":
+ send_json(200, current_actions(), self)
+ return
+
if self.path == "/logs":
print("LOGS endpoint called", flush=True)
body = ("\n".join(tail_lines(EVENT_LOG, 200)) + "\n").encode("utf-8")
@@ -236,6 +290,7 @@ class Handler(BaseHTTPRequestHandler):
"/auto-mode",
"/config",
"/events",
+ "/action/mutate",
):
self.send_error(404)
return
@@ -291,6 +346,19 @@ class Handler(BaseHTTPRequestHandler):
send_json(200, {"status": "sent"}, self)
return
+ if self.path == "/action/mutate":
+ action_id = payload.get("id")
+ target = payload.get("status")
+ if not action_id or not target:
+ self.send_error(400, "id and status are required")
+ return
+ success, msg = mutate_action(action_id, target)
+ if success:
+ send_json(200, {"status": "ok"}, self)
+ else:
+ self.send_error(500, msg)
+ return
+
if not command:
self.send_error(400, "command is required")
return