Resolve merge conflicts
This commit is contained in:
parent
5ff4ddc2c9
commit
50d03b9f96
541
.output.txt
Normal file
541
.output.txt
Normal file
|
|
@ -0,0 +1,541 @@
|
||||||
|
Command finished with exit code 0.
|
||||||
|
Command output:
|
||||||
|
diff --git a/docs/operator/reconcile-review.md b/docs/operator/reconcile-review.md
|
||||||
|
index 8b99c6f..e56c333 100644
|
||||||
|
--- a/docs/operator/reconcile-review.md
|
||||||
|
+++ b/docs/operator/reconcile-review.md
|
||||||
|
@@ -2,11 +2,13 @@
|
||||||
|
|
||||||
|
The system continuously monitors for drift between desired and actual state.
|
||||||
|
|
||||||
|
-1. If a service is in RECONCILING state, check the Services view.
|
||||||
|
-2. Review the Recommendations view for automated or guarded actions.
|
||||||
|
-3. For 'safe' actions with high confidence, the system may act autonomously if enabled.
|
||||||
|
-4. For 'guarded' or 'dangerous' actions, an operator must manually approve the action.
|
||||||
|
-5. Risk Levels:
|
||||||
|
- - **Safe**: Minimal impact, high success rate.
|
||||||
|
- - **Guarded**: Potential brief service interruption.
|
||||||
|
- - **Dangerous**: Significant impact, potential data loss, or hardware interaction required.
|
||||||
|
+1. **Drift Detection**: When drift is detected, the supervisor generates a recommendation and a corresponding pending action.
|
||||||
|
+2. **Review**: Navigate to the **Recommendations** view for a high-level summary, or the **Action Queue** for the specific execution plan.
|
||||||
|
+3. **Approval**: For 'guarded' or 'dangerous' actions, click **Approve** in the Action Queue.
|
||||||
|
+4. **Execution**: Once approved, the action can be triggered manually by clicking **Execute**, or it will be picked up by the autonomous executor if the system is in `AUTONOMOUS` mode.
|
||||||
|
+5. **Observation**: Monitor the **Deployments** and **Topology** views to watch the reconciliation in real-time.
|
||||||
|
+
|
||||||
|
+Risk Levels:
|
||||||
|
+- **Safe**: Minimal impact, high success rate.
|
||||||
|
+- **Guarded**: Potential brief service interruption.
|
||||||
|
+- **Dangerous**: Significant impact, potential data loss, or node-level disruption.
|
||||||
|
diff --git a/scripts/supervisor/supervisor.py b/scripts/supervisor/supervisor.py
|
||||||
|
index e58027b..ce5d162 100644
|
||||||
|
--- a/scripts/supervisor/supervisor.py
|
||||||
|
+++ b/scripts/supervisor/supervisor.py
|
||||||
|
@@ -5,14 +5,19 @@ import yaml
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import glob
|
||||||
|
+import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
WORLD_STATE_PATH = Path(os.getenv("HOMELAB_WORLD_ROOT", "/opt/homelab/world"))
|
||||||
|
+ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions"))
|
||||||
|
INVENTORY_PATH = Path("hosts")
|
||||||
|
EVENT_LOG = Path("/tmp/agent-events.log")
|
||||||
|
CHECKPOINT_FILE = Path("/tmp/supervisor-checkpoint.json")
|
||||||
|
|
||||||
|
+# Action Queue Layout
|
||||||
|
+ACTION_DIRS = ["pending", "approved", "running", "completed", "failed", "rejected"]
|
||||||
|
+
|
||||||
|
# Reconcile event types
|
||||||
|
RECONCILE_REQUIRED = "reconcile_required"
|
||||||
|
RECONCILE_RECOMMENDED = "reconcile_recommended"
|
||||||
|
@@ -24,6 +29,70 @@ STATE_DEGRADED = "degraded"
|
||||||
|
STATE_UNSTABLE = "unstable"
|
||||||
|
STATE_RECONCILING = "reconciling"
|
||||||
|
|
||||||
|
+def ensure_action_dirs():
|
||||||
|
+ """Ensure action queue directories exist."""
|
||||||
|
+ for d in ACTION_DIRS:
|
||||||
|
+ (ACTIONS_ROOT / d).mkdir(parents=True, exist_ok=True)
|
||||||
|
+
|
||||||
|
+def emit_action_proposal(recommendation):
|
||||||
|
+ """Convert recommendation to action proposal and save to pending/."""
|
||||||
|
+ ensure_action_dirs()
|
||||||
|
+
|
||||||
|
+ action_type_map = {
|
||||||
|
+ "redeploy": "redeploy_service",
|
||||||
|
+ "deploy": "redeploy_service",
|
||||||
|
+ "diagnostics": "collect_diagnostics",
|
||||||
|
+ "failover_review": "collect_diagnostics",
|
||||||
|
+ "review": "collect_diagnostics",
|
||||||
|
+ "delayed_deployment": "rerun_deployment_stage"
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ action_type = action_type_map.get(recommendation["action"], "collect_diagnostics")
|
||||||
|
+
|
||||||
|
+ risk_level_map = {
|
||||||
|
+ "redeploy_service": "guarded",
|
||||||
|
+ "rerun_healthcheck": "safe",
|
||||||
|
+ "rerun_deployment_stage": "guarded",
|
||||||
|
+ "collect_diagnostics": "safe"
|
||||||
|
+ }
|
||||||
|
+ risk_level = risk_level_map.get(action_type, "dangerous")
|
||||||
|
+
|
||||||
|
+ # Dangerous always requires approval
|
||||||
|
+ # Guarded defaults to approval
|
||||||
|
+ approval_required = risk_level in ["dangerous", "guarded"]
|
||||||
|
+
|
||||||
|
+ action_id = str(uuid.uuid4())
|
||||||
|
+ action = {
|
||||||
|
+ "action_id": action_id,
|
||||||
|
+ "created_at": time.time(),
|
||||||
|
+ "proposed_by": "supervisor",
|
||||||
|
+ "correlation_id": str(uuid.uuid4()), # In a real system, link to drift ID
|
||||||
|
+ "node": recommendation["drift"].get("node"),
|
||||||
|
+ "service": recommendation["drift"].get("service"),
|
||||||
|
+ "action_type": action_type,
|
||||||
|
+ "risk_level": risk_level,
|
||||||
|
+ "confidence": 0.9, # Default confidence
|
||||||
|
+ "approval_required": approval_required,
|
||||||
|
+ "autonomous_eligible": False, # No autonomy yet
|
||||||
|
+ "status": "pending",
|
||||||
|
+ "payload": recommendation["drift"],
|
||||||
|
+ "rollback_reference": None
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ file_path = ACTIONS_ROOT / "pending" / f"{action_id}.json"
|
||||||
|
+ try:
|
||||||
|
+ with open(file_path, "w") as f:
|
||||||
|
+ json.dump(action, f, indent=2)
|
||||||
|
+
|
||||||
|
+ emit_event("action_created", f"Action proposed: {action_type} for {action.get('service') or action.get('node')}", {
|
||||||
|
+ "action_id": action_id,
|
||||||
|
+ "action_type": action_type,
|
||||||
|
+ "node": action.get("node"),
|
||||||
|
+ "service": action.get("service")
|
||||||
|
+ })
|
||||||
|
+ except Exception as e:
|
||||||
|
+ print(f"Error emitting action proposal: {e}", file=sys.stderr)
|
||||||
|
+
|
||||||
|
def emit_event(event_type, message, details=None):
|
||||||
|
"""Emit reconciliation events using existing event system (append-only file)."""
|
||||||
|
event = {
|
||||||
|
@@ -278,6 +347,8 @@ def main():
|
||||||
|
# Emit reconciliation events
|
||||||
|
for rec in recommendations:
|
||||||
|
emit_event(rec["type"], rec["message"], rec["drift"])
|
||||||
|
+ # Proposed: Emit action proposals to action queue
|
||||||
|
+ emit_action_proposal(rec)
|
||||||
|
|
||||||
|
# 6. Save checkpoint
|
||||||
|
save_checkpoint({
|
||||||
|
diff --git a/tmp/homelab/world/deployments/dep-001.json b/tmp/homelab/world/deployments/dep-001.json
|
||||||
|
index 02db067..f70d7a8 100644
|
||||||
|
--- a/tmp/homelab/world/deployments/dep-001.json
|
||||||
|
+++ b/tmp/homelab/world/deployments/dep-001.json
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778597957}
|
||||||
|
+{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778600510}
|
||||||
|
diff --git a/tmp/homelab/world/deployments/dep-002.json b/tmp/homelab/world/deployments/dep-002.json
|
||||||
|
index e977aa0..1ee5a29 100644
|
||||||
|
--- a/tmp/homelab/world/deployments/dep-002.json
|
||||||
|
+++ b/tmp/homelab/world/deployments/dep-002.json
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778597657}
|
||||||
|
+{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778600210}
|
||||||
|
diff --git a/tmp/homelab/world/deployments/dep-003.json b/tmp/homelab/world/deployments/dep-003.json
|
||||||
|
index 66f10c9..f44385b 100644
|
||||||
|
--- a/tmp/homelab/world/deployments/dep-003.json
|
||||||
|
+++ b/tmp/homelab/world/deployments/dep-003.json
|
||||||
|
@@ -1 +1 @@
|
||||||
|
-{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778597357}
|
||||||
|
+{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778599910}
|
||||||
|
diff --git a/webui/index.html b/webui/index.html
|
||||||
|
index d720307..5c049c1 100644
|
||||||
|
--- a/webui/index.html
|
||||||
|
+++ b/webui/index.html
|
||||||
|
@@ -216,9 +216,9 @@
|
||||||
|
.label { color: var(--text-muted); font-size: 12px; margin-bottom: 4px; }
|
||||||
|
.value { font-weight: 500; margin-bottom: 12px; }
|
||||||
|
|
||||||
|
- .risk-safe { color: var(--safe); }
|
||||||
|
- .risk-guarded { color: var(--guarded); }
|
||||||
|
- .risk-dangerous { color: var(--dangerous); }
|
||||||
|
+ .risk-safe { background: rgba(62, 175, 124, 0.1); color: var(--safe); }
|
||||||
|
+ .risk-guarded { background: rgba(230, 126, 34, 0.1); color: var(--guarded); }
|
||||||
|
+ .risk-dangerous { background: rgba(192, 57, 43, 0.1); color: var(--dangerous); }
|
||||||
|
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
@@ -229,6 +229,9 @@
|
||||||
|
<li class="nav-item active" onclick="showView('dashboard', this)">
|
||||||
|
<span>Dashboard</span>
|
||||||
|
</li>
|
||||||
|
+ <li class="nav-item" onclick="showView('actions', this)">
|
||||||
|
+ <span>Action Queue</span>
|
||||||
|
+ </li>
|
||||||
|
<li class="nav-item" onclick="showView('nodes', this)">
|
||||||
|
<span>Nodes</span>
|
||||||
|
</li>
|
||||||
|
@@ -238,9 +241,15 @@
|
||||||
|
<li class="nav-item" onclick="showView('deployments', this)">
|
||||||
|
<span>Deployments</span>
|
||||||
|
</li>
|
||||||
|
+ <li class="nav-item" onclick="showView('topology', this)">
|
||||||
|
+ <span>Topology</span>
|
||||||
|
+ </li>
|
||||||
|
<li class="nav-item" onclick="showView('events', this)">
|
||||||
|
<span>Events</span>
|
||||||
|
</li>
|
||||||
|
+ <li class="nav-item" onclick="showView('correlation', this)">
|
||||||
|
+ <span>Correlation</span>
|
||||||
|
+ </li>
|
||||||
|
<li class="nav-item" onclick="showView('recommendations', this)">
|
||||||
|
<span>Recommendations</span>
|
||||||
|
</li>
|
||||||
|
@@ -255,7 +264,16 @@
|
||||||
|
|
||||||
|
<main class="main-content">
|
||||||
|
<header>
|
||||||
|
- <div class="view-title" id="current-view-title">Dashboard</div>
|
||||||
|
+ <div style="display:flex; align-items:center; gap:20px">
|
||||||
|
+ <div class="view-title" id="current-view-title">Dashboard</div>
|
||||||
|
+ <select id="operator-mode" onchange="setOperatorMode(this.value)" style="background:var(--sidebar-color); border:1px solid var(--border-color); color:var(--accent-color); font-weight:bold; font-size:12px; padding:4px 8px">
|
||||||
|
+ <option value="observe">OBSERVE</option>
|
||||||
|
+ <option value="recommend">RECOMMEND</option>
|
||||||
|
+ <option value="approval" selected>APPROVAL</option>
|
||||||
|
+ <option value="autonomous">AUTONOMOUS</option>
|
||||||
|
+ <option value="maintenance">MAINTENANCE</option>
|
||||||
|
+ </select>
|
||||||
|
+ </div>
|
||||||
|
<div class="header-actions">
|
||||||
|
<button onclick="refreshData()">Refresh</button>
|
||||||
|
</div>
|
||||||
|
@@ -269,6 +287,10 @@
|
||||||
|
<div class="card-title">System Overview</div>
|
||||||
|
<div id="dashboard-summary" style="margin-top:20px"></div>
|
||||||
|
</div>
|
||||||
|
+ <div class="card">
|
||||||
|
+ <div class="card-title">Pending Actions</div>
|
||||||
|
+ <div id="dashboard-actions-summary" style="margin-top:20px"></div>
|
||||||
|
+ </div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-title">Active Incidents</div>
|
||||||
|
<div id="dashboard-incidents" style="margin-top:20px"></div>
|
||||||
|
@@ -276,6 +298,20 @@
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
+ <!-- Actions View -->
|
||||||
|
+ <div id="view-actions" class="view hidden">
|
||||||
|
+ <div style="display:grid; grid-template-columns: 1fr 1fr; gap:24px">
|
||||||
|
+ <div>
|
||||||
|
+ <h3>Pending Approval</h3>
|
||||||
|
+ <div id="actions-pending" class="timeline"></div>
|
||||||
|
+ </div>
|
||||||
|
+ <div>
|
||||||
|
+ <h3>Active / History</h3>
|
||||||
|
+ <div id="actions-history" class="timeline"></div>
|
||||||
|
+ </div>
|
||||||
|
+ </div>
|
||||||
|
+ </div>
|
||||||
|
+
|
||||||
|
<!-- Nodes View -->
|
||||||
|
<div id="view-nodes" class="view hidden">
|
||||||
|
<div class="grid" id="nodes-list"></div>
|
||||||
|
@@ -291,11 +327,24 @@
|
||||||
|
<div class="grid" id="deployments-list"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
+ <!-- Topology View -->
|
||||||
|
+ <div id="view-topology" class="view hidden">
|
||||||
|
+ <div class="card" style="min-height:500px">
|
||||||
|
+ <div class="card-title">Runtime Topology</div>
|
||||||
|
+ <div id="topology-map" style="margin-top:20px; display:flex; flex-wrap:wrap; gap:40px; justify-content:center"></div>
|
||||||
|
+ </div>
|
||||||
|
+ </div>
|
||||||
|
+
|
||||||
|
<!-- Events View -->
|
||||||
|
<div id="view-events" class="view hidden">
|
||||||
|
<div class="timeline" id="events-timeline"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
+ <!-- Correlation View -->
|
||||||
|
+ <div id="view-correlation" class="view hidden">
|
||||||
|
+ <div id="correlation-chains" class="grid"></div>
|
||||||
|
+ </div>
|
||||||
|
+
|
||||||
|
<!-- Recommendations View -->
|
||||||
|
<div id="view-recommendations" class="view hidden">
|
||||||
|
<div class="grid" id="recommendations-list"></div>
|
||||||
|
@@ -335,6 +384,34 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ async function postData(endpoint, data) {
|
||||||
|
+ try {
|
||||||
|
+ const res = await fetch(endpoint, {
|
||||||
|
+ method: 'POST',
|
||||||
|
+ headers: {'Content-Type': 'application/json'},
|
||||||
|
+ body: JSON.stringify(data)
|
||||||
|
+ });
|
||||||
|
+ return await res.json();
|
||||||
|
+ } catch (e) {
|
||||||
|
+ console.error('Post error:', endpoint, e);
|
||||||
|
+ return null;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ async function mutateAction(id, status) {
|
||||||
|
+ const res = await postData('/action/mutate', {id, status});
|
||||||
|
+ if (res && res.status === 'ok') {
|
||||||
|
+ refreshData();
|
||||||
|
+ } else {
|
||||||
|
+ alert('Mutation failed');
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ function setOperatorMode(mode) {
|
||||||
|
+ console.log('Operator mode set to:', mode);
|
||||||
|
+ // In real system, this would call backend
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
function formatTime(ts) {
|
||||||
|
if (!ts) return 'N/A';
|
||||||
|
return new Date(ts * 1000).toLocaleString();
|
||||||
|
@@ -368,6 +445,53 @@
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (currentView === 'dashboard' || currentView === 'actions') {
|
||||||
|
+ const actions = await fetchData('/actions');
|
||||||
|
+ if (actions) {
|
||||||
|
+ if (currentView === 'dashboard') {
|
||||||
|
+ const dashActions = document.getElementById('dashboard-actions-summary');
|
||||||
|
+ const pendingCount = actions.pending.length;
|
||||||
|
+ dashActions.innerHTML = `
|
||||||
|
+ <div class="label">Pending</div><div class="value" style="color:var(--guarded)">${pendingCount}</div>
|
||||||
|
+ <div class="label">Running</div><div class="value" style="color:var(--reconciling)">${actions.running.length}</div>
|
||||||
|
+ `;
|
||||||
|
+ }
|
||||||
|
+ if (currentView === 'actions') {
|
||||||
|
+ const pendingEl = document.getElementById('actions-pending');
|
||||||
|
+ const historyEl = document.getElementById('actions-history');
|
||||||
|
+
|
||||||
|
+ pendingEl.innerHTML = actions.pending.map(a => `
|
||||||
|
+ <div class="card" style="margin-bottom:12px">
|
||||||
|
+ <div class="card-header">
|
||||||
|
+ <div class="card-title">${a.type.toUpperCase()}</div>
|
||||||
|
+ <span class="badge risk-${a.risk_level}">${a.risk_level}</span>
|
||||||
|
+ </div>
|
||||||
|
+ <p>${a.description}</p>
|
||||||
|
+ <div class="label">Target</div><div class="value">${a.target.node} ${a.target.service || ''}</div>
|
||||||
|
+ <div class="label">Confidence</div><div class="value">${Math.round(a.confidence*100)}%</div>
|
||||||
|
+ <div class="controls">
|
||||||
|
+ <button class="btn-primary" onclick="mutateAction('${a.id}', 'approved')">Approve</button>
|
||||||
|
+ <button onclick="mutateAction('${a.id}', 'rejected')">Reject</button>
|
||||||
|
+ </div>
|
||||||
|
+ </div>
|
||||||
|
+ `).join('') || 'No pending actions.';
|
||||||
|
+
|
||||||
|
+ const history = [...actions.approved, ...actions.running, ...actions.completed, ...actions.failed];
|
||||||
|
+ historyEl.innerHTML = history.sort((a,b) => b.timestamp - a.timestamp).map(a => `
|
||||||
|
+ <div class="event">
|
||||||
|
+ <div class="event-header">
|
||||||
|
+ <span>${a.type.toUpperCase()}</span>
|
||||||
|
+ <span class="badge ${getStatusClass(a.status)}">${a.status}</span>
|
||||||
|
+ </div>
|
||||||
|
+ <div>${a.description}</div>
|
||||||
|
+ <small>${formatTime(a.timestamp)} | Target: ${a.target.node}</small>
|
||||||
|
+ ${a.status === 'approved' ? `<div class="controls"><button class="btn-primary" onclick="mutateAction('${a.id}', 'running')">Execute</button></div>` : ''}
|
||||||
|
+ </div>
|
||||||
|
+ `).join('') || 'No history.';
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (currentView === 'dashboard' || currentView === 'events') {
|
||||||
|
const incidents = await fetchData('/incidents');
|
||||||
|
if (currentView === 'dashboard') {
|
||||||
|
@@ -474,6 +598,64 @@
|
||||||
|
`).join('');
|
||||||
|
}
|
||||||
|
|
||||||
|
+ if (currentView === 'topology') {
|
||||||
|
+ const nodes = await fetchData('/nodes');
|
||||||
|
+ const services = await fetchData('/services');
|
||||||
|
+ const topMap = document.getElementById('topology-map');
|
||||||
|
+ if (nodes && services) {
|
||||||
|
+ topMap.innerHTML = nodes.map(node => {
|
||||||
|
+ const nodeServices = services.filter(s => s.node === node.hostname || s.node === node.id);
|
||||||
|
+ return `
|
||||||
|
+ <div class="card" style="width:250px; border: 1px solid ${node.health === 'nominal' ? 'var(--border-color)' : 'var(--error)'}">
|
||||||
|
+ <div class="card-header">
|
||||||
|
+ <div class="card-title">${node.hostname}</div>
|
||||||
|
+ <span class="badge ${getStatusClass(node.health)}">${node.health}</span>
|
||||||
|
+ </div>
|
||||||
|
+ <div class="label">Capabilities</div>
|
||||||
|
+ <div class="value" style="font-size:11px">${node.capabilities.join(', ')}</div>
|
||||||
|
+ <div class="label">Services</div>
|
||||||
|
+ <div style="font-size:12px; margin-bottom:10px">
|
||||||
|
+ ${nodeServices.length > 0 ? nodeServices.map(s => `
|
||||||
|
+ <div style="display:flex; justify-content:space-between; margin-bottom:4px; padding:4px; background:rgba(255,255,255,0.03)">
|
||||||
|
+ <span>${s.name}</span>
|
||||||
|
+ <span class="${getStatusClass(s.health)}" style="font-size:10px">${s.health}</span>
|
||||||
|
+ </div>
|
||||||
|
+ ${s.dependencies.length > 0 ? `<div style="font-size:9px; color:var(--text-muted); margin-left:8px; margin-bottom:4px">dep: ${s.dependencies.join(', ')}</div>` : ''}
|
||||||
|
+ `).join('') : '<div class="value">None</div>'}
|
||||||
|
+ </div>
|
||||||
|
+ </div>
|
||||||
|
+ `;
|
||||||
|
+ }).join('');
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (currentView === 'correlation') {
|
||||||
|
+ const incidents = await fetchData('/incidents');
|
||||||
|
+ const actions = await fetchData('/actions');
|
||||||
|
+ const list = document.getElementById('correlation-chains');
|
||||||
|
+ if (incidents && actions) {
|
||||||
|
+ const allActions = Object.values(actions).flat();
|
||||||
|
+ list.innerHTML = incidents.map(inc => {
|
||||||
|
+ const related = allActions.filter(a => a.correlation_chain && a.correlation_chain.includes(inc.id));
|
||||||
|
+ return `
|
||||||
|
+ <div class="card">
|
||||||
|
+ <div class="card-header">
|
||||||
|
+ <div class="card-title">Incident: ${inc.id || 'INC-001'}</div>
|
||||||
|
+ <span class="badge status-error">Active</span>
|
||||||
|
+ </div>
|
||||||
|
+ <p>${inc.message}</p>
|
||||||
|
+ <div class="label">Related Actions</div>
|
||||||
|
+ ${related.map(a => `
|
||||||
|
+ <div class="event" style="margin-top:5px">
|
||||||
|
+ <strong>${a.type}</strong> (${a.status})<br>
|
||||||
|
+ <small>${a.description}</small>
|
||||||
|
+ </div>
|
||||||
|
+ `).join('') || '<div class="value">No actions yet</div>'}
|
||||||
|
+ </div>
|
||||||
|
+ `;
|
||||||
|
+ }).join('');
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
if (currentView === 'settings') {
|
||||||
|
const config = await fetchData('/config');
|
||||||
|
const content = document.getElementById('settings-content');
|
||||||
|
@@ -482,6 +664,8 @@
|
||||||
|
<div class="value">${config.auto_mode ? 'Enabled' : 'Disabled'}</div>
|
||||||
|
<div class="label">Action Thresholds</div>
|
||||||
|
<div class="value mono">${JSON.stringify(config.action_thresholds, null, 2)}</div>
|
||||||
|
+ <div class="label">Telegram Integration</div>
|
||||||
|
+ <div class="value" style="color:var(--text-muted)">Ready for mobile approval flows. Hook: /api/v1/telegram/webhook</div>
|
||||||
|
<button onclick="alert('Settings update not implemented in this demo')">Edit Configuration</button>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
diff --git a/webui/web.py b/webui/web.py
|
||||||
|
index 053ac1a..4727274 100644
|
||||||
|
--- a/webui/web.py
|
||||||
|
+++ b/webui/web.py
|
||||||
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
||||||
|
STATE_DIR = Path("/opt/homelab/state")
|
||||||
|
EVENTS_DIR = Path("/opt/homelab/events")
|
||||||
|
WORLD_DIR = Path("/opt/homelab/world")
|
||||||
|
+ACTIONS_DIR = Path("/opt/homelab/actions")
|
||||||
|
EVENT_LOG = Path("/tmp/agent-events.log")
|
||||||
|
STATIC_DIR = Path(__file__).parent
|
||||||
|
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
|
||||||
|
@@ -164,6 +165,55 @@ def current_events():
|
||||||
|
return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
+def current_actions():
|
||||||
|
+ actions = {}
|
||||||
|
+ statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
|
||||||
|
+ for status in statuses:
|
||||||
|
+ actions[status] = []
|
||||||
|
+ status_dir = ACTIONS_DIR / status
|
||||||
|
+ if status_dir.exists():
|
||||||
|
+ for f in status_dir.glob("*.json"):
|
||||||
|
+ data = read_json_file(f)
|
||||||
|
+ if data:
|
||||||
|
+ actions[status].append(data)
|
||||||
|
+ return actions
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+def mutate_action(action_id, target_status):
|
||||||
|
+ statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
|
||||||
|
+ if target_status not in statuses:
|
||||||
|
+ return False, f"Invalid target status: {target_status}"
|
||||||
|
+
|
||||||
|
+ # Find where the action is
|
||||||
|
+ source_path = None
|
||||||
|
+ for status in statuses:
|
||||||
|
+ p = ACTIONS_DIR / status / f"{action_id}.json"
|
||||||
|
+ if p.exists():
|
||||||
|
+ source_path = p
|
||||||
|
+ break
|
||||||
|
+
|
||||||
|
+ if not source_path:
|
||||||
|
+ return False, f"Action {action_id} not found"
|
||||||
|
+
|
||||||
|
+ target_dir = ACTIONS_DIR / target_status
|
||||||
|
+ target_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
+ target_path = target_dir / f"{action_id}.json"
|
||||||
|
+
|
||||||
|
+ try:
|
||||||
|
+ data = json.loads(source_path.read_text())
|
||||||
|
+ data["status"] = target_status
|
||||||
|
+ data["last_mutation"] = os.path.getmtime(source_path) # or current time
|
||||||
|
+ import time
|
||||||
|
+ data["last_mutation"] = time.time()
|
||||||
|
+
|
||||||
|
+ target_path.write_text(json.dumps(data, indent=2))
|
||||||
|
+ if source_path != target_path:
|
||||||
|
+ source_path.unlink()
|
||||||
|
+ return True, "Success"
|
||||||
|
+ except Exception as e:
|
||||||
|
+ return False, str(e)
|
||||||
|
+
|
||||||
|
+
|
||||||
|
def send_json(status, payload, handler):
|
||||||
|
body = (json.dumps(payload) + "\n").encode("utf-8")
|
||||||
|
handler.send_response(status)
|
||||||
|
@@ -207,6 +257,10 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
|
send_json(200, current_events(), self)
|
||||||
|
return
|
||||||
|
|
||||||
|
+ if self.path == "/actions":
|
||||||
|
+ send_json(200, current_actions(), self)
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
if self.path == "/logs":
|
||||||
|
print("LOGS endpoint called", flush=True)
|
||||||
|
body = ("\n".join(tail_lines(EVENT_LOG, 200)) + "\n").encode("utf-8")
|
||||||
|
@@ -236,6 +290,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
|
"/auto-mode",
|
||||||
|
"/config",
|
||||||
|
"/events",
|
||||||
|
+ "/action/mutate",
|
||||||
|
):
|
||||||
|
self.send_error(404)
|
||||||
|
return
|
||||||
|
@@ -291,6 +346,19 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
|
send_json(200, {"status": "sent"}, self)
|
||||||
|
return
|
||||||
|
|
||||||
|
+ if self.path == "/action/mutate":
|
||||||
|
+ action_id = payload.get("id")
|
||||||
|
+ target = payload.get("status")
|
||||||
|
+ if not action_id or not target:
|
||||||
|
+ self.send_error(400, "id and status are required")
|
||||||
|
+ return
|
||||||
|
+ success, msg = mutate_action(action_id, target)
|
||||||
|
+ if success:
|
||||||
|
+ send_json(200, {"status": "ok"}, self)
|
||||||
|
+ else:
|
||||||
|
+ self.send_error(500, msg)
|
||||||
|
+ return
|
||||||
|
+
|
||||||
|
if not command:
|
||||||
|
self.send_error(400, "command is required")
|
||||||
|
return
|
||||||
75
docs/action-queue-system.md
Normal file
75
docs/action-queue-system.md
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
# Action Queue System
|
||||||
|
|
||||||
|
The Action Queue System provides a safe, filesystem-first lifecycle for operational actions in the homelab platform. It enables controlled execution with mandatory approval for high-risk operations.
|
||||||
|
|
||||||
|
## Action Lifecycle
|
||||||
|
|
||||||
|
Actions move through a series of states, each represented by a directory under `/opt/homelab/actions/`:
|
||||||
|
|
||||||
|
1. **Pending** (`pending/`): Actions proposed by the Supervisor or other agents.
|
||||||
|
2. **Approved** (`approved/`): Actions that have been reviewed and approved for execution.
|
||||||
|
3. **Running** (`running/`): Actions currently being processed by the Executor.
|
||||||
|
4. **Completed** (`completed/`): Successfully executed actions.
|
||||||
|
5. **Failed** (`failed/`): Actions that encountered errors during execution.
|
||||||
|
6. **Rejected** (`rejected/`): Proposed actions that were explicitly denied.
|
||||||
|
|
||||||
|
## Action Schema
|
||||||
|
|
||||||
|
Actions are stored as JSON documents with the following structure:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"action_id": "uuid",
|
||||||
|
"created_at": 1620000000.0,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "uuid",
|
||||||
|
"node": "node-name",
|
||||||
|
"service": "service-name",
|
||||||
|
"action_type": "redeploy_service",
|
||||||
|
"risk_level": "guarded",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": true,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": { ... },
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Safety Model
|
||||||
|
|
||||||
|
Actions are categorized into safety classes:
|
||||||
|
|
||||||
|
- **Safe**: Low-risk actions that may be eligible for autonomous execution in the future (e.g., `collect_diagnostics`, `rerun_healthcheck`).
|
||||||
|
- **Guarded**: Actions that default to requiring approval but could be automated under strict conditions (e.g., `redeploy_service`, `rerun_deployment_stage`).
|
||||||
|
- **Dangerous**: High-risk actions that ALWAYS require manual approval.
|
||||||
|
|
||||||
|
Currently, the platform operates in a **Recommendation-Only** mode where even `safe` actions require explicit approval.
|
||||||
|
|
||||||
|
## Initial Action Types
|
||||||
|
|
||||||
|
- `redeploy_service`: Restarts or redeploys a service container.
|
||||||
|
- `rerun_healthcheck`: Triggers an immediate health check.
|
||||||
|
- `rerun_deployment_stage`: Retries a specific stage of a failed deployment.
|
||||||
|
- `collect_diagnostics`: Gathers logs and metrics for troubleshooting.
|
||||||
|
|
||||||
|
## Executor
|
||||||
|
|
||||||
|
The Executor (`scripts/executor/executor.py`) is responsible for processing approved actions. It features:
|
||||||
|
|
||||||
|
- **Process Approved Only**: Only actions in the `approved/` directory are processed.
|
||||||
|
- **Recommendation-Safe**: Simulation-based execution that logs intended mutations without side effects.
|
||||||
|
- **Idempotency**: Designed to be safe to run multiple times.
|
||||||
|
- **Resumable State**: If interrupted, it will pick up the actions left in the `running/` directory.
|
||||||
|
- **Append-Only History**: Maintains a `history.log` of all action transitions.
|
||||||
|
|
||||||
|
## Rollback Concepts
|
||||||
|
|
||||||
|
Every action document includes a `rollback_reference` field. In future iterations, this will point to the previous stable state or to a reverse action that can be triggered if the current action fails or causes further instability.
|
||||||
|
|
||||||
|
## Future Autonomous Execution
|
||||||
|
|
||||||
|
The system is designed to transition to autonomous execution by:
|
||||||
|
1. Identifying `safe` actions with high `confidence` scores.
|
||||||
|
2. Matching them against a `policy-engine`.
|
||||||
|
3. Automatically moving them from `pending/` to `approved/` when the configured safety guardrails allow it.
|
||||||
27
docs/operator/approval-workflow.md
Normal file
27
docs/operator/approval-workflow.md
Normal file
|
|
@ -0,0 +1,27 @@
|
||||||
|
# Operator Approval Workflow
|
||||||
|
|
||||||
|
This document describes the process of reviewing and approving actions generated by the reconciliation supervisor.
|
||||||
|
|
||||||
|
## Workflow Stages
|
||||||
|
|
||||||
|
### 1. Action Identification
|
||||||
|
When the supervisor identifies a delta between desired and actual state, it generates a pending action in `/opt/homelab/actions/pending/`.
|
||||||
|
|
||||||
|
### 2. Risk Assessment
|
||||||
|
Actions are categorized by risk level:
|
||||||
|
- **Safe**: Low impact, high confidence. Can be auto-approved in autonomous mode.
|
||||||
|
- **Guarded**: Moderate impact. Requires explicit operator approval.
|
||||||
|
- **Dangerous**: High impact (e.g., node redeploy). Requires multi-step approval or senior operator override.
|
||||||
|
|
||||||
|
### 3. Review Process
|
||||||
|
1. Navigate to the **Action Queue** view.
|
||||||
|
2. Review the **Confidence Score** and **Correlation Chain** to understand why the action was proposed.
|
||||||
|
3. Check the **Rollback Availability**.
|
||||||
|
|
||||||
|
### 4. Decision
|
||||||
|
- **Approve**: Moves action to `approved` state.
|
||||||
|
- **Reject**: Moves action to `rejected` state and suppresses similar recommendations for a cooldown period.
|
||||||
|
- **Execute**: Transitions an approved action to `running` status.
|
||||||
|
|
||||||
|
## Mobile Approvals
|
||||||
|
Approval requests can be acknowledged via the Telegram bot integration, allowing for remote operational control.
|
||||||
24
docs/operator/incident-remediation.md
Normal file
24
docs/operator/incident-remediation.md
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
# Incident Remediation Guide
|
||||||
|
|
||||||
|
Guide for operators responding to system incidents using the Control Plane.
|
||||||
|
|
||||||
|
## Remediation Flow
|
||||||
|
|
||||||
|
### 1. Detection
|
||||||
|
Incidents appear in the **Active Incidents** card on the Dashboard and in the **Events** timeline.
|
||||||
|
|
||||||
|
### 2. Correlation
|
||||||
|
Use the **Correlation** view to see:
|
||||||
|
- The event chain leading to the incident.
|
||||||
|
- Automated recommendations generated in response.
|
||||||
|
- Any manual actions already taken.
|
||||||
|
|
||||||
|
### 3. Intervention
|
||||||
|
1. Review the recommended actions in the **Action Queue**.
|
||||||
|
2. If the automated recommendation is not sufficient, use the **Nodes** or **Services** view to manually trigger commands.
|
||||||
|
3. Observe the **Runtime Topology** to ensure no cascading failures occur during remediation.
|
||||||
|
|
||||||
|
### 4. Verification
|
||||||
|
Once actions are completed, verify the system state:
|
||||||
|
- Health badges should transition back to **Nominal**.
|
||||||
|
- The **System Status** in the sidebar should reflect a healthy state.
|
||||||
|
|
@ -2,11 +2,13 @@
|
||||||
|
|
||||||
The system continuously monitors for drift between desired and actual state.
|
The system continuously monitors for drift between desired and actual state.
|
||||||
|
|
||||||
1. If a service is in RECONCILING state, check the Services view.
|
1. **Drift Detection**: When drift is detected, the supervisor generates a recommendation and a corresponding pending action.
|
||||||
2. Review the Recommendations view for automated or guarded actions.
|
2. **Review**: Navigate to the **Recommendations** view for a high-level summary, or the **Action Queue** for the specific execution plan.
|
||||||
3. For 'safe' actions with high confidence, the system may act autonomously if enabled.
|
3. **Approval**: For 'guarded' or 'dangerous' actions, click **Approve** in the Action Queue.
|
||||||
4. For 'guarded' or 'dangerous' actions, an operator must manually approve the action.
|
4. **Execution**: Once approved, the action can be triggered manually by clicking **Execute**, or it will be picked up by the autonomous executor if the system is in `AUTONOMOUS` mode.
|
||||||
5. Risk Levels:
|
5. **Observation**: Monitor the **Deployments** and **Topology** views to watch the reconciliation in real time.
|
||||||
- **Safe**: Minimal impact, high success rate.
|
|
||||||
- **Guarded**: Potential brief service interruption.
|
Risk Levels:
|
||||||
- **Dangerous**: Significant impact, potential data loss, or hardware interaction required.
|
- **Safe**: Minimal impact, high success rate.
|
||||||
|
- **Guarded**: Potential brief service interruption.
|
||||||
|
- **Dangerous**: Significant impact, potential data loss, or node-level disruption.
|
||||||
|
|
|
||||||
225
scripts/executor/executor.py
Normal file
225
scripts/executor/executor.py
Normal file
|
|
@ -0,0 +1,225 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import sys
|
||||||
|
import shutil
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Configuration
# Root directory of the on-disk action queue; the HOMELAB_ACTIONS_ROOT
# environment variable overrides the default location.
ACTIONS_ROOT = Path(os.environ.get("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions"))
# Files that receive one JSON object per line.
EVENT_LOG = Path("/tmp/agent-events.log")
HISTORY_LOG = ACTIONS_ROOT.joinpath("history.log")
|
||||||
|
|
||||||
|
def emit_event(event_type, message, details=None):
    """Print an action lifecycle event and append it to the event log.

    The event is serialised as one JSON object with the keys ``type``,
    ``message``, ``timestamp`` and ``details``; a falsy *details* is
    recorded as an empty dict. A failure to write the log file is
    reported on stderr instead of being raised.
    """
    payload = details if details else {}
    serialized = json.dumps({
        "type": event_type,
        "message": message,
        "timestamp": time.time(),
        "details": payload,
    })
    print(serialized)
    try:
        with open(EVENT_LOG, "a") as log_file:
            log_file.write(f"{serialized}\n")
            log_file.flush()
    except Exception as exc:
        print(f"Error writing to event log: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
def log_history(action_id, status, message):
    """Append one execution-history record to HISTORY_LOG.

    Each record is a JSON object on its own line holding ``timestamp``,
    ``action_id``, ``status`` and ``message``. A failure to write is
    reported on stderr instead of being raised.
    """
    record = dict(
        timestamp=time.time(),
        action_id=action_id,
        status=status,
        message=message,
    )
    try:
        with open(HISTORY_LOG, "a") as history_file:
            history_file.write(f"{json.dumps(record)}\n")
            history_file.flush()
    except Exception as exc:
        print(f"Error writing history: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
|
def ensure_dirs():
    """Create every queue state directory under ACTIONS_ROOT if missing."""
    states = ("pending", "approved", "running", "completed", "failed", "rejected")
    for state in states:
        state_dir = ACTIONS_ROOT / state
        state_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def _transition_pending(action_id, status, gerund, history_message):
    """Move a pending action into ``<status>/`` and record the decision.

    Shared implementation behind approve_action() and reject_action().

    Args:
        action_id: Action id, or ``<id>.json`` file name, inside ``pending/``.
        status: Target state; also the target directory name and the
            prefix of the ``<status>_at`` timestamp written to the file.
        gerund: Verb form used in the error message (e.g. "approving").
        history_message: Message stored in the history log entry.

    Returns:
        True when the action was moved, False otherwise.
    """
    filename = action_id if action_id.endswith(".json") else f"{action_id}.json"

    # The id is supplied by the caller (the CLI passes it through verbatim);
    # accept only a bare file name so it cannot address a file outside pending/.
    if Path(filename).name != filename:
        print(f"Invalid action id: {action_id}")
        return False

    ensure_dirs()

    pending_path = ACTIONS_ROOT / "pending" / filename
    if not pending_path.exists():
        print(f"Action {filename} not found in pending.")
        return False

    target_path = ACTIONS_ROOT / status / filename

    try:
        with open(pending_path, "r") as f:
            action = json.load(f)

        action["status"] = status
        action[f"{status}_at"] = time.time()

        with open(pending_path, "w") as f:
            json.dump(action, f, indent=2)

        shutil.move(pending_path, target_path)
    except (OSError, ValueError, TypeError) as e:
        # OSError: file access / move; ValueError: invalid JSON;
        # TypeError: the JSON document is not an object.
        print(f"Error {gerund} action: {e}")
        return False

    # Fall back to the file stem so a file without "action_id" is still
    # reported as moved instead of as an error after the move succeeded.
    recorded_id = action.get("action_id", Path(filename).stem)
    emit_event(f"action_{status}", f"Action {status}: {recorded_id}", {"action_id": recorded_id})
    log_history(recorded_id, status, history_message)
    print(f"Action {recorded_id} {status}.")
    return True


def approve_action(action_id):
    """Approve a pending action, moving it from ``pending/`` to ``approved/``.

    Args:
        action_id: Action id or ``<id>.json`` file name.

    Returns:
        True on success; False if the id is invalid, the action is not
        pending, or the file could not be updated and moved.
    """
    return _transition_pending(action_id, "approved", "approving", "Manual approval received")


def reject_action(action_id):
    """Reject a pending action, moving it from ``pending/`` to ``rejected/``.

    Args:
        action_id: Action id or ``<id>.json`` file name.

    Returns:
        True on success; False if the id is invalid, the action is not
        pending, or the file could not be updated and moved.
    """
    return _transition_pending(action_id, "rejected", "rejecting", "Manual rejection received")
|
||||||
|
|
||||||
|
def process_action(action_path, dry_run=False):
    """Process a single approved action, or resume one already running.

    The action file is moved into ``running/``, its (simulated) handler is
    run, and the file is then moved to ``completed/`` or ``failed/`` with
    ``status``, ``started_at`` and ``finished_at`` updated. Lifecycle events
    and history entries are emitted at start and finish.

    Args:
        action_path: ``pathlib.Path`` of the action JSON file.
        dry_run: When true, only report what would be executed.

    Returns:
        None. An unreadable action file, or one lacking ``action_id`` or
        ``action_type``, is reported and left where it is.
    """
    try:
        with open(action_path, "r") as f:
            action = json.load(f)
        # Looked up inside the try so one malformed file is reported and
        # skipped instead of aborting the whole executor run.
        action_id = action["action_id"]
        action_type = action["action_type"]
    except Exception as e:
        print(f"Error reading action {action_path}: {e}")
        return

    # Move to running (Resumable execution state)
    running_path = ACTIONS_ROOT / "running" / action_path.name
    # A resumed action already lives in running/; skip the self-move.
    if Path(action_path) != running_path:
        shutil.move(action_path, running_path)

    action["status"] = "running"
    action["started_at"] = time.time()
    with open(running_path, "w") as f:
        json.dump(action, f, indent=2)

    emit_event("action_started", f"Started action {action_id} ({action_type})", {"action_id": action_id})
    log_history(action_id, "running", f"Execution started (dry_run={dry_run})")

    # Simulation logic (Recommendation-safe execution model)
    target = action.get('service') or action.get('node')
    print(f"Executing {action_type} for {target}...")

    # Idempotent simulation: in a real world, we'd check if it's already done
    time.sleep(0.5)

    success = True
    if dry_run:
        # NOTE(review): a dry run still advances the action to completed/;
        # confirm whether that is intended.
        print(f"[DRY-RUN] Would execute {action_type} logic here.")
    else:
        # Initial action types implementation (Simulation)
        if action_type == "redeploy_service":
            print(f"DEBUG: Triggering container restart/redeploy for {action.get('service')}")
        elif action_type == "rerun_healthcheck":
            print(f"DEBUG: Running healthcheck for {action.get('service')}")
        elif action_type == "rerun_deployment_stage":
            print(f"DEBUG: Retrying deployment stage for {action.get('service')}")
        elif action_type == "collect_diagnostics":
            print(f"DEBUG: Collecting logs and metrics for {target}")
        else:
            # Nothing was executed, so the action must not be recorded as completed.
            print(f"DEBUG: Unknown action type: {action_type}; marking action as failed")
            success = False

    # Finalize
    final_status = "completed" if success else "failed"
    final_path = ACTIONS_ROOT / final_status / action_path.name

    action["status"] = final_status
    action["finished_at"] = time.time()

    with open(running_path, "w") as f:
        json.dump(action, f, indent=2)

    shutil.move(running_path, final_path)

    emit_event(f"action_{final_status}", f"Action {action_id} {final_status}", {"action_id": action_id})
    log_history(action_id, final_status, "Execution finished")
|
||||||
|
|
||||||
|
def run_executor(dry_run=False):
    """Run one executor pass over the action queue.

    Actions found in ``running/`` are resumed first; afterwards every
    action in ``approved/`` is processed. *dry_run* is forwarded to
    process_action() unchanged.
    """
    ensure_dirs()
    print(f"--- Executor Run: {time.ctime()} (dry_run={dry_run}) ---")

    # 1. Resume running actions
    for resumed in list((ACTIONS_ROOT / "running").glob("*.json")):
        print(f"Resuming action: {resumed.name}")
        process_action(resumed, dry_run=dry_run)

    # 2. Process approved actions
    queued = list((ACTIONS_ROOT / "approved").glob("*.json"))
    if queued:
        for queued_file in queued:
            process_action(queued_file, dry_run=dry_run)
    else:
        print("No approved actions found.")

    print("Run complete.")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point:
    #   executor.py [run] [--dry-run]
    #   executor.py approve <action_id>
    #   executor.py reject <action_id>
    import argparse

    parser = argparse.ArgumentParser(description="Homelab Action Executor")
    parser.add_argument("command", choices=["run", "approve", "reject"], nargs="?", default="run")
    parser.add_argument("action_id", nargs="?")
    parser.add_argument("--dry-run", action="store_true")

    args = parser.parse_args()

    if args.command == "run":
        run_executor(dry_run=args.dry_run)
    else:
        if not args.action_id:
            print(f"Error: action_id required for {args.command}")
            sys.exit(1)
        handler = approve_action if args.command == "approve" else reject_action
        # Propagate the outcome so callers (e.g. shell scripts using `set -e`)
        # can detect an action that was not found or could not be moved.
        sys.exit(0 if handler(args.action_id) else 1)
|
||||||
74
scripts/executor/test_actions.sh
Normal file
74
scripts/executor/test_actions.sh
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
#!/bin/bash
# Validation script for Homelab Action Queue System
#
# Runs the drift scenarios and the supervisor, approves and executes one
# pending action, rejects another, and then checks that the matching
# lifecycle events are present in the event log.

# -u and pipefail make unset variables and failures inside pipelines
# (such as a grep that matches nothing) abort the run instead of passing.
set -euo pipefail

BASE_DIR=$(pwd)
export HOMELAB_WORLD_ROOT="$BASE_DIR/tmp/homelab/world"
export HOMELAB_ACTIONS_ROOT="$BASE_DIR/tmp/homelab/actions"
EVENT_LOG="/tmp/agent-events.log"

# Print the file name of the first action in pending/, or nothing if empty.
first_pending() {
    local path
    for path in "$HOMELAB_ACTIONS_ROOT/pending/"*.json; do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$path" ] || continue
        basename "$path"
        return 0
    done
}

# Print the most recent event of the given type; fail if there is none.
require_event() {
    local event_type=$1
    if ! grep -- "$event_type" "$EVENT_LOG" | tail -n 1; then
        echo "FAILURE: No $event_type event found in $EVENT_LOG."
        exit 1
    fi
}

echo "=== Starting Action Queue Validation ==="

# 1. Setup drift scenarios
echo "Setting up drift scenarios..."
bash scripts/supervisor/test_scenarios.sh

# 2. Run supervisor to generate action proposals
echo "Running supervisor..."
python3 scripts/supervisor/supervisor.py

# 3. Check for pending actions
echo "Checking pending actions..."
ls -l "$HOMELAB_ACTIONS_ROOT/pending/"

# Get an action ID from pending
ACTION_FILE=$(first_pending)
if [ -z "$ACTION_FILE" ]; then
    echo "Error: No pending actions found!"
    exit 1
fi
ACTION_ID="${ACTION_FILE%.json}"
echo "Found action: $ACTION_ID"

# 4. Approve the action
echo "Approving action $ACTION_ID..."
python3 scripts/executor/executor.py approve "$ACTION_ID"

# 5. Run executor
echo "Running executor..."
python3 scripts/executor/executor.py run

# 6. Verify completion
if [ -f "$HOMELAB_ACTIONS_ROOT/completed/$ACTION_FILE" ]; then
    echo "SUCCESS: Action $ACTION_ID moved to completed."
else
    echo "FAILURE: Action $ACTION_ID NOT found in completed."
    exit 1
fi

# 7. Test rejection
echo "Testing rejection..."
REJECTION_TESTED=false
NEXT_ACTION_FILE=$(first_pending)
if [ -n "$NEXT_ACTION_FILE" ]; then
    NEXT_ACTION_ID="${NEXT_ACTION_FILE%.json}"
    echo "Rejecting action $NEXT_ACTION_ID..."
    python3 scripts/executor/executor.py reject "$NEXT_ACTION_ID"

    if [ -f "$HOMELAB_ACTIONS_ROOT/rejected/$NEXT_ACTION_FILE" ]; then
        echo "SUCCESS: Action $NEXT_ACTION_ID moved to rejected."
    else
        echo "FAILURE: Action $NEXT_ACTION_ID NOT found in rejected."
        exit 1
    fi
    REJECTION_TESTED=true
fi

# 8. Verify events
# NOTE: the event log is appended to across runs, so these checks can also
# match events left behind by an earlier run.
echo "Verifying events in $EVENT_LOG..."
require_event "action_created"
require_event "action_approved"
require_event "action_started"
require_event "action_completed"
# Only expect a rejection event when step 7 actually rejected something.
if [ "$REJECTION_TESTED" = true ]; then
    require_event "action_rejected"
fi

echo "=== Validation Complete ==="
|
||||||
|
|
@ -5,14 +5,19 @@ import yaml
|
||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import glob
|
import glob
|
||||||
|
import uuid
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
WORLD_STATE_PATH = Path(os.getenv("HOMELAB_WORLD_ROOT", "/opt/homelab/world"))
|
WORLD_STATE_PATH = Path(os.getenv("HOMELAB_WORLD_ROOT", "/opt/homelab/world"))
|
||||||
|
ACTIONS_ROOT = Path(os.getenv("HOMELAB_ACTIONS_ROOT", "/opt/homelab/actions"))
|
||||||
INVENTORY_PATH = Path("hosts")
|
INVENTORY_PATH = Path("hosts")
|
||||||
EVENT_LOG = Path("/tmp/agent-events.log")
|
EVENT_LOG = Path("/tmp/agent-events.log")
|
||||||
CHECKPOINT_FILE = Path("/tmp/supervisor-checkpoint.json")
|
CHECKPOINT_FILE = Path("/tmp/supervisor-checkpoint.json")
|
||||||
|
|
||||||
|
# Action Queue Layout
|
||||||
|
ACTION_DIRS = ["pending", "approved", "running", "completed", "failed", "rejected"]
|
||||||
|
|
||||||
# Reconcile event types
|
# Reconcile event types
|
||||||
RECONCILE_REQUIRED = "reconcile_required"
|
RECONCILE_REQUIRED = "reconcile_required"
|
||||||
RECONCILE_RECOMMENDED = "reconcile_recommended"
|
RECONCILE_RECOMMENDED = "reconcile_recommended"
|
||||||
|
|
@ -24,6 +29,70 @@ STATE_DEGRADED = "degraded"
|
||||||
STATE_UNSTABLE = "unstable"
|
STATE_UNSTABLE = "unstable"
|
||||||
STATE_RECONCILING = "reconciling"
|
STATE_RECONCILING = "reconciling"
|
||||||
|
|
||||||
|
def ensure_action_dirs():
    """Create each ACTION_DIRS sub-directory of ACTIONS_ROOT if missing."""
    for state_name in ACTION_DIRS:
        state_dir = ACTIONS_ROOT / state_name
        state_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def emit_action_proposal(recommendation):
    """Turn a supervisor recommendation into a pending action file.

    The recommendation's ``action`` is mapped to an executor action type
    (unmapped values become ``collect_diagnostics``) and a risk level is
    derived from that type. The proposal is written to
    ``pending/<action_id>.json`` and an ``action_created`` event is emitted.
    Errors while writing or emitting are reported on stderr.
    """
    ensure_action_dirs()

    type_by_recommendation = {
        "redeploy": "redeploy_service",
        "deploy": "redeploy_service",
        "diagnostics": "collect_diagnostics",
        "failover_review": "collect_diagnostics",
        "review": "collect_diagnostics",
        "delayed_deployment": "rerun_deployment_stage",
    }
    requested = recommendation["action"]
    action_type = type_by_recommendation.get(requested, "collect_diagnostics")

    risk_by_type = {
        "redeploy_service": "guarded",
        "rerun_healthcheck": "safe",
        "rerun_deployment_stage": "guarded",
        "collect_diagnostics": "safe",
    }
    risk_level = risk_by_type.get(action_type, "dangerous")

    # Dangerous always requires approval
    # Guarded defaults to approval
    needs_approval = risk_level == "dangerous" or risk_level == "guarded"

    action_id = str(uuid.uuid4())
    drift = recommendation["drift"]
    action = {
        "action_id": action_id,
        "created_at": time.time(),
        "proposed_by": "supervisor",
        "correlation_id": str(uuid.uuid4()),  # In a real system, link to drift ID
        "node": drift.get("node"),
        "service": drift.get("service"),
        "action_type": action_type,
        "risk_level": risk_level,
        "confidence": 0.9,  # Default confidence
        "approval_required": needs_approval,
        "autonomous_eligible": False,  # No autonomy yet
        "status": "pending",
        "payload": drift,
        "rollback_reference": None,
    }

    proposal_path = ACTIONS_ROOT / "pending" / f"{action_id}.json"
    try:
        with open(proposal_path, "w") as out:
            json.dump(action, out, indent=2)

        subject = action["service"] or action["node"]
        event_details = {
            "action_id": action_id,
            "action_type": action_type,
            "node": action["node"],
            "service": action["service"],
        }
        emit_event("action_created", f"Action proposed: {action_type} for {subject}", event_details)
    except Exception as exc:
        print(f"Error emitting action proposal: {exc}", file=sys.stderr)
|
||||||
|
|
||||||
def emit_event(event_type, message, details=None):
|
def emit_event(event_type, message, details=None):
|
||||||
"""Emit reconciliation events using existing event system (append-only file)."""
|
"""Emit reconciliation events using existing event system (append-only file)."""
|
||||||
event = {
|
event = {
|
||||||
|
|
@ -278,6 +347,8 @@ def main():
|
||||||
# Emit reconciliation events
|
# Emit reconciliation events
|
||||||
for rec in recommendations:
|
for rec in recommendations:
|
||||||
emit_event(rec["type"], rec["message"], rec["drift"])
|
emit_event(rec["type"], rec["message"], rec["drift"])
|
||||||
|
# Proposed: Emit action proposals to action queue
|
||||||
|
emit_action_proposal(rec)
|
||||||
|
|
||||||
# 6. Save checkpoint
|
# 6. Save checkpoint
|
||||||
save_checkpoint({
|
save_checkpoint({
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
{
|
||||||
|
"action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e",
|
||||||
|
"created_at": 1778600485.050643,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "6d88755b-ca89-45eb-bf2d-506fca631144",
|
||||||
|
"node": "node1",
|
||||||
|
"service": "homeassistant",
|
||||||
|
"action_type": "redeploy_service",
|
||||||
|
"risk_level": "guarded",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": true,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "completed",
|
||||||
|
"payload": {
|
||||||
|
"type": "unhealthy_service",
|
||||||
|
"service": "homeassistant",
|
||||||
|
"status": "unhealthy",
|
||||||
|
"node": "node1"
|
||||||
|
},
|
||||||
|
"rollback_reference": null,
|
||||||
|
"approved_at": 1778600485.1278665,
|
||||||
|
"started_at": 1778600485.1792338,
|
||||||
|
"finished_at": 1778600485.6797137
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
{
|
||||||
|
"action_id": "050add79-3265-4e35-bb88-41c368bbccda",
|
||||||
|
"created_at": 1778600510.7529757,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "d8ba7d84-74dd-46c8-a085-5ed8ba186770",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "completed",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-001",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null,
|
||||||
|
"approved_at": 1778600510.8252015,
|
||||||
|
"started_at": 1778600510.8744874,
|
||||||
|
"finished_at": 1778600511.3750403
|
||||||
|
}
|
||||||
7
tmp/homelab/actions/completed/resumable-task.json
Normal file
7
tmp/homelab/actions/completed/resumable-task.json
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
{
|
||||||
|
"action_id": "resumable-task",
|
||||||
|
"action_type": "rerun_healthcheck",
|
||||||
|
"status": "completed",
|
||||||
|
"started_at": 1778600488.5642526,
|
||||||
|
"finished_at": 1778600489.0646975
|
||||||
|
}
|
||||||
10
tmp/homelab/actions/history.log
Normal file
10
tmp/homelab/actions/history.log
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
{"timestamp": 1778600485.1282582, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "approved", "message": "Manual approval received"}
|
||||||
|
{"timestamp": 1778600485.179484, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "running", "message": "Execution started (dry_run=False)"}
|
||||||
|
{"timestamp": 1778600485.680433, "action_id": "0083f8ad-1f2b-47a4-81a8-81e59740879e", "status": "completed", "message": "Execution finished"}
|
||||||
|
{"timestamp": 1778600485.7410686, "action_id": "2143ae5b-bcc6-410b-b925-e7def70fc013", "status": "rejected", "message": "Manual rejection received"}
|
||||||
|
{"timestamp": 1778600488.5644836, "action_id": "resumable-task", "status": "running", "message": "Execution started (dry_run=False)"}
|
||||||
|
{"timestamp": 1778600489.0652084, "action_id": "resumable-task", "status": "completed", "message": "Execution finished"}
|
||||||
|
{"timestamp": 1778600510.825529, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "approved", "message": "Manual approval received"}
|
||||||
|
{"timestamp": 1778600510.8747966, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "running", "message": "Execution started (dry_run=False)"}
|
||||||
|
{"timestamp": 1778600511.3755214, "action_id": "050add79-3265-4e35-bb88-41c368bbccda", "status": "completed", "message": "Execution finished"}
|
||||||
|
{"timestamp": 1778600511.4307747, "action_id": "240cbbc0-891e-4032-bf73-1fa40ff850b4", "status": "rejected", "message": "Manual rejection received"}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"action_id": "50d7cdab-2f12-449f-965a-0383e32babaa",
|
||||||
|
"created_at": 1778600485.053174,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "a2899a7f-548f-455d-a8dd-4e208be58e00",
|
||||||
|
"node": null,
|
||||||
|
"service": null,
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "unresolved_incident",
|
||||||
|
"incident_id": "inc-99",
|
||||||
|
"description": "High memory usage on node1",
|
||||||
|
"status": "investigating"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "5e239d96-ff3f-48a3-a71a-ad5aa6b7ff88",
|
||||||
|
"created_at": 1778600485.05199,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "c5fa628e-35a1-44f9-9119-07d93f20af80",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-002",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "7cde5093-3394-43af-9391-321c50ac5362",
|
||||||
|
"created_at": 1778600510.7521193,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "2a91f58e-e10d-4de5-abd7-5f4fe6fdc325",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-002",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "a42e2183-ca22-4a50-97a7-eb53ab0e039a",
|
||||||
|
"created_at": 1778600510.75163,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "ec2a1960-5baa-453a-8380-65fc9376cc82",
|
||||||
|
"node": "node2",
|
||||||
|
"service": null,
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "offline_node",
|
||||||
|
"node": "node2",
|
||||||
|
"status": "offline"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"action_id": "aae83bcd-455f-4b59-bab0-7c7994116468",
|
||||||
|
"created_at": 1778600510.7506568,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "0a786305-46cb-4837-8725-53d99203f39e",
|
||||||
|
"node": "node1",
|
||||||
|
"service": "homeassistant",
|
||||||
|
"action_type": "redeploy_service",
|
||||||
|
"risk_level": "guarded",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": true,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "unhealthy_service",
|
||||||
|
"service": "homeassistant",
|
||||||
|
"status": "unhealthy",
|
||||||
|
"node": "node1"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"action_id": "c2e6c844-6d96-4ea7-b924-5e33764e5493",
|
||||||
|
"created_at": 1778600510.7533653,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "6ffc0579-71ac-417f-8ea1-fc46e54527c6",
|
||||||
|
"node": null,
|
||||||
|
"service": null,
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "unresolved_incident",
|
||||||
|
"incident_id": "inc-99",
|
||||||
|
"description": "High memory usage on node1",
|
||||||
|
"status": "investigating"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "c91a4171-e636-4194-a146-6e003d2f2586",
|
||||||
|
"created_at": 1778600510.7511823,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "966a62ee-f81b-497d-96cb-7749f4da0c6f",
|
||||||
|
"node": "node2",
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "rerun_deployment_stage",
|
||||||
|
"risk_level": "guarded",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": true,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "missing_service",
|
||||||
|
"service": "webapp",
|
||||||
|
"node": "node2"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "e6d3f0d6-c294-4282-b9f4-a730f9cec9dc",
|
||||||
|
"created_at": 1778600485.0515254,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "bf51852b-0b34-4b4b-98c9-fffff38f77ce",
|
||||||
|
"node": "node2",
|
||||||
|
"service": null,
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "offline_node",
|
||||||
|
"node": "node2",
|
||||||
|
"status": "offline"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "f4c56df2-6775-484b-806e-cdecdcc19584",
|
||||||
|
"created_at": 1778600485.0527768,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "f974d640-d0fb-4a85-bf8a-eda100182181",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-001",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"action_id": "ff3da03c-fffa-49a7-985d-ed4589ab6856",
|
||||||
|
"created_at": 1778600485.0510974,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "37da2d5b-3ecd-4a29-97c2-7e9461b1792e",
|
||||||
|
"node": "node2",
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "rerun_deployment_stage",
|
||||||
|
"risk_level": "guarded",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": true,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "pending",
|
||||||
|
"payload": {
|
||||||
|
"type": "missing_service",
|
||||||
|
"service": "webapp",
|
||||||
|
"node": "node2"
|
||||||
|
},
|
||||||
|
"rollback_reference": null
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"action_id": "2143ae5b-bcc6-410b-b925-e7def70fc013",
|
||||||
|
"created_at": 1778600485.0523734,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "dc23556c-68d2-41a3-a5d2-9ad66705f989",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "rejected",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-003",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null,
|
||||||
|
"rejected_at": 1778600485.740686
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
{
|
||||||
|
"action_id": "240cbbc0-891e-4032-bf73-1fa40ff850b4",
|
||||||
|
"created_at": 1778600510.7525399,
|
||||||
|
"proposed_by": "supervisor",
|
||||||
|
"correlation_id": "fd234809-82aa-459d-858b-18bc3205a6c5",
|
||||||
|
"node": null,
|
||||||
|
"service": "webapp",
|
||||||
|
"action_type": "collect_diagnostics",
|
||||||
|
"risk_level": "safe",
|
||||||
|
"confidence": 0.9,
|
||||||
|
"approval_required": false,
|
||||||
|
"autonomous_eligible": false,
|
||||||
|
"status": "rejected",
|
||||||
|
"payload": {
|
||||||
|
"type": "failed_deployment",
|
||||||
|
"deployment_id": "dep-003",
|
||||||
|
"service": "webapp"
|
||||||
|
},
|
||||||
|
"rollback_reference": null,
|
||||||
|
"rejected_at": 1778600511.4303465
|
||||||
|
}
|
||||||
|
|
@ -1 +1 @@
|
||||||
{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778597957}
|
{"id": "dep-001", "service": "webapp", "status": "failed", "timestamp": 1778600510}
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778597657}
|
{"id": "dep-002", "service": "webapp", "status": "failed", "timestamp": 1778600210}
|
||||||
|
|
|
||||||
|
|
@ -1 +1 @@
|
||||||
{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778597357}
|
{"id": "dep-003", "service": "webapp", "status": "failed", "timestamp": 1778599910}
|
||||||
|
|
|
||||||
192
webui/index.html
192
webui/index.html
|
|
@ -216,9 +216,9 @@
|
||||||
.label { color: var(--text-muted); font-size: 12px; margin-bottom: 4px; }
|
.label { color: var(--text-muted); font-size: 12px; margin-bottom: 4px; }
|
||||||
.value { font-weight: 500; margin-bottom: 12px; }
|
.value { font-weight: 500; margin-bottom: 12px; }
|
||||||
|
|
||||||
.risk-safe { color: var(--safe); }
|
.risk-safe { background: rgba(62, 175, 124, 0.1); color: var(--safe); }
|
||||||
.risk-guarded { color: var(--guarded); }
|
.risk-guarded { background: rgba(230, 126, 34, 0.1); color: var(--guarded); }
|
||||||
.risk-dangerous { color: var(--dangerous); }
|
.risk-dangerous { background: rgba(192, 57, 43, 0.1); color: var(--dangerous); }
|
||||||
|
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
|
|
@ -229,6 +229,9 @@
|
||||||
<li class="nav-item active" onclick="showView('dashboard', this)">
|
<li class="nav-item active" onclick="showView('dashboard', this)">
|
||||||
<span>Dashboard</span>
|
<span>Dashboard</span>
|
||||||
</li>
|
</li>
|
||||||
|
<li class="nav-item" onclick="showView('actions', this)">
|
||||||
|
<span>Action Queue</span>
|
||||||
|
</li>
|
||||||
<li class="nav-item" onclick="showView('nodes', this)">
|
<li class="nav-item" onclick="showView('nodes', this)">
|
||||||
<span>Nodes</span>
|
<span>Nodes</span>
|
||||||
</li>
|
</li>
|
||||||
|
|
@ -238,9 +241,15 @@
|
||||||
<li class="nav-item" onclick="showView('deployments', this)">
|
<li class="nav-item" onclick="showView('deployments', this)">
|
||||||
<span>Deployments</span>
|
<span>Deployments</span>
|
||||||
</li>
|
</li>
|
||||||
|
<li class="nav-item" onclick="showView('topology', this)">
|
||||||
|
<span>Topology</span>
|
||||||
|
</li>
|
||||||
<li class="nav-item" onclick="showView('events', this)">
|
<li class="nav-item" onclick="showView('events', this)">
|
||||||
<span>Events</span>
|
<span>Events</span>
|
||||||
</li>
|
</li>
|
||||||
|
<li class="nav-item" onclick="showView('correlation', this)">
|
||||||
|
<span>Correlation</span>
|
||||||
|
</li>
|
||||||
<li class="nav-item" onclick="showView('recommendations', this)">
|
<li class="nav-item" onclick="showView('recommendations', this)">
|
||||||
<span>Recommendations</span>
|
<span>Recommendations</span>
|
||||||
</li>
|
</li>
|
||||||
|
|
@ -255,7 +264,16 @@
|
||||||
|
|
||||||
<main class="main-content">
|
<main class="main-content">
|
||||||
<header>
|
<header>
|
||||||
<div class="view-title" id="current-view-title">Dashboard</div>
|
<div style="display:flex; align-items:center; gap:20px">
|
||||||
|
<div class="view-title" id="current-view-title">Dashboard</div>
|
||||||
|
<select id="operator-mode" onchange="setOperatorMode(this.value)" style="background:var(--sidebar-color); border:1px solid var(--border-color); color:var(--accent-color); font-weight:bold; font-size:12px; padding:4px 8px">
|
||||||
|
<option value="observe">OBSERVE</option>
|
||||||
|
<option value="recommend">RECOMMEND</option>
|
||||||
|
<option value="approval" selected>APPROVAL</option>
|
||||||
|
<option value="autonomous">AUTONOMOUS</option>
|
||||||
|
<option value="maintenance">MAINTENANCE</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
<div class="header-actions">
|
<div class="header-actions">
|
||||||
<button onclick="refreshData()">Refresh</button>
|
<button onclick="refreshData()">Refresh</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
@ -269,6 +287,10 @@
|
||||||
<div class="card-title">System Overview</div>
|
<div class="card-title">System Overview</div>
|
||||||
<div id="dashboard-summary" style="margin-top:20px"></div>
|
<div id="dashboard-summary" style="margin-top:20px"></div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-title">Pending Actions</div>
|
||||||
|
<div id="dashboard-actions-summary" style="margin-top:20px"></div>
|
||||||
|
</div>
|
||||||
<div class="card">
|
<div class="card">
|
||||||
<div class="card-title">Active Incidents</div>
|
<div class="card-title">Active Incidents</div>
|
||||||
<div id="dashboard-incidents" style="margin-top:20px"></div>
|
<div id="dashboard-incidents" style="margin-top:20px"></div>
|
||||||
|
|
@ -276,6 +298,20 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Actions View -->
|
||||||
|
<div id="view-actions" class="view hidden">
|
||||||
|
<div style="display:grid; grid-template-columns: 1fr 1fr; gap:24px">
|
||||||
|
<div>
|
||||||
|
<h3>Pending Approval</h3>
|
||||||
|
<div id="actions-pending" class="timeline"></div>
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<h3>Active / History</h3>
|
||||||
|
<div id="actions-history" class="timeline"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Nodes View -->
|
<!-- Nodes View -->
|
||||||
<div id="view-nodes" class="view hidden">
|
<div id="view-nodes" class="view hidden">
|
||||||
<div class="grid" id="nodes-list"></div>
|
<div class="grid" id="nodes-list"></div>
|
||||||
|
|
@ -291,11 +327,24 @@
|
||||||
<div class="grid" id="deployments-list"></div>
|
<div class="grid" id="deployments-list"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Topology View -->
|
||||||
|
<div id="view-topology" class="view hidden">
|
||||||
|
<div class="card" style="min-height:500px">
|
||||||
|
<div class="card-title">Runtime Topology</div>
|
||||||
|
<div id="topology-map" style="margin-top:20px; display:flex; flex-wrap:wrap; gap:40px; justify-content:center"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Events View -->
|
<!-- Events View -->
|
||||||
<div id="view-events" class="view hidden">
|
<div id="view-events" class="view hidden">
|
||||||
<div class="timeline" id="events-timeline"></div>
|
<div class="timeline" id="events-timeline"></div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<!-- Correlation View -->
|
||||||
|
<div id="view-correlation" class="view hidden">
|
||||||
|
<div id="correlation-chains" class="grid"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<!-- Recommendations View -->
|
<!-- Recommendations View -->
|
||||||
<div id="view-recommendations" class="view hidden">
|
<div id="view-recommendations" class="view hidden">
|
||||||
<div class="grid" id="recommendations-list"></div>
|
<div class="grid" id="recommendations-list"></div>
|
||||||
|
|
@ -335,6 +384,34 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * POST `data` as a JSON body to `endpoint`.
 *
 * Resolves to the parsed JSON response. Any failure (serialization,
 * network, or a response body that is not valid JSON) is logged to the
 * console and resolves to `null` instead of rejecting.
 */
async function postData(endpoint, data) {
    try {
        // Built inside the try so a JSON.stringify failure is also reported as null.
        const requestOptions = {
            method: 'POST',
            headers: {'Content-Type': 'application/json'},
            body: JSON.stringify(data)
        };
        const response = await fetch(endpoint, requestOptions);
        const parsed = await response.json();
        return parsed;
    } catch (err) {
        console.error('Post error:', endpoint, err);
        return null;
    }
}
|
||||||
|
|
||||||
|
/**
 * Ask the backend to move action `id` into `status`.
 *
 * Refreshes the current view when the backend answers {status: 'ok'};
 * otherwise shows a failure alert.
 */
async function mutateAction(id, status) {
    const reply = await postData('/action/mutate', {id, status});
    const succeeded = Boolean(reply) && reply.status === 'ok';
    if (!succeeded) {
        alert('Mutation failed');
        return;
    }
    refreshData();
}
|
||||||
|
|
||||||
|
/**
 * Change handler for the operator-mode selector.
 *
 * Currently a stub: it only logs the chosen mode and does not contact
 * the backend.
 */
function setOperatorMode(mode) {
    // In real system, this would call backend
    console.log('Operator mode set to:', mode);
}
|
||||||
|
|
||||||
function formatTime(ts) {
|
function formatTime(ts) {
|
||||||
if (!ts) return 'N/A';
|
if (!ts) return 'N/A';
|
||||||
return new Date(ts * 1000).toLocaleString();
|
return new Date(ts * 1000).toLocaleString();
|
||||||
|
|
@ -368,6 +445,53 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (currentView === 'dashboard' || currentView === 'actions') {
|
||||||
|
const actions = await fetchData('/actions');
|
||||||
|
if (actions) {
|
||||||
|
if (currentView === 'dashboard') {
|
||||||
|
const dashActions = document.getElementById('dashboard-actions-summary');
|
||||||
|
const pendingCount = actions.pending.length;
|
||||||
|
dashActions.innerHTML = `
|
||||||
|
<div class="label">Pending</div><div class="value" style="color:var(--guarded)">${pendingCount}</div>
|
||||||
|
<div class="label">Running</div><div class="value" style="color:var(--reconciling)">${actions.running.length}</div>
|
||||||
|
`;
|
||||||
|
}
|
||||||
|
if (currentView === 'actions') {
|
||||||
|
const pendingEl = document.getElementById('actions-pending');
|
||||||
|
const historyEl = document.getElementById('actions-history');
|
||||||
|
|
||||||
|
pendingEl.innerHTML = actions.pending.map(a => `
|
||||||
|
<div class="card" style="margin-bottom:12px">
|
||||||
|
<div class="card-header">
|
||||||
|
<div class="card-title">${a.type.toUpperCase()}</div>
|
||||||
|
<span class="badge risk-${a.risk_level}">${a.risk_level}</span>
|
||||||
|
</div>
|
||||||
|
<p>${a.description}</p>
|
||||||
|
<div class="label">Target</div><div class="value">${a.target.node} ${a.target.service || ''}</div>
|
||||||
|
<div class="label">Confidence</div><div class="value">${Math.round(a.confidence*100)}%</div>
|
||||||
|
<div class="controls">
|
||||||
|
<button class="btn-primary" onclick="mutateAction('${a.id}', 'approved')">Approve</button>
|
||||||
|
<button onclick="mutateAction('${a.id}', 'rejected')">Reject</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`).join('') || 'No pending actions.';
|
||||||
|
|
||||||
|
const history = [...actions.approved, ...actions.running, ...actions.completed, ...actions.failed];
|
||||||
|
historyEl.innerHTML = history.sort((a,b) => b.timestamp - a.timestamp).map(a => `
|
||||||
|
<div class="event">
|
||||||
|
<div class="event-header">
|
||||||
|
<span>${a.type.toUpperCase()}</span>
|
||||||
|
<span class="badge ${getStatusClass(a.status)}">${a.status}</span>
|
||||||
|
</div>
|
||||||
|
<div>${a.description}</div>
|
||||||
|
<small>${formatTime(a.timestamp)} | Target: ${a.target.node}</small>
|
||||||
|
${a.status === 'approved' ? `<div class="controls"><button class="btn-primary" onclick="mutateAction('${a.id}', 'running')">Execute</button></div>` : ''}
|
||||||
|
</div>
|
||||||
|
`).join('') || 'No history.';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (currentView === 'dashboard' || currentView === 'events') {
|
if (currentView === 'dashboard' || currentView === 'events') {
|
||||||
const incidents = await fetchData('/incidents');
|
const incidents = await fetchData('/incidents');
|
||||||
if (currentView === 'dashboard') {
|
if (currentView === 'dashboard') {
|
||||||
|
|
@ -474,6 +598,64 @@
|
||||||
`).join('');
|
`).join('');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (currentView === 'topology') {
|
||||||
|
const nodes = await fetchData('/nodes');
|
||||||
|
const services = await fetchData('/services');
|
||||||
|
const topMap = document.getElementById('topology-map');
|
||||||
|
if (nodes && services) {
|
||||||
|
topMap.innerHTML = nodes.map(node => {
|
||||||
|
const nodeServices = services.filter(s => s.node === node.hostname || s.node === node.id);
|
||||||
|
return `
|
||||||
|
<div class="card" style="width:250px; border: 1px solid ${node.health === 'nominal' ? 'var(--border-color)' : 'var(--error)'}">
|
||||||
|
<div class="card-header">
|
||||||
|
<div class="card-title">${node.hostname}</div>
|
||||||
|
<span class="badge ${getStatusClass(node.health)}">${node.health}</span>
|
||||||
|
</div>
|
||||||
|
<div class="label">Capabilities</div>
|
||||||
|
<div class="value" style="font-size:11px">${node.capabilities.join(', ')}</div>
|
||||||
|
<div class="label">Services</div>
|
||||||
|
<div style="font-size:12px; margin-bottom:10px">
|
||||||
|
${nodeServices.length > 0 ? nodeServices.map(s => `
|
||||||
|
<div style="display:flex; justify-content:space-between; margin-bottom:4px; padding:4px; background:rgba(255,255,255,0.03)">
|
||||||
|
<span>${s.name}</span>
|
||||||
|
<span class="${getStatusClass(s.health)}" style="font-size:10px">${s.health}</span>
|
||||||
|
</div>
|
||||||
|
${s.dependencies.length > 0 ? `<div style="font-size:9px; color:var(--text-muted); margin-left:8px; margin-bottom:4px">dep: ${s.dependencies.join(', ')}</div>` : ''}
|
||||||
|
`).join('') : '<div class="value">None</div>'}
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}).join('');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentView === 'correlation') {
|
||||||
|
const incidents = await fetchData('/incidents');
|
||||||
|
const actions = await fetchData('/actions');
|
||||||
|
const list = document.getElementById('correlation-chains');
|
||||||
|
if (incidents && actions) {
|
||||||
|
const allActions = Object.values(actions).flat();
|
||||||
|
list.innerHTML = incidents.map(inc => {
|
||||||
|
const related = allActions.filter(a => a.correlation_chain && a.correlation_chain.includes(inc.id));
|
||||||
|
return `
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-header">
|
||||||
|
<div class="card-title">Incident: ${inc.id || 'INC-001'}</div>
|
||||||
|
<span class="badge status-error">Active</span>
|
||||||
|
</div>
|
||||||
|
<p>${inc.message}</p>
|
||||||
|
<div class="label">Related Actions</div>
|
||||||
|
${related.map(a => `
|
||||||
|
<div class="event" style="margin-top:5px">
|
||||||
|
<strong>${a.type}</strong> (${a.status})<br>
|
||||||
|
<small>${a.description}</small>
|
||||||
|
</div>
|
||||||
|
`).join('') || '<div class="value">No actions yet</div>'}
|
||||||
|
</div>
|
||||||
|
`;
|
||||||
|
}).join('');
|
||||||
|
}
|
||||||
|
}
|
||||||
if (currentView === 'settings') {
|
if (currentView === 'settings') {
|
||||||
const config = await fetchData('/config');
|
const config = await fetchData('/config');
|
||||||
const content = document.getElementById('settings-content');
|
const content = document.getElementById('settings-content');
|
||||||
|
|
@ -482,6 +664,8 @@
|
||||||
<div class="value">${config.auto_mode ? 'Enabled' : 'Disabled'}</div>
|
<div class="value">${config.auto_mode ? 'Enabled' : 'Disabled'}</div>
|
||||||
<div class="label">Action Thresholds</div>
|
<div class="label">Action Thresholds</div>
|
||||||
<div class="value mono">${JSON.stringify(config.action_thresholds, null, 2)}</div>
|
<div class="value mono">${JSON.stringify(config.action_thresholds, null, 2)}</div>
|
||||||
|
<div class="label">Telegram Integration</div>
|
||||||
|
<div class="value" style="color:var(--text-muted)">Ready for mobile approval flows. Hook: /api/v1/telegram/webhook</div>
|
||||||
<button onclick="alert('Settings update not implemented in this demo')">Edit Configuration</button>
|
<button onclick="alert('Settings update not implemented in this demo')">Edit Configuration</button>
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
68
webui/web.py
68
webui/web.py
|
|
@ -8,6 +8,7 @@ from pathlib import Path
|
||||||
STATE_DIR = Path("/opt/homelab/state")
|
STATE_DIR = Path("/opt/homelab/state")
|
||||||
EVENTS_DIR = Path("/opt/homelab/events")
|
EVENTS_DIR = Path("/opt/homelab/events")
|
||||||
WORLD_DIR = Path("/opt/homelab/world")
|
WORLD_DIR = Path("/opt/homelab/world")
|
||||||
|
ACTIONS_DIR = Path("/opt/homelab/actions")
|
||||||
EVENT_LOG = Path("/tmp/agent-events.log")
|
EVENT_LOG = Path("/tmp/agent-events.log")
|
||||||
STATIC_DIR = Path(__file__).parent
|
STATIC_DIR = Path(__file__).parent
|
||||||
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
|
REDIS_HOST = os.getenv("REDIS_HOST", "redis")
|
||||||
|
|
@ -164,6 +165,55 @@ def current_events():
|
||||||
return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
|
return sorted(events, key=lambda x: x.get("timestamp", 0), reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
def current_actions():
    """Collect every stored action, grouped by status.

    Each status has its own sub-directory under ``ACTIONS_DIR``; every
    ``*.json`` file in it is loaded with ``read_json_file``. Files that
    load as a falsy value are skipped.

    Returns:
        A dict mapping each known status name to a list of loaded action
        records. Statuses whose directory does not exist map to an empty
        list.
    """
    grouped = {}
    for state in ("pending", "approved", "running", "completed", "failed", "rejected"):
        bucket = grouped.setdefault(state, [])
        folder = ACTIONS_DIR / state
        if not folder.exists():
            continue
        for json_path in folder.glob("*.json"):
            record = read_json_file(json_path)
            if record:
                bucket.append(record)
    return grouped
|
||||||
|
|
||||||
|
|
||||||
|
def mutate_action(action_id, target_status):
    """Move an action record into another status directory.

    Actions are stored as ``ACTIONS_DIR/<status>/<action_id>.json``. The
    record is located by probing each status directory in turn, its
    ``status`` field is set to ``target_status``, ``last_mutation`` is set
    to the current time, and the file is written under the target status
    directory. The old file is removed when the status actually changed.

    Args:
        action_id: Identifier of the action. It is used as a file name, so
            it must be a single path component (no separators, not ``.``
            or ``..``).
        target_status: One of ``pending``, ``approved``, ``running``,
            ``completed``, ``failed`` or ``rejected``.

    Returns:
        ``(True, "Success")`` when the action was moved, otherwise
        ``(False, reason)`` with a human-readable reason.
    """
    # Kept function-local, as in the original block, so the file needs no
    # additional top-level import.
    import time

    statuses = ["pending", "approved", "running", "completed", "failed", "rejected"]
    if target_status not in statuses:
        return False, f"Invalid target status: {target_status}"

    # The id becomes part of a filesystem path. Reject anything that could
    # escape the per-status directories (e.g. "../../x"), because this
    # function reads, rewrites and deletes the file it resolves to.
    action_name = str(action_id)
    if action_name in ("", ".", "..") or any(
        ch in action_name for ch in ("/", "\\", "\0")
    ):
        return False, f"Invalid action id: {action_id}"

    file_name = f"{action_name}.json"

    # Find where the action currently lives.
    source_path = None
    for status in statuses:
        candidate = ACTIONS_DIR / status / file_name
        if candidate.exists():
            source_path = candidate
            break

    if source_path is None:
        return False, f"Action {action_id} not found"

    target_dir = ACTIONS_DIR / target_status
    target_path = target_dir / file_name
    # The ".tmp" suffix keeps the partial file invisible to "*.json" globs.
    tmp_path = target_dir / f"{file_name}.tmp"

    try:
        data = json.loads(source_path.read_text())
        if not isinstance(data, dict):
            return False, f"Action {action_id} is not a JSON object"

        data["status"] = target_status
        data["last_mutation"] = time.time()

        target_dir.mkdir(parents=True, exist_ok=True)
        # Write-then-rename so readers never observe a half-written record.
        tmp_path.write_text(json.dumps(data, indent=2))
        tmp_path.replace(target_path)

        if source_path != target_path:
            source_path.unlink()
    except (OSError, ValueError, TypeError) as e:
        # OSError: filesystem failures; ValueError: malformed JSON or
        # undecodable bytes; TypeError: values json cannot serialize.
        return False, str(e)

    return True, "Success"
|
||||||
|
|
||||||
|
|
||||||
def send_json(status, payload, handler):
|
def send_json(status, payload, handler):
|
||||||
body = (json.dumps(payload) + "\n").encode("utf-8")
|
body = (json.dumps(payload) + "\n").encode("utf-8")
|
||||||
handler.send_response(status)
|
handler.send_response(status)
|
||||||
|
|
@ -207,6 +257,10 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
send_json(200, current_events(), self)
|
send_json(200, current_events(), self)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if self.path == "/actions":
|
||||||
|
send_json(200, current_actions(), self)
|
||||||
|
return
|
||||||
|
|
||||||
if self.path == "/logs":
|
if self.path == "/logs":
|
||||||
print("LOGS endpoint called", flush=True)
|
print("LOGS endpoint called", flush=True)
|
||||||
body = ("\n".join(tail_lines(EVENT_LOG, 200)) + "\n").encode("utf-8")
|
body = ("\n".join(tail_lines(EVENT_LOG, 200)) + "\n").encode("utf-8")
|
||||||
|
|
@ -236,6 +290,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
"/auto-mode",
|
"/auto-mode",
|
||||||
"/config",
|
"/config",
|
||||||
"/events",
|
"/events",
|
||||||
|
"/action/mutate",
|
||||||
):
|
):
|
||||||
self.send_error(404)
|
self.send_error(404)
|
||||||
return
|
return
|
||||||
|
|
@ -291,6 +346,19 @@ class Handler(BaseHTTPRequestHandler):
|
||||||
send_json(200, {"status": "sent"}, self)
|
send_json(200, {"status": "sent"}, self)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
if self.path == "/action/mutate":
|
||||||
|
action_id = payload.get("id")
|
||||||
|
target = payload.get("status")
|
||||||
|
if not action_id or not target:
|
||||||
|
self.send_error(400, "id and status are required")
|
||||||
|
return
|
||||||
|
success, msg = mutate_action(action_id, target)
|
||||||
|
if success:
|
||||||
|
send_json(200, {"status": "ok"}, self)
|
||||||
|
else:
|
||||||
|
self.send_error(500, msg)
|
||||||
|
return
|
||||||
|
|
||||||
if not command:
|
if not command:
|
||||||
self.send_error(400, "command is required")
|
self.send_error(400, "command is required")
|
||||||
return
|
return
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue