"""Tests for HA diagnostic event routing in the supervisor."""
from __future__ import annotations

import json
import sys
import time
from pathlib import Path

import pytest

# Add src/ to path so we can import supervisor without installing
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))

import supervisor as supervisor_module
from supervisor import Supervisor


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _make_event(
    event_type: str,
    node: str = "chelsty-ha",
    service: str = "homeassistant",
    payload: dict | None = None,
    message: str = "",
) -> dict:
    """Build an event dict of the given type for use in tests.

    A falsy ``payload`` is replaced by ``{"location_tag": "chelsty"}`` and a
    falsy ``message`` by ``"Test event: <event_type>"``. The id embeds the
    node, the current integer epoch second, the event type and the service.
    """
    event_id = f"evt-{node}-{int(time.time())}-{event_type}-{service}-1"
    event = {
        "id": event_id,
        "type": event_type,
        "node": node,
        "service": service,
        "severity": "warning",
    }
    event["timestamp"] = int(time.time())
    event["message"] = message if message else f"Test event: {event_type}"
    event["payload"] = payload if payload else {"location_tag": "chelsty"}
    return event


def _write_event(events_dir: Path, event: dict) -> Path:
    """Dump *event* as JSON to ``<events_dir>/<event id>.json``; return that path."""
    target = events_dir / f"{event['id']}.json"
    target.write_text(json.dumps(event))
    return target


def _setup_supervisor(tmp_path: Path, monkeypatch) -> Supervisor:
    """Create a Supervisor whose module-level directories live under tmp_path.

    Creates ``actions``, ``events``, ``world``, ``repo/hosts`` and ``state``
    under *tmp_path*, then patches ``ACTIONS_DIR``, ``EVENTS_DIR``,
    ``WORLD_DIR`` and ``REPO_ROOT`` on the supervisor module. The ``state``
    directory is only created here; no module attribute is pointed at it.
    """
    sandbox = {
        name: tmp_path / name
        for name in ("actions", "events", "world", "repo", "state")
    }

    # The repo tree is created together with its hosts/ subdirectory.
    to_create = [
        sandbox["actions"],
        sandbox["events"],
        sandbox["world"],
        sandbox["repo"] / "hosts",
        sandbox["state"],
    ]
    for directory in to_create:
        directory.mkdir(parents=True, exist_ok=True)

    patched_attrs = {
        "ACTIONS_DIR": "actions",
        "EVENTS_DIR": "events",
        "WORLD_DIR": "world",
        "REPO_ROOT": "repo",
    }
    for attr_name, key in patched_attrs.items():
        monkeypatch.setattr(supervisor_module, attr_name, sandbox[key])

    supervisor = Supervisor()
    # Empty desired/actual state so reconcile drift loop is a no-op
    supervisor.desired_state = {"services": {}}
    supervisor.actual_state = {"services": {}, "nodes": {}, "incidents": {}}
    return supervisor


def _pending(tmp_path: Path, action_id: str) -> Path:
    """Return the path of the pending-action file for *action_id*."""
    pending_dir = tmp_path / "actions" / "pending"
    return pending_dir / f"{action_id}.json"


def _read_action(tmp_path: Path, state: str, action_id: str) -> dict:
    """Load and parse ``actions/<state>/<action_id>.json`` under *tmp_path*."""
    action_file = tmp_path / "actions" / state / f"{action_id}.json"
    return json.loads(action_file.read_text())


# ---------------------------------------------------------------------------
# 1. Each event type → correct action type
# ---------------------------------------------------------------------------

def test_ha_websocket_dead_generates_container_restart(tmp_path, monkeypatch):
    """A ha_websocket_dead event yields a pending container_restart action."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    event = _make_event("ha_websocket_dead")
    _write_event(tmp_path / "events", event)

    supervisor._process_ha_events()

    action_id = "container-restart-chelsty-ha-homeassistant"
    assert _pending(tmp_path, action_id).exists()

    written = _read_action(tmp_path, "pending", action_id)
    expected_fields = {
        "type": "container_restart",
        "service": "homeassistant",
        "node": "chelsty-ha",
    }
    for field, expected in expected_fields.items():
        assert written[field] == expected


@pytest.mark.parametrize("event_type,expected_suffix", [
    ("ha_integration_failed", "integration-failed"),
    ("ha_entity_unavailable_long", "entity-unavailable"),
    ("ha_automation_failing", "automation-failing"),
    ("ha_update_available", "update-available"),
    ("ha_recorder_lag", "recorder-lag"),
    ("ha_system_health_degraded", "system-health-degraded"),
])
def test_alert_only_events_generate_alert_actions(
    tmp_path, monkeypatch, event_type, expected_suffix
):
    """Each listed event type yields a pending alert_only action for the node."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    event = _make_event(event_type)
    _write_event(tmp_path / "events", event)

    supervisor._process_ha_events()

    action_id = f"alert-ha-{expected_suffix}-chelsty-ha"
    pending_file = _pending(tmp_path, action_id)
    assert pending_file.exists(), f"No pending action for {event_type}"

    written = _read_action(tmp_path, "pending", action_id)
    assert written["type"] == "alert_only"
    assert written["node"] == "chelsty-ha"
# ---------------------------------------------------------------------------
# 2. Transition suppression
# ---------------------------------------------------------------------------

def _seed_active_incident(sup: Supervisor, inc_id: str, seconds_ago: float) -> None:
    """Put an active containers_not_running incident into sup.actual_state.

    Marks ``chelsty-ha/homeassistant`` as unhealthy and linked to *inc_id*,
    and records the incident with ``last_occurrence`` set *seconds_ago*
    seconds before now.
    """
    sup.actual_state["services"]["chelsty-ha/homeassistant"] = {
        "node": "chelsty-ha",
        "service": "homeassistant",
        "status": "unhealthy",
        "incident_id": inc_id,
    }
    sup.actual_state["incidents"][inc_id] = {
        "id": inc_id,
        "status": "active",
        "trigger_type": "containers_not_running",
        "last_occurrence": time.time() - seconds_ago,
    }


def test_ha_websocket_dead_suppressed_during_transition(tmp_path, monkeypatch):
    """No container restart is queued while a recent incident is active."""
    sup = _setup_supervisor(tmp_path, monkeypatch)

    # Set up world state: homeassistant has an active containers_not_running
    # incident from 1 min ago — within 5-min window
    _seed_active_incident(sup, "inc-123-chelsty-ha-homeassistant", seconds_ago=60)

    _write_event(tmp_path / "events", _make_event("ha_websocket_dead"))
    sup._process_ha_events()

    action_id = "container-restart-chelsty-ha-homeassistant"
    assert not _pending(tmp_path, action_id).exists(), "Action should be suppressed during transition"


def test_ha_alert_suppressed_during_transition(tmp_path, monkeypatch):
    """No alert action is queued for any alert-only event during a transition."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    _seed_active_incident(sup, "inc-456-chelsty-ha-homeassistant", seconds_ago=30)

    for event_type in supervisor_module.HA_ALERT_ONLY_EVENTS:
        _write_event(tmp_path / "events", _make_event(event_type))
    sup._process_ha_events()

    for suffix in supervisor_module._HA_ALERT_ID_SUFFIX.values():
        action_id = f"alert-ha-{suffix}-chelsty-ha"
        assert not _pending(tmp_path, action_id).exists(), \
            f"{action_id} should be suppressed"


def test_transition_suppression_expires_after_window(tmp_path, monkeypatch):
    """After 5 min, transition window expires and events are routed normally."""
    sup = _setup_supervisor(tmp_path, monkeypatch)

    # 6.7 min ago — outside window
    _seed_active_incident(sup, "inc-789-chelsty-ha-homeassistant", seconds_ago=400)

    _write_event(tmp_path / "events", _make_event("ha_websocket_dead"))
    sup._process_ha_events()

    action_id = "container-restart-chelsty-ha-homeassistant"
    assert _pending(tmp_path, action_id).exists(), "Should not be suppressed after window"


# ---------------------------------------------------------------------------
# 3. Recovery cancellation
# ---------------------------------------------------------------------------

def test_ha_websocket_recovered_cancels_pending_restart(tmp_path, monkeypatch):
    """A recovery event moves the pending restart to cancelled/ with a reason."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    events_dir = tmp_path / "events"
    actions = tmp_path / "actions"
    (actions / "cancelled").mkdir(parents=True, exist_ok=True)

    # Pre-create a pending container_restart for homeassistant
    action_id = "container-restart-chelsty-ha-homeassistant"
    pending_action = {
        "action_id": action_id,
        "type": "container_restart",
        "node": "chelsty-ha",
        "service": "homeassistant",
        "status": "pending",
        "timestamp": time.time(),
    }
    pending_path = _pending(tmp_path, action_id)
    # Do not rely on anything else having created actions/pending yet;
    # exist_ok keeps this harmless if the directory is already there.
    pending_path.parent.mkdir(parents=True, exist_ok=True)
    pending_path.write_text(json.dumps(pending_action))

    _write_event(events_dir, _make_event("ha_websocket_recovered"))
    sup._process_ha_events()

    assert not pending_path.exists(), "Pending action should be cancelled"
    cancelled = actions / "cancelled" / f"{action_id}.json"
    assert cancelled.exists()
    data = json.loads(cancelled.read_text())
    assert data["cancelled_reason"] == "ha_websocket_recovered"


def test_ha_websocket_recovered_no_pending_action_is_noop(tmp_path, monkeypatch):
    """Recovery event when no pending restart exists must not raise."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    _write_event(tmp_path / "events", _make_event("ha_websocket_recovered"))
    sup._process_ha_events()  # should not raise


# ---------------------------------------------------------------------------
# 4. Cooldown
# ---------------------------------------------------------------------------

def test_ha_websocket_dead_cooldown_prevents_second_restart(tmp_path, monkeypatch):
    """Two ha_websocket_dead events within 30 min → only one container_restart."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    events_dir = tmp_path / "events"
    actions = tmp_path / "actions"
    (actions / "completed").mkdir(parents=True, exist_ok=True)

    # First event → action generated
    _write_event(events_dir, _make_event("ha_websocket_dead", service="homeassistant"))
    sup._process_ha_events()

    action_id = "container-restart-chelsty-ha-homeassistant"
    pending_path = _pending(tmp_path, action_id)
    assert pending_path.exists()

    # Simulate: action completed recently (< 30 min ago)
    action_data = json.loads(pending_path.read_text())
    action_data["status"] = "completed"
    action_data["finished_at"] = time.time() - 60  # 1 min ago
    (actions / "completed" / f"{action_id}.json").write_text(json.dumps(action_data))
    pending_path.unlink()

    # Second event — should be suppressed by cooldown
    event2 = _make_event("ha_websocket_dead", service="homeassistant")
    event2["id"] += "-2"  # different event ID
    _write_event(events_dir, event2)
    sup._process_ha_events()

    assert not pending_path.exists(), "Second restart within cooldown should be suppressed"


def test_ha_websocket_dead_cooldown_expires(tmp_path, monkeypatch):
    """After cooldown expires, a new ha_websocket_dead should generate an action."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    events_dir = tmp_path / "events"
    actions = tmp_path / "actions"
    (actions / "completed").mkdir(parents=True, exist_ok=True)

    action_id = "container-restart-chelsty-ha-homeassistant"
    # Pre-populate completed action with timestamp > 30 min ago
    old_action = {
        "action_id": action_id,
        "type": "container_restart",
        "status": "completed",
        "finished_at": time.time() - 3700,  # > 30 min
    }
    (actions / "completed" / f"{action_id}.json").write_text(json.dumps(old_action))

    _write_event(events_dir, _make_event("ha_websocket_dead"))
    sup._process_ha_events()

    assert _pending(tmp_path, action_id).exists(), "Should generate new restart after cooldown"


# ---------------------------------------------------------------------------
# 5. Location tag preserved
# ---------------------------------------------------------------------------

def test_location_tag_preserved_in_container_restart_payload(tmp_path, monkeypatch):
    """The event's location_tag is carried into the container_restart payload."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    event = _make_event(
        "ha_websocket_dead",
        payload={"location_tag": "chelsty", "extra": "data"},
    )
    _write_event(tmp_path / "events", event)
    sup._process_ha_events()

    action = _read_action(tmp_path, "pending", "container-restart-chelsty-ha-homeassistant")
    assert action["payload"]["location_tag"] == "chelsty"


def test_location_tag_preserved_in_alert_only_payload(tmp_path, monkeypatch):
    """The event's location_tag is carried into the alert_only payload."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    event = _make_event(
        "ha_entity_unavailable_long",
        payload={"location_tag": "ken", "count": 3},
    )
    _write_event(tmp_path / "events", event)
    sup._process_ha_events()

    action = _read_action(tmp_path, "pending", "alert-ha-entity-unavailable-chelsty-ha")
    assert action["payload"]["location_tag"] == "ken"
# ---------------------------------------------------------------------------
# 6. Dedup — same alert type twice → only one pending action
# ---------------------------------------------------------------------------

def test_alert_only_dedup_second_event_skipped(tmp_path, monkeypatch):
    """Two events of the same alert type leave exactly one pending action file."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    events_dir = tmp_path / "events"

    event1 = _make_event("ha_entity_unavailable_long")
    event2 = _make_event("ha_entity_unavailable_long")
    event2["id"] += "-2"  # distinct file name so both events land on disk
    _write_event(events_dir, event1)
    _write_event(events_dir, event2)

    sup._process_ha_events()

    action_id = "alert-ha-entity-unavailable-chelsty-ha"
    assert _pending(tmp_path, action_id).exists()
    # Only one file — not duplicated
    pending_files = list((tmp_path / "actions" / "pending").glob("alert-ha-entity-unavailable*.json"))
    assert len(pending_files) == 1


# ---------------------------------------------------------------------------
# 7. Non-HA events are ignored
# ---------------------------------------------------------------------------

def test_non_ha_events_not_routed(tmp_path, monkeypatch):
    """Event types outside the HA set produce no pending actions."""
    sup = _setup_supervisor(tmp_path, monkeypatch)
    events_dir = tmp_path / "events"

    non_ha_types = (
        "service_unhealthy",
        "containers_not_running",
        "node_online",
        "deployment_failed",
    )
    for etype in non_ha_types:
        # _make_event already stores etype under "type"; no extra assignment needed.
        _write_event(events_dir, _make_event(etype, service="mosquitto"))

    sup._process_ha_events()

    pending_files = list((tmp_path / "actions" / "pending").glob("*.json"))
    assert pending_files == [], "Non-HA events should not generate actions via HA path"