homelab-codex-ws/services/ha-diag-agent/src/ha_diag/api.py
Oskar Kapala 31b48d162a feat(ha-diag-agent): WebSocketMonitor for real-time HA liveness
- persistent WS connection to HA with auth + state_changed subscription
- watchdog detects silence > 5min → emits ha_websocket_dead
- immediate ha_websocket_dead on disconnect, exponential reconnect with jitter
- cooldown prevents alert spam (10min repeat window while HA stays down)
- ha_websocket_recovered emitted on reconnect after a dead alert (allows
  supervisor to clear active incidents in Phase 5)
- new monitors/ subpackage for long-running tasks (vs interval checks/)
- /health endpoint now includes ws_connected field
- 26 unit tests, 3 integration tests (real HA + container stop/restart)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 15:00:18 +02:00

59 lines
1.5 KiB
Python

from __future__ import annotations
from typing import TYPE_CHECKING
from fastapi import FastAPI, HTTPException
if TYPE_CHECKING:
from .checks.base import Check
from .monitors.base import Monitor
# ASGI application object; the route decorators in this module attach their handlers to it.
app = FastAPI(title="ha-diag-agent", version="0.1.0")
# Module-level registry state, populated by main.py during startup
# (via register_checks() and register_ws_monitor()).
# Checks keyed by their ``name`` attribute; /health lists the keys and /trigger looks checks up here.
_checks: dict[str, "Check"] = {}
# Optional monitor; while it is None, /health omits the ``ws_connected`` field.
_ws_monitor: "Monitor | None" = None
# Node identity reported by /health; these placeholders stay until register_checks() overwrites them.
_node_name: str = "unknown"
_location_tag: str = "default"
def register_checks(checks: list["Check"], node_name: str, location_tag: str) -> None:
    """Add *checks* to the module registry and record the node identity.

    Each check is stored under its ``name`` attribute. An existing entry with
    the same name is replaced; entries registered under other names are kept.

    Args:
        checks: Check objects to make available to the API handlers.
        node_name: Value stored in the module-level ``_node_name``.
        location_tag: Value stored in the module-level ``_location_tag``.
    """
    global _node_name, _location_tag
    # Build the complete mapping first so that a check lacking ``name`` raises
    # before the shared registry has been modified.
    by_name = {}
    for check in checks:
        by_name[check.name] = check
    _checks.update(by_name)
    _node_name, _location_tag = node_name, location_tag
def register_ws_monitor(monitor: "Monitor") -> None:
    """Store *monitor* in the module-level ``_ws_monitor`` slot.

    Any previously stored monitor is replaced.

    Args:
        monitor: Monitor object to keep as the module's current monitor.
    """
    global _ws_monitor
    _ws_monitor = monitor
@app.get("/health")
async def health() -> dict:
    """Report the agent's registered identity and check names.

    Returns:
        A dict with ``status`` (always ``"ok"``), ``node``, ``location_tag``
        and ``checks`` (the names of the registered checks). ``ws_connected``
        is added only when a monitor has been registered, and carries that
        monitor's ``is_healthy`` value.
    """
    body: dict = {
        "status": "ok",
        "node": _node_name,
        "location_tag": _location_tag,
        "checks": list(_checks),
    }
    monitor = _ws_monitor
    if monitor is None:
        # No monitor registered: the field is omitted rather than reported as False.
        return body
    body["ws_connected"] = monitor.is_healthy
    return body
@app.post("/trigger/{check_name}")
async def trigger(check_name: str) -> dict:
    """Run one registered check on demand and return its result as a dict.

    Args:
        check_name: Registry key of the check to run (taken from the URL path).

    Returns:
        A dict with ``check`` (the requested name) followed by the result's
        ``healthy``, ``event_type``, ``severity``, ``message`` and ``payload``.

    Raises:
        HTTPException: With status 404 when no check is registered under
            *check_name*.
    """
    selected = _checks.get(check_name)
    if selected is None:
        raise HTTPException(status_code=404, detail=f"Unknown check: {check_name!r}")

    outcome = await selected.run()
    # Copy the result attributes in a fixed order so the response keys keep
    # the same layout on every call.
    report: dict = {"check": check_name}
    for field in ("healthy", "event_type", "severity", "message", "payload"):
        report[field] = getattr(outcome, field)
    return report