- persistent WS connection to HA with auth + state_changed subscription
- watchdog detects silence > 5min → emits ha_websocket_dead
- immediate ha_websocket_dead on disconnect, exponential reconnect with jitter
- cooldown prevents alert spam (10min repeat window while HA stays down)
- ha_websocket_recovered emitted on reconnect after a dead alert (allows supervisor to clear active incidents in Phase 5)
- new monitors/ subpackage for long-running tasks (vs interval checks/)
- /health endpoint now includes ws_connected field
- 26 unit tests, 3 integration tests (real HA + container stop/restart)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
59 lines
1.5 KiB
Python
59 lines
1.5 KiB
Python
from __future__ import annotations
|
|
|
|
from typing import TYPE_CHECKING
|
|
|
|
from fastapi import FastAPI, HTTPException
|
|
|
|
if TYPE_CHECKING:
|
|
from .checks.base import Check
|
|
from .monitors.base import Monitor
|
|
|
|
# The ASGI application object; the route decorators in this module attach to it.
app = FastAPI(title="ha-diag-agent", version="0.1.0")

# Module-level registry, populated by main.py during startup.
# Annotations are unquoted because `from __future__ import annotations`
# keeps them lazy, so the TYPE_CHECKING-only names are never evaluated.
_checks: dict[str, Check] = {}  # check name -> check instance
_ws_monitor: Monitor | None = None  # stays None until a monitor is registered
_node_name: str = "unknown"
_location_tag: str = "default"
|
|
|
|
|
|
def register_checks(checks: list["Check"], node_name: str, location_tag: str) -> None:
    """Add *checks* to the registry and record this node's identity.

    Each check is stored under its ``name`` attribute; an entry already
    registered under the same name is replaced.  ``node_name`` and
    ``location_tag`` overwrite the previously stored values.
    """
    global _node_name, _location_tag

    # Build the full mapping first so the registry is only touched once
    # every check's name has been read successfully.
    by_name = {}
    for check in checks:
        by_name[check.name] = check
    _checks.update(by_name)

    _node_name, _location_tag = node_name, location_tag
|
|
|
|
|
|
def register_ws_monitor(monitor: "Monitor") -> None:
    """Install *monitor* as the module's WebSocket monitor.

    Once set, the ``/health`` handler reads this monitor's ``is_healthy``
    attribute to fill in the ``ws_connected`` field.
    """
    global _ws_monitor
    _ws_monitor = monitor
|
|
|
|
|
|
@app.get("/health")
async def health() -> dict:
    """Return a status summary: node identity and registered check names.

    ``ws_connected`` is included only when a WebSocket monitor has been
    registered; its value is that monitor's ``is_healthy`` attribute.
    """
    summary: dict = {
        "status": "ok",
        "node": _node_name,
        "location_tag": _location_tag,
        "checks": list(_checks),
    }
    # No monitor registered: omit the field entirely rather than report False.
    if _ws_monitor is None:
        return summary

    summary["ws_connected"] = _ws_monitor.is_healthy
    return summary
|
|
|
|
|
|
@app.post("/trigger/{check_name}")
async def trigger(check_name: str) -> dict:
    """Run the registered check named *check_name* and return its result.

    The response carries the check name followed by the result's
    ``healthy``, ``event_type``, ``severity``, ``message`` and ``payload``
    attributes.

    Raises:
        HTTPException: 404 when no check is registered under *check_name*.
    """
    selected = _checks.get(check_name)
    if selected is None:
        raise HTTPException(status_code=404, detail=f"Unknown check: {check_name!r}")

    outcome = await selected.run()

    # Copy the result attributes in a fixed order so the response layout
    # is stable.
    body = {"check": check_name}
    for field in ("healthy", "event_type", "severity", "message", "payload"):
        body[field] = getattr(outcome, field)
    return body
|