Polls /summary on VPS over Tailscale every 60s; computes freshness locally from last_update epoch (never trusts self-reported status). Alerts via Telegram Bot API directly after 3 consecutive failures; sends recovery message on heal. State (fail_count, alerted) persisted to volume so debounce survives restarts.

- services/brain-watchdog/: Python service, no external deps (stdlib only)
- hosts/piha/runtime/brain-watchdog/: override with mem_limit 64m
- hosts/piha/services.yaml + inventory/topology.yaml: manifest entries

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
43 lines
1 KiB
YAML
43 lines
1 KiB
YAML
# Per-host service manifest. Each key under `services` names one service and
# records its role, deployment model, exposure, declared dependencies, and
# on-host paths.
host: piha

services:
  # Diagnostic agent; its only declared dependency is the external
  # `homeassistant` service.
  ha-diag-agent:
    role: ha-diagnostic-agent
    deployment_model: docker-compose
    exposure: local-only
    offline_required: false
    depends_on:
      local: []
      external: [homeassistant]
    config:
      # NOTE(review): presumably the Home Assistant endpoint this agent
      # queries -- confirm against the agent's own configuration.
      target_url: http://localhost:8123
      location_tag: "ken"
      events_dir: /opt/homelab/events/piha
    runtime:
      config_path: /opt/homelab/config/ha-diag-agent
      data_path: /var/lib/ha-diag-agent

  # Node stability monitor; declares no local or external dependencies.
  node-agent:
    role: node-stability-monitor
    deployment_model: docker-compose
    exposure: local-only
    offline_required: true
    depends_on:
      local: []
      external: []
    runtime:
      config_path: /opt/homelab/config/node-agent
      data_path: /opt/homelab/state
      logs_path: /opt/homelab/events
|
|
brain-watchdog:
|
|
role: control-plane-watchdog
|
|
deployment_model: docker-compose
|
|
exposure: private
|
|
offline_required: false
|
|
depends_on:
|
|
local: []
|
|
external: [control-plane]
|
|
runtime:
|
|
config_path: /opt/homelab/config/brain-watchdog
|