Fix pending actions: node_exporter, zigbee2mqtt, chelsty-ha monitoring
node_exporter (new service):
- Add services/node_exporter/docker-compose.yml matching solaria deployment
  (network_mode: host, pid: host, /:/host:ro,rslave mount)
- Add services/node_exporter/service.yaml

zigbee2mqtt chelsty-infra override:
- Fix network_mode: host (mosquitto runs on host network, port 1883 on localhost)
- Fix volume mount: ./configuration.yaml → absolute /opt/homelab/config/zigbee2mqtt/
  (secrets stay in runtime config dir, never in Git)
- Remove MQTT_USER/MQTT_PASSWORD (mosquitto uses allow_anonymous true)
- Extend healthcheck start_period to 60s (z2m takes time on first start)

chelsty-ha/services.yaml:
- Remove node-agent entry entirely (never deployed, no plans to bootstrap now)
- Keep homeassistant with monitor: false (no node-agent = no health events)

supervisor: respect monitor: false in services.yaml
- Skip action generation for services where monitor=false
- Cleans up chelsty-ha entries from action queue without removing desired-state docs

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
fb7828b52b
commit
51002d4502
|
|
@ -2,14 +2,11 @@ host: chelsty-ha
|
|||
site: chelsty
|
||||
|
||||
services:
|
||||
node-agent:
|
||||
role: node-stability-monitor
|
||||
# LTE node: monitors homeassistant container health and emits events only.
|
||||
# No disk cleanup — HA database size is managed by recorder purge_keep_days
|
||||
# in HA configuration, not by Docker or filesystem operations.
|
||||
deployment_model: docker-compose
|
||||
exposure: local-only
|
||||
offline_required: true
|
||||
|
||||
homeassistant:
|
||||
role: home-automation-controller
|
||||
offline_required: true
|
||||
# monitor: false — chelsty-ha has no node-agent deployed, so there are no
|
||||
# container-health events for the observer to track. HA is monitored
|
||||
# indirectly via the chelsty-infra MQTT broker (if MQTT goes silent, HA
|
||||
# is likely down). Re-enable once node-agent is bootstrapped on this VM.
|
||||
monitor: false
|
||||
|
|
|
|||
|
|
@ -1,13 +1,17 @@
|
|||
services:
|
||||
zigbee2mqtt:
|
||||
# host network: mosquitto runs with network_mode: host on chelsty-infra,
|
||||
# so zigbee2mqtt must also use host networking to reach localhost:1883.
|
||||
network_mode: host
|
||||
volumes:
|
||||
- ./configuration.yaml:/app/data/configuration.yaml:ro
|
||||
# configuration.yaml lives in the runtime config dir (not in Git).
|
||||
# On chelsty-infra: /opt/homelab/config/zigbee2mqtt/configuration.yaml
|
||||
- /opt/homelab/config/zigbee2mqtt/configuration.yaml:/app/data/configuration.yaml:ro
|
||||
environment:
|
||||
- MQTT_USER=${MQTT_USER}
|
||||
- MQTT_PASSWORD=${MQTT_PASSWORD}
|
||||
# Healthcheck is already defined in base service, but we ensure compatibility
|
||||
- TZ=Europe/Warsaw
|
||||
healthcheck:
|
||||
test: ["CMD", "curl", "-f", "http://localhost:8080"]
|
||||
interval: 10s
|
||||
timeout: 5s
|
||||
test: ["CMD-SHELL", "wget -qO- http://localhost:8080 > /dev/null 2>&1 || exit 1"]
|
||||
interval: 30s
|
||||
timeout: 10s
|
||||
retries: 3
|
||||
start_period: 60s
|
||||
|
|
|
|||
|
|
@ -99,6 +99,16 @@ class Supervisor:
|
|||
data = yaml.safe_load(f)
|
||||
host_name = data.get("host")
|
||||
for svc_name, svc_info in data.get("services", {}).items():
|
||||
svc_info = svc_info or {}
|
||||
# monitor: false — service is documented as desired but
|
||||
# intentionally excluded from supervisor action generation.
|
||||
# Use this when a service is not yet bootstrapped on an
|
||||
# offline/LTE node so the queue stays clean until it is.
|
||||
if svc_info.get("monitor") is False:
|
||||
logger.debug(
|
||||
f"Skipping {host_name}/{svc_name}: monitor=false"
|
||||
)
|
||||
continue
|
||||
svc_key = f"{host_name}/{svc_name}"
|
||||
services[svc_key] = {
|
||||
"node": host_name,
|
||||
|
|
|
|||
11
services/node_exporter/docker-compose.yml
Normal file
11
services/node_exporter/docker-compose.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Compose definition for the node_exporter service
# (services/node_exporter/docker-compose.yml).
services:
  node_exporter:
    image: quay.io/prometheus/node-exporter:latest
    container_name: node_exporter
    command:
      # Must agree with the container-side path of the root mount under
      # `volumes` below.
      - '--path.rootfs=/host'
    # The container shares the host's network stack and PID namespace.
    network_mode: host
    pid: host
    restart: unless-stopped
    volumes:
      # Host root filesystem, mounted read-only with rslave propagation.
      - '/:/host:ro,rslave'
|
||||
17
services/node_exporter/service.yaml
Normal file
17
services/node_exporter/service.yaml
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# Service descriptor for node_exporter (services/node_exporter/service.yaml).
name: node_exporter
owner_node: vps
role: metrics-exporter
description: >
  Prometheus Node Exporter — exposes host-level metrics (CPU, memory, disk,
  network) for scraping by a Prometheus instance.
exposure: local-only
dependencies: []
restart_policy: unless-stopped
# HTTP probe against the exporter's metrics endpoint on localhost.
healthcheck:
  type: http
  endpoint: http://localhost:9100/metrics
  interval: 30s
# No persisted paths and no runtime environment variables are declared.
persistence:
  paths: []
runtime:
  env_vars: []
|
||||
Loading…
Reference in a new issue