From 07bd498fd656ab0eb96f680856b2cb43fdba1393 Mon Sep 17 00:00:00 2001 From: Oskar Kapala Date: Fri, 29 May 2026 12:56:13 +0200 Subject: [PATCH] feat(ha-diag-agent): test environment with dual HA Docker instances - dockerized ken + chelsty HA test instances with template fixtures - snapshot/reset/wait scripts for fixture management - integration test infrastructure with separate marker - location_tag promoted from metadata to event payload (Phase 1 flag #3) - chelsty-infra target_url points to chelsty-ha via tailnet (Phase 1 flag #1) Co-Authored-By: Claude Sonnet 4.6 --- hosts/chelsty-infra/services.yaml | 2 +- services/ha-diag-agent/README.md | 21 ++++++-- services/ha-diag-agent/pyproject.toml | 3 ++ .../src/ha_diag/event_emitter.py | 11 +++- services/ha-diag-agent/src/ha_diag/main.py | 2 +- .../tests/integration/conftest.py | 38 +++++++++++++ .../integration/docker-compose.chelsty.yml | 27 ++++++++++ .../tests/integration/docker-compose.ken.yml | 27 ++++++++++ .../fixtures/chelsty/configuration.yaml | 18 +++++++ .../fixtures/ken/configuration.yaml | 18 +++++++ .../tests/integration/scripts/reset.sh | 36 +++++++++++++ .../tests/integration/scripts/snapshot.sh | 21 ++++++++ .../tests/integration/scripts/wait-for-ha.sh | 23 ++++++++ .../integration/test_heartbeat_integration.py | 53 +++++++++++++++++++ .../ha-diag-agent/tests/test_event_emitter.py | 26 +++++++++ 15 files changed, 319 insertions(+), 7 deletions(-) create mode 100644 services/ha-diag-agent/tests/integration/conftest.py create mode 100644 services/ha-diag-agent/tests/integration/docker-compose.chelsty.yml create mode 100644 services/ha-diag-agent/tests/integration/docker-compose.ken.yml create mode 100644 services/ha-diag-agent/tests/integration/fixtures/chelsty/configuration.yaml create mode 100644 services/ha-diag-agent/tests/integration/fixtures/ken/configuration.yaml create mode 100755 services/ha-diag-agent/tests/integration/scripts/reset.sh create mode 100755 
services/ha-diag-agent/tests/integration/scripts/snapshot.sh create mode 100755 services/ha-diag-agent/tests/integration/scripts/wait-for-ha.sh create mode 100644 services/ha-diag-agent/tests/integration/test_heartbeat_integration.py diff --git a/hosts/chelsty-infra/services.yaml b/hosts/chelsty-infra/services.yaml index 0bdc4d7..5bad6a8 100644 --- a/hosts/chelsty-infra/services.yaml +++ b/hosts/chelsty-infra/services.yaml @@ -11,7 +11,7 @@ services: local: [] external: [homeassistant] config: - target_url: http://localhost:8123 + target_url: http://100.70.180.90:8123 # chelsty-ha via Tailscale (HAOS, separate VM) location_tag: "chelsty" events_dir: /opt/homelab/events/chelsty-infra runtime: diff --git a/services/ha-diag-agent/README.md b/services/ha-diag-agent/README.md index 3cd391f..1955d1f 100644 --- a/services/ha-diag-agent/README.md +++ b/services/ha-diag-agent/README.md @@ -38,6 +38,21 @@ SQLite (/data/ha_diag.db) Event routing in supervisor (Phase 5) maps these to `notify` actions. +## Deployment model + +The agent is deployed **per-host** but targets a potentially remote HA instance: + +| Node | Agent runs on | HA lives on | HA URL | +|------|--------------|-------------|--------| +| piha | piha | piha (localhost) | `http://localhost:8123` | +| chelsty-infra | chelsty-infra | chelsty-ha (HAOS VM, separate machine) | `http://100.70.180.90:8123` | + +**chelsty-infra note:** Home Assistant runs on `chelsty-ha`, a dedicated Home Assistant +OS VM. `chelsty-infra` is the hypervisor but does not run HA itself. The agent on +`chelsty-infra` reaches HA over the Tailscale network (`100.70.180.90:8123`). If `chelsty-ha` +gets a new Tailscale IP, update `HA_URL` in `/opt/homelab/config/ha-diag-agent/.env` on +`chelsty-infra`. + ## Deployment ```bash @@ -45,10 +60,10 @@ Event routing in supervisor (Phase 5) maps these to `notify` actions. 
ssh oskar@ mkdir -p /opt/homelab/config/ha-diag-agent /var/lib/ha-diag-agent cat > /opt/homelab/config/ha-diag-agent/.env << 'EOF' -HA_URL=http://homeassistant.local:8123 +HA_URL=http://homeassistant.local:8123 # or http://100.70.180.90:8123 for chelsty-infra HA_TOKEN= -NODE_NAME=piha -LOCATION_TAG=ken +NODE_NAME=piha # or chelsty-infra +LOCATION_TAG=ken # or chelsty CHECK_INTERVAL=60 EOF diff --git a/services/ha-diag-agent/pyproject.toml b/services/ha-diag-agent/pyproject.toml index 92836da..7a975f6 100644 --- a/services/ha-diag-agent/pyproject.toml +++ b/services/ha-diag-agent/pyproject.toml @@ -31,3 +31,6 @@ where = ["src"] [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] +markers = [ + "integration: requires running HA instances — run with -m integration", +] diff --git a/services/ha-diag-agent/src/ha_diag/event_emitter.py b/services/ha-diag-agent/src/ha_diag/event_emitter.py index 5f55d63..3254d7a 100644 --- a/services/ha-diag-agent/src/ha_diag/event_emitter.py +++ b/services/ha-diag-agent/src/ha_diag/event_emitter.py @@ -13,9 +13,12 @@ from .models import EventRecord class EventEmitter: """Writes atomic JSON event files to the events directory.""" - def __init__(self, events_dir: Path, node_name: str) -> None: + def __init__( + self, events_dir: Path, node_name: str, location_tag: str = "" + ) -> None: self._events_dir = events_dir self._node_name = node_name + self._location_tag = location_tag self._seq = 0 events_dir.mkdir(parents=True, exist_ok=True) @@ -36,6 +39,10 @@ class EventEmitter: payload: dict[str, Any] | None = None, ) -> str: event_id = self._make_id(event_type, service) + merged: dict[str, Any] = {} + if self._location_tag: + merged["location_tag"] = self._location_tag + merged.update(payload or {}) record = EventRecord( id=event_id, timestamp=int(time.time()), @@ -45,7 +52,7 @@ class EventEmitter: node=self._node_name, service=service, message=message, - payload=payload or {}, + payload=merged, ) path = self._events_dir / 
f"{event_id}.json" tmp = path.with_suffix(".tmp") diff --git a/services/ha-diag-agent/src/ha_diag/main.py b/services/ha-diag-agent/src/ha_diag/main.py index 128a5c2..cfc5e55 100644 --- a/services/ha-diag-agent/src/ha_diag/main.py +++ b/services/ha-diag-agent/src/ha_diag/main.py @@ -77,7 +77,7 @@ async def run(settings: Settings) -> None: storage = Storage(settings.data_dir / "ha_diag.db") await storage.open() - emitter = EventEmitter(settings.events_dir, settings.node_name) + emitter = EventEmitter(settings.events_dir, settings.node_name, settings.location_tag) ha_client = HAClient(settings.ha_url, settings.ha_token) checks = [HeartbeatCheck(ha_client)] diff --git a/services/ha-diag-agent/tests/integration/conftest.py b/services/ha-diag-agent/tests/integration/conftest.py new file mode 100644 index 0000000..5fafc8f --- /dev/null +++ b/services/ha-diag-agent/tests/integration/conftest.py @@ -0,0 +1,38 @@ +"""Integration test fixtures. + +Integration tests require real HA instances. Start them with: + + docker compose -f tests/integration/docker-compose.ken.yml up -d + docker compose -f tests/integration/docker-compose.chelsty.yml up -d + tests/integration/scripts/wait-for-ha.sh http://localhost:8123 + tests/integration/scripts/wait-for-ha.sh http://localhost:8124 + +Then set TEST_HA_TOKEN (a long-lived HA token) and run: + + pytest tests/ -m integration + +All tests in this module are automatically skipped when TEST_HA_TOKEN is unset. 
+""" +from __future__ import annotations + +import os + +import pytest + + +@pytest.fixture(scope="session") +def ha_ken_url() -> str: + return os.getenv("TEST_HA_KEN_URL", "http://localhost:8123") + + +@pytest.fixture(scope="session") +def ha_chelsty_url() -> str: + return os.getenv("TEST_HA_CHELSTY_URL", "http://localhost:8124") + + +@pytest.fixture(scope="session") +def ha_token() -> str: + token = os.getenv("TEST_HA_TOKEN", "") + if not token: + pytest.skip("TEST_HA_TOKEN not set — skipping integration tests") + return token diff --git a/services/ha-diag-agent/tests/integration/docker-compose.chelsty.yml b/services/ha-diag-agent/tests/integration/docker-compose.chelsty.yml new file mode 100644 index 0000000..2273070 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/docker-compose.chelsty.yml @@ -0,0 +1,27 @@ +services: + ha-chelsty-init: + image: busybox + container_name: ha-test-chelsty-init + command: sh -c "cp -rn /fixtures/. /config/ && echo 'Fixtures copied'" + volumes: + - ./fixtures/chelsty:/fixtures:ro + - ha_chelsty_config:/config + restart: "no" + + ha-chelsty: + image: ghcr.io/home-assistant/home-assistant:stable + container_name: ha-test-chelsty + privileged: true + depends_on: + ha-chelsty-init: + condition: service_completed_successfully + ports: + - "8124:8123" + volumes: + - ha_chelsty_config:/config + environment: + TZ: UTC + restart: "no" + +volumes: + ha_chelsty_config: diff --git a/services/ha-diag-agent/tests/integration/docker-compose.ken.yml b/services/ha-diag-agent/tests/integration/docker-compose.ken.yml new file mode 100644 index 0000000..b074dbc --- /dev/null +++ b/services/ha-diag-agent/tests/integration/docker-compose.ken.yml @@ -0,0 +1,27 @@ +services: + ha-ken-init: + image: busybox + container_name: ha-test-ken-init + command: sh -c "cp -rn /fixtures/. 
/config/ && echo 'Fixtures copied'" + volumes: + - ./fixtures/ken:/fixtures:ro + - ha_ken_config:/config + restart: "no" + + ha-ken: + image: ghcr.io/home-assistant/home-assistant:stable + container_name: ha-test-ken + privileged: true + depends_on: + ha-ken-init: + condition: service_completed_successfully + ports: + - "8123:8123" + volumes: + - ha_ken_config:/config + environment: + TZ: UTC + restart: "no" + +volumes: + ha_ken_config: diff --git a/services/ha-diag-agent/tests/integration/fixtures/chelsty/configuration.yaml b/services/ha-diag-agent/tests/integration/fixtures/chelsty/configuration.yaml new file mode 100644 index 0000000..ae0db53 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/fixtures/chelsty/configuration.yaml @@ -0,0 +1,18 @@ +# Home Assistant test fixture — chelsty site +# Used by integration tests only. Not for production. + +homeassistant: + name: "Test HA - Chelsty" + latitude: 0.0 + longitude: 0.0 + elevation: 0 + unit_system: metric + time_zone: UTC + country: PL + +# Enable REST API +api: + +# Disable analytics +analytics: + reporting: false diff --git a/services/ha-diag-agent/tests/integration/fixtures/ken/configuration.yaml b/services/ha-diag-agent/tests/integration/fixtures/ken/configuration.yaml new file mode 100644 index 0000000..3b04a08 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/fixtures/ken/configuration.yaml @@ -0,0 +1,18 @@ +# Home Assistant test fixture — ken (piha) site +# Used by integration tests only. Not for production. 
+ +homeassistant: + name: "Test HA - Ken" + latitude: 0.0 + longitude: 0.0 + elevation: 0 + unit_system: metric + time_zone: UTC + country: PL + +# Enable REST API (tests authenticate with a long-lived access token) +api: + +# Disable analytics +analytics: + reporting: false diff --git a/services/ha-diag-agent/tests/integration/scripts/reset.sh b/services/ha-diag-agent/tests/integration/scripts/reset.sh new file mode 100755 index 0000000..6afced7 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/scripts/reset.sh @@ -0,0 +1,36 @@ +#!/bin/sh +# Reset an HA Docker volume from a fixture directory (or an extracted snapshot). +# Usage: reset.sh COMPOSE_FILE SERVICE FIXTURE_DIR +# +# Stops the service, removes the volume's .storage and copies the fixture +# directory over it, then restarts the service. + +set -e + +COMPOSE_FILE="${1:?Usage: reset.sh }" +SERVICE="${2:?}" +FIXTURE_DIR="${3:?}" +COMPOSE_DIR="$(dirname "$COMPOSE_FILE")" + +printf 'Resetting %s from %s...\n' "$SERVICE" "$FIXTURE_DIR" + +# Stop the service; ignore errors if it is not running +docker compose -f "$COMPOSE_FILE" stop "$SERVICE" 2>/dev/null || true + +# Use the first volume name listed by `docker compose config --volumes` +VOLUME_NAME="$(docker compose -f "$COMPOSE_FILE" config --volumes 2>/dev/null | head -1)" +if [ -z "$VOLUME_NAME" ]; then + printf 'Could not determine volume name from %s\n' "$COMPOSE_FILE" >&2 + exit 1 +fi + +# Remove HA's .storage directory, then copy the fixture files over the volume +docker run --rm \ + -v "$VOLUME_NAME":/config \ + -v "$(realpath "$FIXTURE_DIR")":/fixtures:ro \ + busybox \ + sh -c "rm -rf /config/.storage && cp -r /fixtures/. /config/" + +# Restart the service +docker compose -f "$COMPOSE_FILE" start "$SERVICE" +printf 'Reset complete. 
Run wait-for-ha.sh to confirm readiness.\n' diff --git a/services/ha-diag-agent/tests/integration/scripts/snapshot.sh b/services/ha-diag-agent/tests/integration/scripts/snapshot.sh new file mode 100755 index 0000000..18365e6 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/scripts/snapshot.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# Snapshot the current state of an HA Docker volume. +# Usage: snapshot.sh VOLUME [output_dir] +# +# Saves a tar.gz of the entire volume to output_dir (default: ./snapshots/). +# To restore, extract the archive into a directory and pass it to reset.sh. + +VOLUME="${1:?Usage: snapshot.sh [output_dir]}" +OUTPUT_DIR="${2:-./snapshots}" +SNAPSHOT_FILE="$OUTPUT_DIR/$VOLUME-$(date +%Y%m%d-%H%M%S).tar.gz" + +mkdir -p "$OUTPUT_DIR" +printf 'Snapshotting volume %s -> %s\n' "$VOLUME" "$SNAPSHOT_FILE" + +docker run --rm \ + -v "$VOLUME":/data:ro \ + alpine \ + tar czf - -C / data \ + > "$SNAPSHOT_FILE" + +printf 'Snapshot saved: %s\n' "$SNAPSHOT_FILE" diff --git a/services/ha-diag-agent/tests/integration/scripts/wait-for-ha.sh b/services/ha-diag-agent/tests/integration/scripts/wait-for-ha.sh new file mode 100755 index 0000000..710dce3 --- /dev/null +++ b/services/ha-diag-agent/tests/integration/scripts/wait-for-ha.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# Wait until a Home Assistant instance is ready (unauthenticated GET /api/ succeeds). +# Usage: wait-for-ha.sh [url] [timeout_seconds] +# +# Exit 0 = HA ready, Exit 1 = timeout reached. 
+ +URL="${1:-http://localhost:8123}" +TIMEOUT="${2:-120}" + +elapsed=0 +printf 'Waiting for HA at %s (timeout %ss)...\n' "$URL" "$TIMEOUT" + +while [ "$elapsed" -lt "$TIMEOUT" ]; do + if curl -sf --max-time 3 "$URL/api/" -o /dev/null 2>/dev/null; then + printf 'HA ready at %s (after %ss)\n' "$URL" "$elapsed" + exit 0 + fi + sleep 2 + elapsed=$((elapsed + 2)) +done + +printf 'Timeout: HA not ready at %s after %ss\n' "$URL" "$TIMEOUT" >&2 +exit 1 diff --git a/services/ha-diag-agent/tests/integration/test_heartbeat_integration.py b/services/ha-diag-agent/tests/integration/test_heartbeat_integration.py new file mode 100644 index 0000000..35fc44d --- /dev/null +++ b/services/ha-diag-agent/tests/integration/test_heartbeat_integration.py @@ -0,0 +1,53 @@ +"""Integration tests for HeartbeatCheck against real HA instances. + +Requires: + - docker compose -f tests/integration/docker-compose.ken.yml up -d + - docker compose -f tests/integration/docker-compose.chelsty.yml up -d + - TEST_HA_TOKEN= pytest tests/ -m integration +""" +from __future__ import annotations + +import pytest + +from ha_diag.checks.heartbeat import HeartbeatCheck +from ha_diag.event_emitter import EventEmitter +from ha_diag.ha_client import HAClient + + +@pytest.mark.integration +async def test_heartbeat_ken_healthy(ha_ken_url: str, ha_token: str, tmp_path): + client = HAClient(ha_ken_url, ha_token) + check = HeartbeatCheck(client) + result = await check.run() + assert result.healthy is True, f"HA ken not healthy: {result.message}" + assert result.event_type is None + + +@pytest.mark.integration +async def test_heartbeat_chelsty_healthy(ha_chelsty_url: str, ha_token: str): + client = HAClient(ha_chelsty_url, ha_token) + check = HeartbeatCheck(client) + result = await check.run() + assert result.healthy is True, f"HA chelsty not healthy: {result.message}" + assert result.event_type is None + + +@pytest.mark.integration +async def test_heartbeat_emits_event_on_failure(tmp_path): + client = 
HAClient("http://127.0.0.1:19999", "bad-token") # nothing here + check = HeartbeatCheck(client) + result = await check.run() + assert result.healthy is False + assert result.event_type == "ha_websocket_dead" + + +@pytest.mark.integration +async def test_heartbeat_event_written_to_filesystem(ha_ken_url: str, ha_token: str, tmp_path): + emitter = EventEmitter(tmp_path / "events", node_name="test-piha", location_tag="ken") + client = HAClient(ha_ken_url, ha_token) + check = HeartbeatCheck(client) + result = await check.run() + + assert result.healthy is True + # No event emitted for a healthy result + assert not list((tmp_path / "events").glob("*.json")) or result.event_type is None diff --git a/services/ha-diag-agent/tests/test_event_emitter.py b/services/ha-diag-agent/tests/test_event_emitter.py index 3b7a62e..e70e02b 100644 --- a/services/ha-diag-agent/tests/test_event_emitter.py +++ b/services/ha-diag-agent/tests/test_event_emitter.py @@ -60,3 +60,29 @@ def test_emitter_creates_events_dir(tmp_path: Path): new_dir = tmp_path / "nested" / "events" emitter = EventEmitter(new_dir, "my-node") assert new_dir.exists() + + +def test_location_tag_included_in_payload(tmp_events_dir: Path): + emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="ken") + event_id = emitter.emit("ha_websocket_dead", "error", "homeassistant", "msg") + data = json.loads((tmp_events_dir / f"{event_id}.json").read_text()) + assert data["payload"]["location_tag"] == "ken" + + +def test_location_tag_empty_not_in_payload(tmp_events_dir: Path): + emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="") + event_id = emitter.emit("ha_websocket_dead", "error", "homeassistant", "msg") + data = json.loads((tmp_events_dir / f"{event_id}.json").read_text()) + assert "location_tag" not in data["payload"] + + +def test_location_tag_does_not_override_explicit_payload_key(tmp_events_dir: Path): + emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="ken") + 
event_id = emitter.emit( + "ha_websocket_dead", "error", "homeassistant", "msg", + payload={"location_tag": "override", "other": "value"}, + ) + data = json.loads((tmp_events_dir / f"{event_id}.json").read_text()) + # Explicit payload key wins over the emitter's location_tag + assert data["payload"]["location_tag"] == "override" + assert data["payload"]["other"] == "value"