feat(ha-diag-agent): test environment with dual HA Docker instances

- dockerized ken + chelsty HA test instances with template fixtures
- snapshot/reset/wait scripts for fixture management
- integration test infrastructure with separate marker
- location_tag promoted from metadata to event payload (Phase 1 flag #3)
- chelsty-infra target_url points to chelsty-ha via tailnet (Phase 1 flag #1)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Oskar Kapala 2026-05-29 12:56:13 +02:00
parent 90c8e77bf7
commit 07bd498fd6
15 changed files with 319 additions and 7 deletions

View file

@ -11,7 +11,7 @@ services:
local: [] local: []
external: [homeassistant] external: [homeassistant]
config: config:
target_url: http://localhost:8123 target_url: http://100.70.180.90:8123 # chelsty-ha via Tailscale (HAOS, separate VM)
location_tag: "chelsty" location_tag: "chelsty"
events_dir: /opt/homelab/events/chelsty-infra events_dir: /opt/homelab/events/chelsty-infra
runtime: runtime:

View file

@ -38,6 +38,21 @@ SQLite (/data/ha_diag.db)
Event routing in supervisor (Phase 5) maps these to `notify` actions. Event routing in supervisor (Phase 5) maps these to `notify` actions.
## Deployment model
The agent is deployed **per-host** but targets a potentially remote HA instance:
| Node | Agent runs on | HA lives on | HA URL |
|------|--------------|-------------|--------|
| piha | piha | piha (localhost) | `http://localhost:8123` |
| chelsty-infra | chelsty-infra | chelsty-ha (HAOS VM, separate machine) | `http://100.70.180.90:8123` |
**chelsty-infra note:** Home Assistant runs on `chelsty-ha`, a dedicated Home Assistant
OS VM. `chelsty-infra` is the hypervisor but does not run HA itself. The agent on
`chelsty-infra` reaches HA over the Tailscale network (`100.70.180.90:8123`). If `chelsty-ha`
gets a new Tailscale IP, update `HA_URL` in `/opt/homelab/config/ha-diag-agent/.env` on
`chelsty-infra`, and the matching `target_url` in the `chelsty-infra` service configuration.
## Deployment ## Deployment
```bash ```bash
@ -45,10 +60,10 @@ Event routing in supervisor (Phase 5) maps these to `notify` actions.
ssh oskar@<node-ip> ssh oskar@<node-ip>
mkdir -p /opt/homelab/config/ha-diag-agent /var/lib/ha-diag-agent mkdir -p /opt/homelab/config/ha-diag-agent /var/lib/ha-diag-agent
cat > /opt/homelab/config/ha-diag-agent/.env << 'EOF' cat > /opt/homelab/config/ha-diag-agent/.env << 'EOF'
HA_URL=http://homeassistant.local:8123 HA_URL=http://homeassistant.local:8123 # or http://100.70.180.90:8123 for chelsty-infra
HA_TOKEN=<long-lived-token> HA_TOKEN=<long-lived-token>
NODE_NAME=piha NODE_NAME=piha # or chelsty-infra
LOCATION_TAG=ken LOCATION_TAG=ken # or chelsty
CHECK_INTERVAL=60 CHECK_INTERVAL=60
EOF EOF

View file

@ -31,3 +31,6 @@ where = ["src"]
[tool.pytest.ini_options] [tool.pytest.ini_options]
asyncio_mode = "auto" asyncio_mode = "auto"
testpaths = ["tests"] testpaths = ["tests"]
markers = [
"integration: requires running HA instances — run with -m integration",
]

View file

@ -13,9 +13,12 @@ from .models import EventRecord
class EventEmitter: class EventEmitter:
"""Writes atomic JSON event files to the events directory.""" """Writes atomic JSON event files to the events directory."""
def __init__(self, events_dir: Path, node_name: str) -> None: def __init__(
self, events_dir: Path, node_name: str, location_tag: str = ""
) -> None:
self._events_dir = events_dir self._events_dir = events_dir
self._node_name = node_name self._node_name = node_name
self._location_tag = location_tag
self._seq = 0 self._seq = 0
events_dir.mkdir(parents=True, exist_ok=True) events_dir.mkdir(parents=True, exist_ok=True)
@ -36,6 +39,10 @@ class EventEmitter:
payload: dict[str, Any] | None = None, payload: dict[str, Any] | None = None,
) -> str: ) -> str:
event_id = self._make_id(event_type, service) event_id = self._make_id(event_type, service)
merged: dict[str, Any] = {}
if self._location_tag:
merged["location_tag"] = self._location_tag
merged.update(payload or {})
record = EventRecord( record = EventRecord(
id=event_id, id=event_id,
timestamp=int(time.time()), timestamp=int(time.time()),
@ -45,7 +52,7 @@ class EventEmitter:
node=self._node_name, node=self._node_name,
service=service, service=service,
message=message, message=message,
payload=payload or {}, payload=merged,
) )
path = self._events_dir / f"{event_id}.json" path = self._events_dir / f"{event_id}.json"
tmp = path.with_suffix(".tmp") tmp = path.with_suffix(".tmp")

View file

@ -77,7 +77,7 @@ async def run(settings: Settings) -> None:
storage = Storage(settings.data_dir / "ha_diag.db") storage = Storage(settings.data_dir / "ha_diag.db")
await storage.open() await storage.open()
emitter = EventEmitter(settings.events_dir, settings.node_name) emitter = EventEmitter(settings.events_dir, settings.node_name, settings.location_tag)
ha_client = HAClient(settings.ha_url, settings.ha_token) ha_client = HAClient(settings.ha_url, settings.ha_token)
checks = [HeartbeatCheck(ha_client)] checks = [HeartbeatCheck(ha_client)]

View file

@ -0,0 +1,38 @@
"""Integration test fixtures.
Integration tests require real HA instances. Start them with:
docker compose -f tests/integration/docker-compose.ken.yml up -d
docker compose -f tests/integration/docker-compose.chelsty.yml up -d
tests/integration/scripts/wait-for-ha.sh http://localhost:8123
tests/integration/scripts/wait-for-ha.sh http://localhost:8124
Then set TEST_HA_TOKEN (a long-lived HA token) and run:
pytest tests/ -m integration
Tests that request the ``ha_token`` fixture are skipped when TEST_HA_TOKEN is unset;
tests that do not use that fixture still run.
"""
from __future__ import annotations
import os
import pytest
@pytest.fixture(scope="session")
def ha_ken_url() -> str:
    """Return the base URL of the "ken" test HA instance.

    The TEST_HA_KEN_URL environment variable overrides the local default.
    """
    default_url = "http://localhost:8123"
    return os.environ.get("TEST_HA_KEN_URL", default_url)
@pytest.fixture(scope="session")
def ha_chelsty_url() -> str:
    """Return the base URL of the "chelsty" test HA instance.

    The TEST_HA_CHELSTY_URL environment variable overrides the local default.
    """
    default_url = "http://localhost:8124"
    return os.environ.get("TEST_HA_CHELSTY_URL", default_url)
@pytest.fixture(scope="session")
def ha_token() -> str:
    """Return the HA token from TEST_HA_TOKEN, skipping the requesting test if unset."""
    value = os.environ.get("TEST_HA_TOKEN", "")
    if value:
        return value
    # Empty or missing token: skip instead of failing against a real HA instance.
    pytest.skip("TEST_HA_TOKEN not set — skipping integration tests")

View file

@ -0,0 +1,27 @@
services:
ha-chelsty-init:
image: busybox
container_name: ha-test-chelsty-init
command: sh -c "cp -rn /fixtures/. /config/ && echo 'Fixtures copied'"
volumes:
- ./fixtures/chelsty:/fixtures:ro
- ha_chelsty_config:/config
restart: "no"
ha-chelsty:
image: ghcr.io/home-assistant/home-assistant:stable
container_name: ha-test-chelsty
privileged: true
depends_on:
ha-chelsty-init:
condition: service_completed_successfully
ports:
- "8124:8123"
volumes:
- ha_chelsty_config:/config
environment:
TZ: UTC
restart: "no"
volumes:
ha_chelsty_config:

View file

@ -0,0 +1,27 @@
services:
ha-ken-init:
image: busybox
container_name: ha-test-ken-init
command: sh -c "cp -rn /fixtures/. /config/ && echo 'Fixtures copied'"
volumes:
- ./fixtures/ken:/fixtures:ro
- ha_ken_config:/config
restart: "no"
ha-ken:
image: ghcr.io/home-assistant/home-assistant:stable
container_name: ha-test-ken
privileged: true
depends_on:
ha-ken-init:
condition: service_completed_successfully
ports:
- "8123:8123"
volumes:
- ha_ken_config:/config
environment:
TZ: UTC
restart: "no"
volumes:
ha_ken_config:

View file

@ -0,0 +1,18 @@
# Home Assistant test fixture — chelsty site
# Used by integration tests only. Not for production.
homeassistant:
name: "Test HA - Chelsty"
latitude: 0.0
longitude: 0.0
elevation: 0
unit_system: metric
time_zone: UTC
country: PL
# Enable REST API
api:
# Disable analytics
analytics:
reporting: false

View file

@ -0,0 +1,18 @@
# Home Assistant test fixture — ken (piha) site
# Used by integration tests only. Not for production.
homeassistant:
name: "Test HA - Ken"
latitude: 0.0
longitude: 0.0
elevation: 0
unit_system: metric
time_zone: UTC
country: PL
# Enable REST API (no trusted-network auth provider is configured here, so requests still need a token)
api:
# Disable analytics
analytics:
reporting: false

View file

@ -0,0 +1,36 @@
#!/bin/sh
# Reset an HA Docker volume from a fixture directory.
# Usage: reset.sh <compose_file> <service_name> <fixture_dir>
#
# Stops the service, wipes the volume mounted at /config in that service's
# container, repopulates it from the fixture directory, then restarts.
#
# The service's container must already exist (run `docker compose up -d`
# once) because the volume is looked up from the container's mounts.
# <fixture_dir> must be a directory; this script does not unpack archives.

set -eu

USAGE='Usage: reset.sh <compose_file> <service_name> <fixture_dir>'
COMPOSE_FILE="${1:?$USAGE}"
SERVICE="${2:?$USAGE}"
FIXTURE_DIR="${3:?$USAGE}"

if [ ! -d "$FIXTURE_DIR" ]; then
    printf 'Fixture directory not found: %s\n' "$FIXTURE_DIR" >&2
    exit 1
fi
# Docker bind mounts need an absolute host path.
FIXTURE_ABS="$(cd "$FIXTURE_DIR" && pwd -P)"

printf 'Resetting %s from %s...\n' "$SERVICE" "$FIXTURE_DIR"

# Stop the service; best-effort, it may already be stopped.
docker compose -f "$COMPOSE_FILE" stop "$SERVICE" 2>/dev/null || true

# Resolve the real Docker volume name from the container's /config mount.
# `docker compose config --volumes` only prints the compose-level key, which
# lacks the project prefix Docker adds to the actual volume name.
CONTAINER_ID="$(docker compose -f "$COMPOSE_FILE" ps -a -q "$SERVICE" | head -n 1)"
if [ -z "$CONTAINER_ID" ]; then
    printf 'No container found for service %s in %s\n' "$SERVICE" "$COMPOSE_FILE" >&2
    exit 1
fi

VOLUME_NAME="$(docker inspect \
    --format '{{range .Mounts}}{{if eq .Destination "/config"}}{{.Name}}{{end}}{{end}}' \
    "$CONTAINER_ID")"
if [ -z "$VOLUME_NAME" ]; then
    printf 'Could not determine volume name from %s\n' "$COMPOSE_FILE" >&2
    exit 1
fi

# Wipe everything in the volume (not just .storage) and repopulate it.
docker run --rm \
    -v "$VOLUME_NAME":/config \
    -v "$FIXTURE_ABS":/fixtures:ro \
    busybox \
    sh -c 'find /config -mindepth 1 -maxdepth 1 -exec rm -rf {} + && cp -r /fixtures/. /config/'

# Restart the service
docker compose -f "$COMPOSE_FILE" start "$SERVICE"

printf 'Reset complete. Run wait-for-ha.sh to confirm readiness.\n'

View file

@ -0,0 +1,21 @@
#!/bin/sh
# Snapshot the current state of an HA Docker volume.
# Usage: snapshot.sh <volume_name> [output_dir]
#
# Saves a tar.gz of the entire volume to output_dir (default: ./snapshots/).
# The archive holds the volume contents under a top-level data/ directory.
# To restore, extract the archive and pass the extracted data/ directory to
# reset.sh as its fixture directory.

set -eu

VOLUME="${1:?Usage: snapshot.sh <volume_name> [output_dir]}"
OUTPUT_DIR="${2:-./snapshots}"
SNAPSHOT_FILE="$OUTPUT_DIR/$VOLUME-$(date +%Y%m%d-%H%M%S).tar.gz"

# `docker run -v <name>:...` silently creates a missing named volume, so check
# first; otherwise a typo would produce an empty snapshot.
if ! docker volume inspect "$VOLUME" >/dev/null 2>&1; then
    printf 'Volume not found: %s\n' "$VOLUME" >&2
    exit 1
fi

mkdir -p "$OUTPUT_DIR"

printf 'Snapshotting volume %s -> %s\n' "$VOLUME" "$SNAPSHOT_FILE"

# Remove the partial archive and fail loudly if the tar container errors out.
if ! docker run --rm \
    -v "$VOLUME":/data:ro \
    alpine \
    tar czf - -C / data \
    > "$SNAPSHOT_FILE"; then
    rm -f "$SNAPSHOT_FILE"
    printf 'Snapshot failed for volume %s\n' "$VOLUME" >&2
    exit 1
fi

printf 'Snapshot saved: %s\n' "$SNAPSHOT_FILE"

View file

@ -0,0 +1,23 @@
#!/bin/sh
# Wait until a Home Assistant instance is ready (its /api/ endpoint answers).
# Usage: wait-for-ha.sh <url> [timeout_seconds]
#
# Exit 0 = HA ready, Exit 1 = timeout reached.
#
# The probe sends no token, so an unauthenticated request to /api/ is answered
# with HTTP 401 once HA is serving. Both 200 and 401 therefore count as ready;
# the previous `curl -f` probe treated 401 as a failure.
# The timeout is approximate: only the 2s sleeps are counted, not the time
# spent inside each curl attempt (up to 3s).

URL="${1:-http://localhost:8123}"
TIMEOUT="${2:-120}"
elapsed=0

printf 'Waiting for HA at %s (timeout %ss)...\n' "$URL" "$TIMEOUT"

while [ "$elapsed" -lt "$TIMEOUT" ]; do
    # On connection failure curl reports status 000 and exits non-zero.
    status="$(curl -s -o /dev/null --max-time 3 -w '%{http_code}' "$URL/api/" 2>/dev/null || true)"
    case "$status" in
        200|401)
            printf 'HA ready at %s (after %ss)\n' "$URL" "$elapsed"
            exit 0
            ;;
    esac
    sleep 2
    elapsed=$((elapsed + 2))
done

printf 'Timeout: HA not ready at %s after %ss\n' "$URL" "$TIMEOUT" >&2
exit 1

View file

@ -0,0 +1,53 @@
"""Integration tests for HeartbeatCheck against real HA instances.
Requires:
- docker compose -f tests/integration/docker-compose.ken.yml up -d
- docker compose -f tests/integration/docker-compose.chelsty.yml up -d
- TEST_HA_TOKEN=<long-lived-token> pytest tests/ -m integration
"""
from __future__ import annotations
import pytest
from ha_diag.checks.heartbeat import HeartbeatCheck
from ha_diag.event_emitter import EventEmitter
from ha_diag.ha_client import HAClient
@pytest.mark.integration
async def test_heartbeat_ken_healthy(ha_ken_url: str, ha_token: str, tmp_path):
    """Heartbeat against the ken HA instance reports healthy with no event type."""
    outcome = await HeartbeatCheck(HAClient(ha_ken_url, ha_token)).run()

    assert outcome.healthy is True, f"HA ken not healthy: {outcome.message}"
    assert outcome.event_type is None
@pytest.mark.integration
async def test_heartbeat_chelsty_healthy(ha_chelsty_url: str, ha_token: str):
    """Heartbeat against the chelsty HA instance reports healthy with no event type."""
    outcome = await HeartbeatCheck(HAClient(ha_chelsty_url, ha_token)).run()

    assert outcome.healthy is True, f"HA chelsty not healthy: {outcome.message}"
    assert outcome.event_type is None
@pytest.mark.integration
async def test_heartbeat_emits_event_on_failure(tmp_path):
    """A heartbeat against an unused local port is unhealthy with the websocket-dead event type."""
    # Nothing is expected to listen on this port.
    unreachable = HAClient("http://127.0.0.1:19999", "bad-token")
    outcome = await HeartbeatCheck(unreachable).run()

    assert outcome.healthy is False
    assert outcome.event_type == "ha_websocket_dead"
@pytest.mark.integration
async def test_heartbeat_event_written_to_filesystem(ha_ken_url: str, ha_token: str, tmp_path):
    """A healthy heartbeat has no event type and leaves the events directory empty."""
    events_dir = tmp_path / "events"
    # Constructing the emitter creates events_dir; this test never calls emit().
    EventEmitter(events_dir, node_name="test-piha", location_tag="ken")
    client = HAClient(ha_ken_url, ha_token)
    check = HeartbeatCheck(client)

    result = await check.run()

    assert result.healthy is True
    # Check both conditions separately: the former `A or B` form passed
    # whenever the directory was empty, so event_type was never verified.
    assert result.event_type is None
    assert not list(events_dir.glob("*.json"))

View file

@ -60,3 +60,29 @@ def test_emitter_creates_events_dir(tmp_path: Path):
new_dir = tmp_path / "nested" / "events" new_dir = tmp_path / "nested" / "events"
emitter = EventEmitter(new_dir, "my-node") emitter = EventEmitter(new_dir, "my-node")
assert new_dir.exists() assert new_dir.exists()
def test_location_tag_included_in_payload(tmp_events_dir: Path):
    """The emitter's location_tag is written into the event payload."""
    emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="ken")
    written_id = emitter.emit("ha_websocket_dead", "error", "homeassistant", "msg")

    event_file = tmp_events_dir / f"{written_id}.json"
    payload = json.loads(event_file.read_text())["payload"]

    assert payload["location_tag"] == "ken"
def test_location_tag_empty_not_in_payload(tmp_events_dir: Path):
    """An empty location_tag adds no location_tag key to the payload."""
    emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="")
    written_id = emitter.emit("ha_websocket_dead", "error", "homeassistant", "msg")

    event_file = tmp_events_dir / f"{written_id}.json"
    payload = json.loads(event_file.read_text())["payload"]

    assert "location_tag" not in payload
def test_location_tag_does_not_override_explicit_payload_key(tmp_events_dir: Path):
    """A location_tag passed in the payload takes precedence over the emitter's own."""
    emitter = EventEmitter(tmp_events_dir, node_name="piha", location_tag="ken")
    explicit = {"location_tag": "override", "other": "value"}
    written_id = emitter.emit(
        "ha_websocket_dead",
        "error",
        "homeassistant",
        "msg",
        payload=explicit,
    )

    event_file = tmp_events_dir / f"{written_id}.json"
    payload = json.loads(event_file.read_text())["payload"]

    # The caller-supplied key wins over the emitter-level location_tag.
    assert payload["location_tag"] == "override"
    assert payload["other"] == "value"