11 new test cases in test_state_reliability.py covering:

- atomic_write_json: produces valid JSON, no .tmp left behind, overwrites, works with nested structures
- _load_actual_state: returns False on empty / truncated file, returns True on valid files, preserves last-known-good state across a parse failure
- reconcile: empty/truncated services.json or incidents.json generates zero actions (skip-cycle semantics proven end-to-end)
- healthy service with valid world state generates no spurious action

All 32 tests (11 new + 21 existing) pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
200 lines
7.6 KiB
Python
200 lines
7.6 KiB
Python
"""Tests for atomic writes and resilient world-state loading in the supervisor."""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|
import supervisor as supervisor_module
|
|
from supervisor import Supervisor, _atomic_write_json
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers (reused from test_supervisor_ha)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _setup_supervisor(tmp_path: Path, monkeypatch) -> Supervisor:
    """Return a Supervisor whose module-level directories live under *tmp_path*.

    Creates ``actions/``, ``events/``, ``world/`` and ``repo/hosts/`` below
    *tmp_path*, patches ``ACTIONS_DIR``, ``EVENTS_DIR``, ``WORLD_DIR`` and
    ``REPO_ROOT`` on the supervisor module to point at them, and hands back a
    new ``Supervisor`` with empty desired and actual state.
    """
    # Module attribute name -> directory it is redirected to for this test.
    redirected = {
        "ACTIONS_DIR": tmp_path / "actions",
        "EVENTS_DIR": tmp_path / "events",
        "WORLD_DIR": tmp_path / "world",
        "REPO_ROOT": tmp_path / "repo",
    }

    # The repo root itself is created implicitly via its "hosts" child.
    to_create = (
        redirected["ACTIONS_DIR"],
        redirected["EVENTS_DIR"],
        redirected["WORLD_DIR"],
        redirected["REPO_ROOT"] / "hosts",
    )
    for directory in to_create:
        directory.mkdir(parents=True, exist_ok=True)

    for attr_name, location in redirected.items():
        monkeypatch.setattr(supervisor_module, attr_name, location)

    supervisor = Supervisor()
    supervisor.desired_state = {"services": {}}
    supervisor.actual_state = {"services": {}, "nodes": {}, "incidents": {}}
    return supervisor
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 1. atomic_write_json correctness
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_atomic_write_json_produces_valid_json(tmp_path):
    """A dict written with _atomic_write_json parses back to an equal dict."""
    expected = {"services": {"vps/outline": {"status": "healthy"}}, "count": 42}
    target = tmp_path / "out.json"

    _atomic_write_json(target, expected)

    assert target.exists(), "output file must exist after atomic write"
    round_tripped = json.loads(target.read_text())
    assert round_tripped == expected
|
|
|
|
|
|
def test_atomic_write_json_no_tmp_left_behind(tmp_path):
    """No ``.tmp`` file may remain in the target directory after a write.

    Two checks are made so the test does not depend on how the temporary
    file is named: the sibling obtained by swapping the suffix
    (``world.tmp``), and any ``*.tmp`` entry in the directory (which also
    covers an appended suffix such as ``world.json.tmp``).
    """
    path = tmp_path / "world.json"
    _atomic_write_json(path, {"ok": True})

    # with_suffix() *replaces* ".json", so this only looks at "world.tmp".
    tmp = path.with_suffix(".tmp")
    assert not tmp.exists(), ".tmp must be cleaned up by os.replace"

    # Catch temp files named by appending ".tmp" (or any other *.tmp stray).
    leftovers = sorted(p.name for p in tmp_path.glob("*.tmp"))
    assert leftovers == [], f"temporary files left behind: {leftovers}"
|
|
|
|
|
|
def test_atomic_write_json_overwrites_existing(tmp_path):
    """Writing to a path that already holds JSON replaces the old content."""
    target = tmp_path / "state.json"
    target.write_text('{"old": true}')

    _atomic_write_json(target, {"new": True})

    current = json.loads(target.read_text())
    assert current == {"new": True}
|
|
|
|
|
|
def test_atomic_write_json_nested_structure(tmp_path):
    """Nested dicts, an empty dict and a list all survive a write/read cycle."""
    node_info = {"status": "online", "disk_usage_pct": 42}
    payload = {
        "nodes": {"vps": node_info},
        "incidents": {},
        "list": [1, 2, 3],
    }
    target = tmp_path / "complex.json"

    _atomic_write_json(target, payload)

    restored = json.loads(target.read_text())
    assert restored == payload
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# 2. Resilient loader: empty / truncated file → skip cycle, no drift
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _populate_desired(sup: Supervisor, svc_key: str = "vps/outline"):
    """Add *svc_key* to ``sup.desired_state["services"]`` as a running service.

    *svc_key* has the form ``"<node>/<service>"``; it is split on the first
    ``/`` only, and a key without a ``/`` raises ``ValueError`` from the
    unpacking below.
    """
    pieces = svc_key.split("/", 1)
    node_name, service_name = pieces
    entry = {
        "node": node_name,
        "service": service_name,
        "desired": "running",
    }
    sup.desired_state["services"][svc_key] = entry
|
|
|
|
|
|
def test_empty_services_json_skips_reconcile(tmp_path, monkeypatch):
    """With a zero-byte services.json, reconcile() must leave no pending action."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    _populate_desired(supervisor)

    # services.json is deliberately empty, as if a writer was cut off mid-write.
    world_dir = tmp_path / "world"
    world_files = (
        ("services.json", ""),
        ("nodes.json", "{}"),
        ("incidents.json", "{}"),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    supervisor.reconcile()

    pending_dir = tmp_path / "actions" / "pending"
    pending = list(pending_dir.glob("*.json"))
    assert pending == [], f"No actions should be generated on empty state file, got: {[p.name for p in pending]}"
|
|
|
|
|
|
def test_truncated_services_json_skips_reconcile(tmp_path, monkeypatch):
    """With services.json cut off mid-document, reconcile() must queue nothing."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    _populate_desired(supervisor)

    world_dir = tmp_path / "world"
    # The services payload stops in the middle of a string value.
    world_files = (
        ("services.json", '{"vps/outline": {"status": "hea'),
        ("nodes.json", "{}"),
        ("incidents.json", "{}"),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    supervisor.reconcile()

    pending_dir = tmp_path / "actions" / "pending"
    pending = list(pending_dir.glob("*.json"))
    assert pending == [], f"No actions expected on truncated state, got: {[p.name for p in pending]}"
|
|
|
|
|
|
def test_empty_incidents_json_skips_reconcile(tmp_path, monkeypatch):
    """A zero-byte incidents.json alone must also result in no pending action."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    _populate_desired(supervisor)

    world_dir = tmp_path / "world"
    # Only incidents.json is unreadable here; the other two files are valid.
    world_files = (
        ("services.json", "{}"),
        ("nodes.json", "{}"),
        ("incidents.json", ""),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    supervisor.reconcile()

    pending_dir = tmp_path / "actions" / "pending"
    pending = list(pending_dir.glob("*.json"))
    assert pending == [], f"No actions expected when any state file is unreadable, got: {[p.name for p in pending]}"
|
|
|
|
|
|
def test_load_actual_state_returns_false_on_empty_file(tmp_path, monkeypatch):
    """_load_actual_state() reports an empty services.json by returning False."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)

    world_dir = tmp_path / "world"
    world_files = (
        ("services.json", ""),
        ("nodes.json", "{}"),
        ("incidents.json", "{}"),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    # An exception here would fail the test, so this also proves "does not raise".
    assert supervisor._load_actual_state() is False
|
|
|
|
|
|
def test_load_actual_state_returns_true_on_valid_files(tmp_path, monkeypatch):
    """With three valid files, the load returns True and exposes the service."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)

    healthy_outline = {"node": "vps", "service": "outline", "status": "healthy"}
    world_dir = tmp_path / "world"
    world_files = (
        ("services.json", json.dumps({"vps/outline": healthy_outline})),
        ("nodes.json", '{"vps": {"status": "online"}}'),
        ("incidents.json", "{}"),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    loaded_ok = supervisor._load_actual_state()

    assert loaded_ok is True
    assert "vps/outline" in supervisor.actual_state["services"]
|
|
|
|
|
|
def test_parse_failure_preserves_last_known_good_state(tmp_path, monkeypatch):
    """A failed reload must leave the services from the last good load in place."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    world_dir = tmp_path / "world"
    services_file = world_dir / "services.json"

    # Step 1: all three files are valid, so the load succeeds.
    healthy_outline = {"node": "vps", "service": "outline", "status": "healthy"}
    services_file.write_text(json.dumps({"vps/outline": healthy_outline}))
    (world_dir / "nodes.json").write_text("{}")
    (world_dir / "incidents.json").write_text("{}")
    assert supervisor._load_actual_state() is True
    assert "vps/outline" in supervisor.actual_state["services"]

    # Step 2: services.json turns empty (e.g. read while being rewritten).
    services_file.write_text("")
    assert supervisor._load_actual_state() is False

    # Step 3: the service recorded in step 1 must still be present.
    assert "vps/outline" in supervisor.actual_state["services"], (
        "Last-known-good state must be preserved on parse failure"
    )
|
|
|
|
|
|
def test_healthy_service_does_not_generate_action(tmp_path, monkeypatch):
    """A desired service reported as healthy must not produce a pending action."""
    supervisor = _setup_supervisor(tmp_path, monkeypatch)
    _populate_desired(supervisor)

    healthy_outline = {"node": "vps", "service": "outline", "status": "healthy"}
    world_dir = tmp_path / "world"
    world_files = (
        ("services.json", json.dumps({"vps/outline": healthy_outline})),
        ("nodes.json", "{}"),
        ("incidents.json", "{}"),
    )
    for filename, content in world_files:
        (world_dir / filename).write_text(content)

    supervisor.reconcile()

    pending_dir = tmp_path / "actions" / "pending"
    pending = list(pending_dir.glob("*.json"))
    assert pending == [], "Healthy service must not generate any action"
|