Previously, a malformed event (bad JSON, truncated, or corrupted bytes) wedged the node's checkpoint forever: every cycle retried the bad file, logged the failure, and never advanced past it, so all subsequent good events for that node were lost. Now, on the first parse failure, the event file is moved with an atomic os.replace to STATE_DIR/observer_failed_events/<node>/, with collision handling. The checkpoint advances and downstream events flow. Failures of the move itself are logged but do not crash the loop. This is complementary to yesterday's atomic_write_json fix for state files; it addresses the same race pattern on event files instead. The regression test asserts that the bad event is quarantined to the failed_events directory and removed from the hot path, that the subsequent good event is processed (node online), and that the checkpoint moves to the good event.
37 lines
724 B
TOML
37 lines
724 B
TOML
# Build backend declaration: the tools needed to build this project
# and the backend entry point that build frontends invoke.
[build-system]
requires = ["setuptools>=68"]
build-backend = "setuptools.build_meta"

# Core package metadata and runtime dependencies.
[project]
name = "ha-diag-agent"
version = "0.1.0"
requires-python = ">=3.11"
# Runtime requirements as PEP 508 strings, one per line.
dependencies = [
    "aiohttp>=3.9",
    "fastapi>=0.110",
    "uvicorn[standard]>=0.29",
    "pydantic>=2.6",
    "pydantic-settings>=2.2",
    "apscheduler>=3.10",
    "aiosqlite>=0.20",
    "structlog>=24.1",
    "pyyaml>=6.0",
]

# Optional extras; the "dev" extra lists the test tooling.
[project.optional-dependencies]
dev = [
    "pytest>=8.1",
    "pytest-asyncio>=0.23",
    "aioresponses>=0.7",
]

# setuptools package discovery: search for packages under src/.
[tool.setuptools.packages.find]
where = ["src"]

# pytest configuration.
[tool.pytest.ini_options]
# pytest-asyncio "auto" mode: async tests are collected without needing
# an explicit asyncio marker on each one.
asyncio_mode = "auto"
testpaths = ["tests"]
# Custom markers are registered here so pytest recognizes them.
markers = [
    "integration: requires running HA instances — run with -m integration",
]