fix(node-agent): unique event IDs per service to prevent same-second overwrites
Multiple service_healthy (or containers_not_running) events emitted in the
same second for different containers resolved to the same filename, because the
pattern evt-{node}-{ts}-{type}.json has no per-service component — the second
write silently overwrote the first, so the observer only ever saw the last
container checked per event type per cycle.
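Fix: include a sanitized service name slug in the ID so events for different
services get distinct files, e.g. evt-vps-1234-service_healthy-node-agent.json.
The slug is the lowercased service name with every character outside a-z0-9
replaced by "-", truncated to 32 characters, so IDs are distinct only when the
resulting slugs differ.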
Also adds import re (required for re.sub in the slug generation).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
267742c7d7
commit
2f1965733f
|
|
@ -27,6 +27,7 @@ NEVER TOUCHED on any node:
|
|||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import socket
|
||||
import subprocess
|
||||
|
|
@ -153,7 +154,11 @@ class NodeAgent:
|
|||
def emit_event(self, event_type: str, severity: str, service,
|
||||
message: str, payload: dict = None):
|
||||
ts = int(time.time())
|
||||
event_id = f"evt-{self.node_name}-{ts}-{event_type}"
|
||||
# Include service slug in the ID so that multiple events of the same type
|
||||
# emitted within the same second (e.g. service_healthy for N containers)
|
||||
# don't overwrite each other — each gets a unique filename.
|
||||
svc_slug = re.sub(r"[^a-z0-9]", "-", (service or "node").lower())[:32].strip("-")
|
||||
event_id = f"evt-{self.node_name}-{ts}-{event_type}-{svc_slug}"
|
||||
event = {
|
||||
"id": event_id,
|
||||
"timestamp": ts,
|
||||
|
|
|
|||
Loading…
Reference in a new issue