"""Monitor agent: probe a fixed catalog of services and report their health.

Each cycle checks every selected service (HTTP GET or TCP connect), wraps the
result in a ``health`` event, and POSTs it as JSON to ``ORCHESTRATOR_URL``.

Configuration is read from environment variables at import time:

* ``NODE_NAME``          - node label in events (defaults to the hostname).
* ``ORCHESTRATOR_URL``   - event endpoint; required by ``main()``.
* ``SERVICES_TO_CHECK``  - comma-separated catalog names; empty means all.
* ``INTERVAL_SECONDS``   - target duration of one check cycle (default 30).
"""

import json
import os
import socket
import time
from http.client import HTTPException
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen

NODE_NAME = os.getenv("NODE_NAME") or socket.gethostname()
ORCHESTRATOR_URL = os.getenv("ORCHESTRATOR_URL")
# Blank entries and surrounding whitespace are dropped, so "a, ,b" -> {"a", "b"}.
SERVICES_TO_CHECK = {
    name.strip()
    for name in os.getenv("SERVICES_TO_CHECK", "").split(",")
    if name.strip()
}
INTERVAL_SECONDS = int(os.getenv("INTERVAL_SECONDS", "30"))
# Timeout applied to every probe and to the event POST.
REQUEST_TIMEOUT_SECONDS = 5

SERVICE_CATALOG = [
    {"name": "homeassistant", "type": "http", "url": "http://homeassistant:8123"},
    {"name": "lms", "type": "tcp", "host": "192.168.31.6", "port": 9000},
    {"name": "forgejo", "type": "http", "url": "http://forgejo:3000"},
    {"name": "nginx", "type": "http", "url": "http://nginx"},
    {"name": "mosquitto", "type": "tcp", "host": "mosquitto", "port": 1883},
]


def services_to_check():
    """Return the catalog entries selected by ``SERVICES_TO_CHECK``.

    An empty selection means "check everything". Names that do not appear in
    ``SERVICE_CATALOG`` are ignored.
    """
    if not SERVICES_TO_CHECK:
        return SERVICE_CATALOG
    return [
        service
        for service in SERVICE_CATALOG
        if service["name"] in SERVICES_TO_CHECK
    ]


def check_http(url, timeout=REQUEST_TIMEOUT_SECONDS):
    """Probe *url* with an HTTP GET and return ``"ok"`` or ``"error"``.

    Only a final status of exactly 200 counts as ``"ok"``. Any failure to
    build or perform the request is reported as ``"error"`` instead of being
    raised, so a single bad probe cannot stop the monitoring loop.
    """
    try:
        # Request() validates the URL and raises ValueError for a malformed
        # one, so it must live inside the try block as well.
        request = Request(url, headers={"User-Agent": "monitor-agent/1.0"})
        with urlopen(request, timeout=timeout) as response:
            return "ok" if response.status == 200 else "error"
    except (
        # HTTPError, URLError and TimeoutError are OSError subclasses; they
        # are listed to show which failures are expected here.
        HTTPError,
        URLError,
        TimeoutError,
        OSError,
        # Malformed responses (e.g. a bad status line) are not OSErrors.
        HTTPException,
        ValueError,
    ):
        return "error"


def check_tcp(host, port, timeout=REQUEST_TIMEOUT_SECONDS):
    """Probe *host*:*port* with a TCP connect and return ``"ok"`` or ``"error"``.

    *port* may be an int or a numeric string. A port that cannot be converted
    or is out of range yields ``"error"`` rather than an exception.
    """
    try:
        address = (host, int(port))
    except (TypeError, ValueError):
        return "error"
    try:
        with socket.create_connection(address, timeout=timeout):
            return "ok"
    except (OSError, OverflowError):
        # OverflowError covers a port outside the valid 0-65535 range.
        return "error"


def check_service(service):
    """Dispatch *service* to the probe matching its ``"type"`` field.

    Unknown or missing types are reported as ``"error"``.
    """
    service_type = service.get("type")
    if service_type == "http":
        return check_http(service["url"])
    if service_type == "tcp":
        return check_tcp(service["host"], service["port"])
    return "error"


def build_event(service, status):
    """Build the JSON-serialisable health event for *service*.

    ``timestamp`` is the wall-clock time from ``time.time()`` and ``run_id``
    is always ``None``.
    """
    return {
        "type": "health",
        "service": service["name"],
        "status": status,
        "timestamp": time.time(),
        "run_id": None,
        "node": NODE_NAME,
    }


def send_event(event):
    """POST *event* as JSON to ``ORCHESTRATOR_URL``.

    Raises:
        RuntimeError: if the endpoint answers with a status of 300 or above.
        OSError: (including ``URLError``/``HTTPError``) when ``urlopen`` fails.
    """
    body = json.dumps(event).encode("utf-8")
    request = Request(
        ORCHESTRATOR_URL,
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urlopen(request, timeout=REQUEST_TIMEOUT_SECONDS) as response:
        if response.status >= 300:
            raise RuntimeError(f"event endpoint returned {response.status}")


def main():
    """Run the check-and-report loop forever.

    Raises:
        SystemExit: if ``ORCHESTRATOR_URL`` is not configured.
    """
    if not ORCHESTRATOR_URL:
        raise SystemExit("ORCHESTRATOR_URL is required")

    selected_services = services_to_check()
    print(
        (
            f"[monitor-agent] ready node={NODE_NAME} "
            f"url={ORCHESTRATOR_URL} "
            f"services={[service['name'] for service in selected_services]}"
        ),
        flush=True,
    )

    while True:
        # Pace the loop with the monotonic clock: wall-clock adjustments
        # (NTP steps, manual changes) must not stretch or skip the sleep.
        started = time.monotonic()
        for service in selected_services:
            status = check_service(service)
            event = build_event(service, status)
            try:
                send_event(event)
            except Exception as exc:
                # Best effort: a failed delivery must not stop monitoring.
                print(f"[monitor-agent] send failed: {exc}", flush=True)
            # NOTE(review): confirm the event should be echoed to stdout on
            # every cycle rather than only after a failed send.
            print(json.dumps(event), flush=True)
        elapsed = time.monotonic() - started
        time.sleep(max(0, INTERVAL_SECONDS - elapsed))


if __name__ == "__main__":
    main()
    # NOTE(review): main() either raises SystemExit or loops forever, so this
    # marker is not reached today; confirm where it is meant to be emitted.
    print("MONITOR_AGENT_VERSION=1")