feat(planner-agent): add llm_router.py with local-first fallback chain

services/planner-agent/src/llm_router.py: - LLMRouter: async routing via litellm; chain = Qwen/Ollama → haiku → sonnet - Timeouts: 8s local, 30s cloud; asyncio.wait_for belt-and-suspenders - Rejection triggers: timeout, API error, refusal patterns, JSON schema fail - JSON fence extraction: recovers valid JSON from ```json fenced blocks - ModelMetrics: per-model success/fallback/error counters + success_rate() - Redis publish to 'llm_router_metrics' after every call (failure-safe) - redis_url=None disables Redis (useful in tests / edge nodes) - context= param adds caller label to all log lines for tracing services/planner-agent/tests/test_llm_router.py: - 34 tests, 0 network calls (litellm + Redis fully mocked) - Covers: primary success, JSON error fallback, refusal fallback, timeout fallback, API exception fallback, all-fail RuntimeError, schema validation, fence extraction, metrics recording, Redis publish, Redis failure isolation services/planner-agent/requirements.txt: - litellm>=1.40.0, redis[asyncio]>=5.0.0, jsonschema>=4.21.0 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-27 18:38:06 +02:00 · 2026-05-27 18:38:06 +02:00 · 1bbc511bb7
parent 603e10a364
commit 1bbc511bb7
3 changed files with 899 additions and 0 deletions
--- a/services/planner-agent/requirements.txt
+++ b/services/planner-agent/requirements.txt
@ -0,0 +1,3 @@
 litellm>=1.40.0
 redis[asyncio]>=5.0.0
 jsonschema>=4.21.0
--- a/services/planner-agent/src/llm_router.py
+++ b/services/planner-agent/src/llm_router.py
@ -0,0 +1,447 @@
 """
 llm_router.py — LLM routing with local-first fallback chain.
 Routing strategy:
  1. Local Qwen via Ollama   (piha:11434, timeout 8 s)
  2. claude-haiku-4-5        (Anthropic cloud, timeout 30 s)
  3. claude-sonnet-4-6       (Anthropic cloud, timeout 30 s)
 A model is rejected when it:
  - times out
  - raises any API / network exception
  - returns text matching a refusal pattern
  - returns JSON that fails the caller-supplied JSON Schema
 After every call (success or full-chain failure) a metrics event is
 published to the Redis channel "llm_router_metrics".
 Usage
 -----
    router = LLMRouter()
    result = await router.route(
        messages=[{"role": "user", "content": "What should I do?"}],
        schema={"type": "object", "required": ["action"], "properties": {...}},
    )
    print(result.model_used, result.content)
    await router.close()
 """
 import asyncio
 import json
 import logging
 import time
 from dataclasses import dataclass, field
 from typing import Any, Optional
 import litellm
 import redis.asyncio as aioredis
 from jsonschema import validate, ValidationError
 # NOTE(review): presumably silences litellm's extra informational/debug
 # console output — confirm against the litellm documentation.
 litellm.suppress_debug_info = True
 # Module-wide logger; every log call in this file goes through the
 # "llm_router" logger name.
 logger = logging.getLogger("llm_router")
 # ---------------------------------------------------------------------------
 # Refusal patterns — any substring match (case-insensitive) triggers fallback
 # ---------------------------------------------------------------------------
 # Matched by LLMRouter._detect_refusal, which lower-cases both the model
 # text and each pattern and tests plain substring containment; the first
 # pattern in list order that matches is the one reported.
 REFUSAL_PATTERNS: list[str] = [
    "nie wiem",  # Polish for "I don't know"
    "I cannot",
    "I can't",
    "as an AI",
    "I don't know",
    "I'm not able",
    "I am not able",
    "I'm unable",
    "I am unable",
    "beyond my capabilities",
 ]
 # ---------------------------------------------------------------------------
 # Data structures
 # ---------------------------------------------------------------------------
@dataclass
 class ModelConfig:
    """Settings for a single entry of the fallback chain."""

    # litellm model string, e.g. "ollama/qwen2.5:7b"
    name: str
    # hard wall-clock timeout in seconds
    timeout: float
    # override API base URL (Ollama needs this)
    api_base: Optional[str] = None
    # additional keyword arguments merged into the litellm call
    extra_kwargs: dict = field(default_factory=dict)

    def __str__(self) -> str:
        """Render as ``name[ @ api_base] (timeout=<n>s)`` for log lines."""
        pieces = [f"{self.name}"]
        if self.api_base:
            pieces.append(f" @ {self.api_base}")
        pieces.append(f" (timeout={self.timeout}s)")
        return "".join(pieces)
@dataclass
 class AttemptRecord:
    """Result of trying one model during a single LLMRouter.route() call."""
    model: str            # name of the model that was tried (ModelConfig.name)
    outcome: str          # "success" | "rejected" | "invalid"
    reason: Optional[str] # None on success
    latency_ms: int       # duration of this one attempt, in milliseconds
@dataclass
 class RouteResult:
    """What LLMRouter.route() produces for one routing call."""

    # parsed JSON (if schema given) or raw str
    content: Any
    raw_text: str
    # "none" if every model failed
    model_used: str
    attempts: list[AttemptRecord]
    # wall-clock from first attempt to return
    latency_ms: int

    @property
    def succeeded(self) -> bool:
        """True unless ``model_used`` carries the "none" sentinel."""
        return not self.model_used == "none"

    def to_dict(self) -> dict:
        """Return a plain-dict summary of the call and its attempts.

        ``content`` and ``raw_text`` are not included.
        """
        attempt_rows = []
        for attempt in self.attempts:
            attempt_rows.append(
                {
                    "model":      attempt.model,
                    "outcome":    attempt.outcome,
                    "reason":     attempt.reason,
                    "latency_ms": attempt.latency_ms,
                }
            )
        summary = {
            "model_used": self.model_used,
            "latency_ms": self.latency_ms,
            "attempts": attempt_rows,
        }
        return summary
 # ---------------------------------------------------------------------------
 # Metrics
 # ---------------------------------------------------------------------------
 class ModelMetrics:
    """In-memory counters keyed by model name and outcome.

    Outcomes: "success", "fallback", "error"
    ("fallback" = rejected but another model succeeded after it;
     "error"    = rejected and it was the last in chain or chain exhausted)

    No locking is performed; the counters live in a plain dict.
    """

    def __init__(self) -> None:
        self._counts: dict[str, dict[str, int]] = {}

    def record(self, model: str, outcome: str) -> None:
        """Add one to the ``outcome`` counter of ``model``.

        A model seen for the first time starts with all three standard
        outcomes at zero; an outcome outside that set gets its own key.
        """
        bucket = self._counts.setdefault(
            model, {"success": 0, "fallback": 0, "error": 0}
        )
        bucket[outcome] = bucket.get(outcome, 0) + 1

    def snapshot(self) -> dict[str, dict[str, int]]:
        """Return a copy of all counters that callers may mutate freely."""
        copied: dict[str, dict[str, int]] = {}
        for model, bucket in self._counts.items():
            copied[model] = dict(bucket)
        return copied

    def total_calls(self, model: str) -> int:
        """Return the sum over all outcomes for ``model`` (0 if unknown)."""
        bucket = self._counts.get(model)
        return sum(bucket.values()) if bucket else 0

    def success_rate(self, model: str) -> Optional[float]:
        """Return successes / total for ``model``, or None with no data."""
        total = self.total_calls(model)
        if not total:
            return None
        return self._counts[model].get("success", 0) / total
 # ---------------------------------------------------------------------------
 # Router
 # ---------------------------------------------------------------------------
 class LLMRouter:
    """Route LLM calls through a local-first fallback chain.

    Each call to route() walks ``self.chain`` in order and returns the
    first response that is not a call failure, not a refusal and (when a
    schema is supplied) parses and validates as JSON.  Per-model counters
    are kept in ``self.metrics`` and a metrics event is published to Redis
    after every call unless Redis is disabled.

    Parameters
    ----------
    redis_url:
        Redis connection URL for metrics publishing.
        Set to None to disable Redis (useful in tests / local dev).
    ollama_host:
        Base URL of the Ollama API.  Defaults to piha's Tailscale address.
    ollama_model:
        Model tag as known to Ollama (e.g. "qwen2.5:7b").
    chain:
        Override the entire fallback chain.  When None the default
        Qwen → haiku → sonnet chain is used.
    """
    DEFAULT_OLLAMA_HOST  = "http://100.108.208.3:11434"
    DEFAULT_OLLAMA_MODEL = "qwen2.5:7b"
    DEFAULT_REDIS_URL    = "redis://100.108.208.3:6379"

    def __init__(
        self,
        redis_url: Optional[str] = DEFAULT_REDIS_URL,
        ollama_host: str = DEFAULT_OLLAMA_HOST,
        ollama_model: str = DEFAULT_OLLAMA_MODEL,
        chain: Optional[list[ModelConfig]] = None,
    ) -> None:
        if chain is not None:
            self.chain = chain
        else:
            self.chain = [
                ModelConfig(
                    name=f"ollama/{ollama_model}",
                    timeout=8.0,
                    api_base=ollama_host,
                ),
                ModelConfig(
                    name="claude-haiku-4-5-20251001",
                    timeout=30.0,
                ),
                ModelConfig(
                    name="claude-sonnet-4-6",
                    timeout=30.0,
                ),
            ]
        self.metrics = ModelMetrics()
        self._redis_url = redis_url
        # Created lazily by _get_redis() on the first publish.
        self._redis: Optional[aioredis.Redis] = None

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    async def route(
        self,
        messages: list[dict],
        schema: Optional[dict] = None,
        context: Optional[str] = None,
    ) -> RouteResult:
        """Try each model in order; return the first valid response.

        Metrics for rejected models are recorded only once the outcome of
        the whole call is known: "fallback" when a later model succeeded,
        "error" when the chain was exhausted.

        Parameters
        ----------
        messages:
            OpenAI-style message list, e.g.
            [{"role": "user", "content": "..."}]
        schema:
            Optional JSON Schema dict.  When provided the model's response
            must be valid JSON that conforms to the schema.
        context:
            Optional free-text caller label included in log lines (e.g.
            "supervisor.reconcile") for easier tracing.

        Returns
        -------
        RouteResult
            The accepted response together with the per-attempt log.

        Raises
        ------
        RuntimeError
            When every model in the chain fails.  The exception message
            contains a JSON-formatted attempt log.
        """
        tag = f"[{context}] " if context else ""
        start = time.monotonic()
        attempts: list[AttemptRecord] = []
        for i, cfg in enumerate(self.chain):
            attempt_start = time.monotonic()
            logger.info(
                f"{tag}[llm_router] attempt {i+1}/{len(self.chain)}: {cfg}"
            )
            raw_text, call_error = await self._call_model(cfg, messages)
            attempt_ms = round((time.monotonic() - attempt_start) * 1000)
            if call_error:
                logger.warning(
                    f"{tag}[llm_router] {cfg.name} → rejected "
                    f"({call_error}) [{attempt_ms}ms]"
                )
                attempts.append(AttemptRecord(
                    model=cfg.name, outcome="rejected",
                    reason=call_error, latency_ms=attempt_ms,
                ))
                continue
            parsed, schema_error = self._validate(raw_text, schema)
            if schema_error:
                logger.warning(
                    f"{tag}[llm_router] {cfg.name} → invalid "
                    f"({schema_error}) [{attempt_ms}ms]"
                )
                attempts.append(AttemptRecord(
                    model=cfg.name, outcome="invalid",
                    reason=schema_error, latency_ms=attempt_ms,
                ))
                continue
            # ── success ───────────────────────────────────────────────
            # Everything already in `attempts` failed during this call and
            # has now been rescued by the current model → "fallback".
            for failed in attempts:
                self.metrics.record(failed.model, "fallback")
            self.metrics.record(cfg.name, "success")
            total_ms = round((time.monotonic() - start) * 1000)
            logger.info(
                f"{tag}[llm_router] {cfg.name} → success "
                f"[attempt {attempt_ms}ms, total {total_ms}ms]"
            )
            attempts.append(AttemptRecord(
                model=cfg.name, outcome="success",
                reason=None, latency_ms=attempt_ms,
            ))
            result = RouteResult(
                content=parsed,
                raw_text=raw_text,
                model_used=cfg.name,
                attempts=attempts,
                latency_ms=total_ms,
            )
            await self._publish_metrics(result)
            return result
        # ── all models exhausted ──────────────────────────────────────
        # No model succeeded, so none of the failures counts as a mere
        # fallback: every attempt is an "error".
        for failed in attempts:
            self.metrics.record(failed.model, "error")
        total_ms = round((time.monotonic() - start) * 1000)
        result = RouteResult(
            content=None,
            raw_text="",
            model_used="none",
            attempts=attempts,
            latency_ms=total_ms,
        )
        await self._publish_metrics(result)
        attempt_log = json.dumps(
            [{"model": a.model, "reason": a.reason} for a in attempts],
            indent=2,
        )
        raise RuntimeError(
            f"{tag}[llm_router] All {len(self.chain)} models in chain failed.\n"
            f"Attempts:\n{attempt_log}"
        )

    async def close(self) -> None:
        """Release the Redis connection, if one was ever created."""
        client = self._redis
        if client is None:
            return
        # Drop the reference first so a failing close cannot leave a
        # half-closed client cached for the next publish.
        self._redis = None
        # NOTE(review): aclose() is, to my knowledge, only available from
        # redis-py 5.0.1 while requirements.txt admits 5.0.0 — fall back to
        # the older close() coroutine when it is missing.
        close_fn = getattr(client, "aclose", None)
        if close_fn is None:
            close_fn = client.close
        await close_fn()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    async def _call_model(
        self,
        cfg: ModelConfig,
        messages: list[dict],
    ) -> tuple[str, Optional[str]]:
        """Invoke one model.  Returns (raw_text, error_reason|None).

        Exceptions derived from ``Exception`` are never propagated; they
        are converted into an error reason string.  A response whose text
        matches a refusal pattern is returned together with a
        "RefusalPattern matched" reason.
        """
        # extra_kwargs comes last so a chain entry can override the defaults.
        kwargs: dict[str, Any] = {
            "model":    cfg.name,
            "messages": messages,
            "timeout":  cfg.timeout,
            **cfg.extra_kwargs,
        }
        if cfg.api_base:
            kwargs["api_base"] = cfg.api_base
        try:
            # asyncio.wait_for as belt-and-suspenders: the timeout is also
            # handed to litellm, but the outer guard bounds the await even
            # if the inner timeout is not honoured.
            resp = await asyncio.wait_for(
                litellm.acompletion(**kwargs),
                timeout=cfg.timeout + 2,   # +2 s grace for HTTP overhead
            )
            text = (resp.choices[0].message.content or "").strip()
        except asyncio.TimeoutError:
            return "", f"Timeout after {cfg.timeout}s"
        except litellm.exceptions.Timeout:
            return "", f"Timeout after {cfg.timeout}s"
        except litellm.exceptions.APIConnectionError as e:
            return "", f"APIConnectionError: {e}"
        except litellm.exceptions.AuthenticationError as e:
            return "", f"AuthenticationError: {e}"
        except Exception as e:
            # Any other failure (including a malformed response object)
            # rejects this model instead of aborting the chain.
            return "", f"{type(e).__name__}: {e}"
        # Check for refusals in the model's own text
        refusal = self._detect_refusal(text)
        if refusal:
            return text, f"RefusalPattern matched: '{refusal}'"
        return text, None

    @staticmethod
    def _detect_refusal(text: str) -> Optional[str]:
        """Return the first matching refusal pattern, or None.

        Matching is a case-insensitive substring test against
        REFUSAL_PATTERNS, in list order.
        """
        lower = text.lower()
        for pattern in REFUSAL_PATTERNS:
            if pattern.lower() in lower:
                return pattern
        return None

    @staticmethod
    def _validate(
        text: str,
        schema: Optional[dict],
    ) -> tuple[Any, Optional[str]]:
        """Parse and validate the model response.

        Returns (parsed_content, error_reason|None).
        When schema is None, returns (raw_text, None) — only refusal
        detection (already done in _call_model) applies.
        """
        if schema is None:
            return text, None
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as exc:
            # Try to extract JSON from a markdown code fence
            extracted = _extract_json_from_fence(text)
            if extracted is not None:
                parsed = extracted
            else:
                return None, f"JSONDecodeError: {exc}"
        try:
            validate(instance=parsed, schema=schema)
        except ValidationError as exc:
            return None, f"SchemaValidationError: {exc.message}"
        return parsed, None

    async def _get_redis(self) -> Optional[aioredis.Redis]:
        """Return the cached Redis client, creating it on first use.

        Returns None when Redis was disabled with ``redis_url=None``.
        """
        if self._redis_url is None:
            return None
        if self._redis is None:
            self._redis = aioredis.from_url(
                self._redis_url,
                decode_responses=True,
                socket_connect_timeout=2,
                socket_timeout=2,
            )
        return self._redis

    async def _publish_metrics(self, result: RouteResult) -> None:
        """Best-effort publish to Redis channel 'llm_router_metrics'.

        The publish is awaited, so the caller waits for it (the client is
        created with 2 s socket timeouts).  Any exception raised while
        building or sending the event is logged and swallowed.
        """
        try:
            r = await self._get_redis()
            if r is None:
                return
            payload = {
                **result.to_dict(),
                "metrics_snapshot": self.metrics.snapshot(),
                "timestamp": time.time(),
            }
            await r.publish("llm_router_metrics", json.dumps(payload))
        except Exception as exc:
            # Never let a metrics failure break the caller
            logger.warning(f"[llm_router] metrics publish failed: {exc}")
 # ---------------------------------------------------------------------------
 # Utility
 # ---------------------------------------------------------------------------
 def _extract_json_from_fence(text: str) -> Optional[Any]:
    """Extract JSON from a ```json ... ``` markdown code fence, if present."""
    import re
    match = re.search(r"```(?:json)?\s*(\{.*?\}|\[.*?\])\s*```", text, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            pass
    return None
--- a/services/planner-agent/tests/test_llm_router.py
+++ b/services/planner-agent/tests/test_llm_router.py
@ -0,0 +1,449 @@
 """
 Unit tests for llm_router.py.
 All LLM and Redis calls are mocked — no network required.
 Run:
    pip install pytest pytest-asyncio litellm jsonschema redis
    pytest services/planner-agent/tests/test_llm_router.py -v
 """
 import asyncio
 import json
 import sys
 from pathlib import Path
 from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 import pytest
 # Allow importing from src/ without installation
 sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
 from llm_router import (
    AttemptRecord,
    LLMRouter,
    ModelConfig,
    ModelMetrics,
    RouteResult,
    _extract_json_from_fence,
 )
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
 def _fake_completion(content: str):
    """Build a minimal litellm-style response object."""
    msg = MagicMock()
    msg.content = content
    choice = MagicMock()
    choice.message = msg
    resp = MagicMock()
    resp.choices = [choice]
    return resp
 def _chain_of(*models: tuple[str, float]) -> list[ModelConfig]:
    """Turn ``(name, timeout)`` pairs into a chain; api_base stays unset."""
    chain: list[ModelConfig] = []
    for model_name, seconds in models:
        chain.append(ModelConfig(name=model_name, timeout=seconds))
    return chain
 # ---------------------------------------------------------------------------
 # ModelMetrics
 # ---------------------------------------------------------------------------
 class TestModelMetrics:
    """Counter bookkeeping of ModelMetrics."""

    def test_record_and_snapshot(self):
        metrics = ModelMetrics()
        events = [
            ("qwen", "success"),
            ("qwen", "success"),
            ("qwen", "fallback"),
            ("haiku", "success"),
        ]
        for model, outcome in events:
            metrics.record(model, outcome)
        counts = metrics.snapshot()
        assert counts["qwen"]["success"] == 2
        assert counts["qwen"]["fallback"] == 1
        assert counts["haiku"]["success"] == 1

    def test_success_rate(self):
        metrics = ModelMetrics()
        for outcome in ("success", "fallback"):
            metrics.record("model-a", outcome)
        assert metrics.success_rate("model-a") == 0.5

    def test_success_rate_unknown_model(self):
        # A model that was never recorded has no rate at all.
        assert ModelMetrics().success_rate("ghost") is None

    def test_total_calls(self):
        metrics = ModelMetrics()
        for outcome in ("success", "error"):
            metrics.record("x", outcome)
        assert metrics.total_calls("x") == 2

    def test_snapshot_is_copy(self):
        metrics = ModelMetrics()
        metrics.record("x", "success")
        first = metrics.snapshot()
        first["x"]["success"] = 999  # mutate the copy
        second = metrics.snapshot()
        assert second["x"]["success"] == 1  # original unchanged
 # ---------------------------------------------------------------------------
 # RouteResult
 # ---------------------------------------------------------------------------
 class TestRouteResult:
    """The succeeded flag and dict export of RouteResult."""

    def test_succeeded(self):
        result = RouteResult(
            content="hello",
            raw_text="hello",
            model_used="model-a",
            attempts=[],
            latency_ms=100,
        )
        assert result.succeeded is True

    def test_not_succeeded(self):
        result = RouteResult(
            content=None,
            raw_text="",
            model_used="none",
            attempts=[],
            latency_ms=100,
        )
        assert result.succeeded is False

    def test_to_dict_structure(self):
        record = AttemptRecord(
            model="m", outcome="success", reason=None, latency_ms=50,
        )
        result = RouteResult(
            content="x",
            raw_text="x",
            model_used="m",
            attempts=[record],
            latency_ms=50,
        )
        exported = result.to_dict()
        assert exported["model_used"] == "m"
        assert len(exported["attempts"]) == 1
        assert exported["attempts"][0]["outcome"] == "success"
 # ---------------------------------------------------------------------------
 # _extract_json_from_fence
 # ---------------------------------------------------------------------------
 class TestExtractJsonFromFence:
    """Recovery of JSON wrapped in markdown code fences."""

    def test_json_fence(self):
        wrapped = 'Sure!\n```json\n{"a": 1}\n```\nDone.'
        recovered = _extract_json_from_fence(wrapped)
        assert recovered == {"a": 1}

    def test_plain_fence(self):
        wrapped = "```\n[1, 2]\n```"
        recovered = _extract_json_from_fence(wrapped)
        assert recovered == [1, 2]

    def test_no_fence(self):
        recovered = _extract_json_from_fence("no json here")
        assert recovered is None

    def test_broken_fence(self):
        recovered = _extract_json_from_fence("```json\n{broken```")
        assert recovered is None
 # ---------------------------------------------------------------------------
 # LLMRouter — validation & refusal detection
 # ---------------------------------------------------------------------------
 class TestLLMRouterValidation:
    """Schema validation and refusal detection helpers of LLMRouter."""

    def setup_method(self):
        # Redis disabled: these tests never publish metrics.
        self.router = LLMRouter(redis_url=None)

    def test_validate_no_schema_returns_text(self):
        value, error = self.router._validate("hello world", schema=None)
        assert value == "hello world"
        assert error is None

    def test_validate_valid_json(self):
        needs_action = {"type": "object", "required": ["action"]}
        value, error = self.router._validate(
            '{"action": "redeploy"}', needs_action
        )
        assert error is None
        assert value == {"action": "redeploy"}

    def test_validate_invalid_json(self):
        any_object = {"type": "object"}
        _, error = self.router._validate("not json {", any_object)
        assert error is not None
        assert "JSONDecodeError" in error

    def test_validate_schema_violation(self):
        needs_action = {"type": "object", "required": ["action"]}
        _, error = self.router._validate('{"other": 1}', needs_action)
        assert error is not None
        assert "SchemaValidationError" in error

    def test_validate_extracts_fenced_json(self):
        needs_action = {"type": "object", "required": ["action"]}
        fenced = '```json\n{"action": "restart"}\n```'
        value, error = self.router._validate(fenced, needs_action)
        assert error is None
        assert value == {"action": "restart"}

    def test_detect_refusal_nie_wiem(self):
        hit = self.router._detect_refusal("Nie wiem co mam zrobić")
        assert hit == "nie wiem"

    def test_detect_refusal_as_an_ai(self):
        # Text contains both "as an AI" and "I cannot"; first match wins.
        # We only assert that a refusal IS detected, not which pattern fires.
        hit = self.router._detect_refusal("As an AI I cannot help")
        assert hit is not None

    def test_detect_refusal_none(self):
        hit = self.router._detect_refusal("Sure, here is the action.")
        assert hit is None

    def test_detect_refusal_case_insensitive(self):
        hit = self.router._detect_refusal("I CANNOT do that")
        assert hit == "I cannot"
 # ---------------------------------------------------------------------------
 # LLMRouter — routing logic (mocked litellm + Redis)
 # ---------------------------------------------------------------------------
@pytest.fixture
 def router_no_redis():
    """Provide an LLMRouter with three test models and no Redis."""
    test_models = (
        ("local/qwen", 8.0),
        ("claude-haiku-test", 30.0),
        ("claude-sonnet-test", 30.0),
    )
    router = LLMRouter(redis_url=None, chain=_chain_of(*test_models))
    return router
@pytest.mark.asyncio
 class TestLLMRouterRouting:
    """Routing decisions of LLMRouter.route() with litellm patched out."""

    async def test_primary_success(self, router_no_redis):
        """Primary model succeeds — no fallback."""
        stub = AsyncMock(return_value=_fake_completion('{"action": "ok"}'))
        with patch("litellm.acompletion", stub):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "test"}],
                schema={"type": "object", "required": ["action"]},
            )
        assert outcome.model_used == "local/qwen"
        assert outcome.content == {"action": "ok"}
        assert len(outcome.attempts) == 1
        assert outcome.attempts[0].outcome == "success"

    async def test_fallback_on_json_error(self, router_no_redis):
        """Primary returns bad JSON → falls back to haiku which returns valid JSON."""
        scripted = AsyncMock(side_effect=[
            _fake_completion("not json at all"),
            _fake_completion('{"action": "restart"}'),
        ])
        with patch("litellm.acompletion", scripted):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "x"}],
                schema={"type": "object", "required": ["action"]},
            )
        first, second = outcome.attempts[0], outcome.attempts[1]
        assert outcome.model_used == "claude-haiku-test"
        assert outcome.content == {"action": "restart"}
        assert first.outcome == "invalid"
        assert second.outcome == "success"

    async def test_fallback_on_refusal(self, router_no_redis):
        """Primary returns refusal text → falls back."""
        scripted = AsyncMock(side_effect=[
            _fake_completion("I cannot help with that request."),
            _fake_completion("Sure! Here is the plan."),
        ])
        with patch("litellm.acompletion", scripted):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "x"}],
            )
        primary = outcome.attempts[0]
        assert outcome.model_used == "claude-haiku-test"
        assert primary.outcome == "rejected"
        assert "RefusalPattern" in primary.reason

    async def test_fallback_on_timeout(self, router_no_redis):
        """Primary times out → falls back to haiku."""
        # An exception instance in side_effect is raised when awaited.
        scripted = AsyncMock(side_effect=[
            asyncio.TimeoutError(),
            _fake_completion("fallback response"),
        ])
        with patch("litellm.acompletion", scripted):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "x"}],
            )
        assert outcome.model_used == "claude-haiku-test"
        assert "Timeout" in outcome.attempts[0].reason

    async def test_fallback_on_api_exception(self, router_no_redis):
        """Primary raises a connection error → falls back."""
        import litellm.exceptions
        refused = litellm.exceptions.APIConnectionError(
            message="Connection refused", llm_provider="ollama", model="qwen"
        )
        scripted = AsyncMock(side_effect=[refused, _fake_completion("ok")])
        with patch("litellm.acompletion", scripted):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "x"}],
            )
        assert outcome.model_used == "claude-haiku-test"
        assert "APIConnectionError" in outcome.attempts[0].reason

    async def test_all_models_fail_raises(self, router_no_redis):
        """All models return bad JSON → RuntimeError with attempt log."""
        always_bad = AsyncMock(return_value=_fake_completion("not json"))
        with patch("litellm.acompletion", always_bad):
            with pytest.raises(RuntimeError) as exc_info:
                await router_no_redis.route(
                    messages=[{"role": "user", "content": "x"}],
                    schema={"type": "object"},
                )
        failure_text = str(exc_info.value)
        assert "All 3 models in chain failed" in failure_text

    async def test_schema_none_no_json_required(self, router_no_redis):
        """Without a schema, plain text responses are accepted."""
        stub = AsyncMock(return_value=_fake_completion("Here is your plan."))
        with patch("litellm.acompletion", stub):
            outcome = await router_no_redis.route(
                messages=[{"role": "user", "content": "x"}],
            )
        assert outcome.content == "Here is your plan."
        assert outcome.succeeded

    async def test_metrics_recorded_on_success(self, router_no_redis):
        stub = AsyncMock(return_value=_fake_completion("ok"))
        with patch("litellm.acompletion", stub):
            await router_no_redis.route([{"role": "user", "content": "x"}])
        counts = router_no_redis.metrics.snapshot()
        assert counts["local/qwen"]["success"] == 1

    async def test_metrics_fallback_recorded(self, router_no_redis):
        """Primary fails → fallback → metrics show primary=fallback, haiku=success."""
        scripted = AsyncMock(side_effect=[
            _fake_completion("I cannot help"),  # refusal
            _fake_completion("ok"),
        ])
        with patch("litellm.acompletion", scripted):
            await router_no_redis.route([{"role": "user", "content": "x"}])
        counts = router_no_redis.metrics.snapshot()
        assert counts["local/qwen"]["fallback"] == 1
        assert counts["claude-haiku-test"]["success"] == 1

    async def test_context_label_in_logs(self, router_no_redis, caplog):
        """context= parameter appears in log output."""
        import logging
        stub = AsyncMock(return_value=_fake_completion("ok"))
        with patch("litellm.acompletion", stub):
            with caplog.at_level(logging.INFO, logger="llm_router"):
                await router_no_redis.route(
                    messages=[{"role": "user", "content": "x"}],
                    context="supervisor.reconcile",
                )
        logged = [record.message for record in caplog.records]
        assert any("supervisor.reconcile" in line for line in logged)
 # ---------------------------------------------------------------------------
 # LLMRouter — Redis metrics publish
 # ---------------------------------------------------------------------------
@pytest.mark.asyncio
 class TestLLMRouterRedis:
    """Metrics publishing to Redis, with the client replaced by a mock."""

    async def test_metrics_published_on_success(self):
        router = LLMRouter(
            redis_url="redis://localhost:6379",
            chain=_chain_of(("m1", 8.0),),
        )
        fake_redis = AsyncMock()
        # Pre-seed the cached client so no real connection is created.
        router._redis = fake_redis
        stub = AsyncMock(return_value=_fake_completion("ok"))
        with patch("litellm.acompletion", stub):
            await router.route([{"role": "user", "content": "x"}])
        fake_redis.publish.assert_awaited_once()
        channel, raw_event = fake_redis.publish.call_args[0]
        event = json.loads(raw_event)
        assert channel == "llm_router_metrics"
        assert event["model_used"] == "m1"
        assert "metrics_snapshot" in event
        assert "timestamp" in event

    async def test_redis_failure_does_not_raise(self):
        """A broken Redis must never break the LLM call result."""
        router = LLMRouter(
            redis_url="redis://localhost:6379",
            chain=_chain_of(("m1", 8.0),),
        )
        fake_redis = AsyncMock()
        fake_redis.publish.side_effect = ConnectionError("Redis down")
        router._redis = fake_redis
        stub = AsyncMock(return_value=_fake_completion("ok"))
        with patch("litellm.acompletion", stub):
            outcome = await router.route([{"role": "user", "content": "x"}])
        assert outcome.succeeded  # LLM call still returned

    async def test_metrics_published_on_full_failure(self):
        router = LLMRouter(
            redis_url="redis://localhost:6379",
            chain=_chain_of(("m1", 8.0),),
        )
        fake_redis = AsyncMock()
        router._redis = fake_redis
        always_bad = AsyncMock(return_value=_fake_completion("not json"))
        with patch("litellm.acompletion", always_bad):
            with pytest.raises(RuntimeError):
                await router.route(
                    messages=[{"role": "user", "content": "x"}],
                    schema={"type": "object"},
                )
        # Metrics must still be published even when we raise
        fake_redis.publish.assert_awaited_once()
        _, raw_event = fake_redis.publish.call_args[0]
        event = json.loads(raw_event)
        assert event["model_used"] == "none"