Add LLM-assisted deploy retry

This commit is contained in:
Oskar Kapala 2026-04-22 21:41:22 +02:00
parent 0abe9cbf4b
commit 185a866b51
2 changed files with 61 additions and 13 deletions

View file

@ -75,6 +75,9 @@ SESSION_STATE:
D15: "Extended ./deploy_agent.py with deploy_service(service): generates compose, writes ./deployments/<service[-n]>/docker-compose.yml without overwriting existing directories, runs 'docker compose up -d' via subprocess, returns DEPLOYED or ERROR."
D16: "Updated ./deploy_agent.py with get_service_status(path), post-deploy 'docker compose ps' verification requiring 'Up', error outputs including ps output when available, and pre-deploy 'docker ps' port-80 check that adds prompt note 'Use a different port than 80'."
D17: "User requested git commit on 2026-04-22; commit scope includes ./deploy_agent.py and ./codex_context.yaml for deployment status and safety updates."
D18: "Git commit created on 2026-04-22: 0abe9cb 'Improve deploy agent safety checks'."
D19: "Updated ./deploy_agent.py to use local LLM for one bounded deployment-failure retry: capture service/error/status, request corrected YAML only, replace docker-compose.yml, retry once, then return final error plus last status if still failing."
D20: "User requested git commit on 2026-04-22; commit scope includes ./deploy_agent.py and ./codex_context.yaml for one-shot LLM-assisted deployment failure recovery."
todos:
T1: "For all future meaningful changes/decisions, update and overwrite ./codex_context.yaml."
T2: "DONE: Commit current changes."
@ -89,6 +92,8 @@ SESSION_STATE:
T11: "DONE: Add docker-compose YAML validation and retry logic."
T12: "DONE: Add automatic service deployment workflow."
T13: "DONE: Add deployment status verification and basic port-80 safety check."
T14: "DONE: Commit deploy agent safety/status updates."
T15: "DONE: Add one-shot LLM-assisted deployment failure recovery."
issues:
I1: "Tailscale DNS health warning: configured DNS servers unreachable."
I2: "Preferred gateway path unavailable: 100.108.208.3:8080 connection failed."

View file

@ -43,6 +43,30 @@ def generate_compose(service: str) -> str:
return _generate_compose(service) return _generate_compose(service)
def _is_valid_compose(compose: str) -> bool:
    """Return True when *compose* parses as YAML with a non-empty ``services`` mapping.

    Args:
        compose: Candidate docker-compose document as text.

    Returns:
        False if the text is not parseable YAML, if the top level is not a
        mapping, or if ``services`` is missing, not a mapping, or empty.
        True otherwise.
    """
    try:
        parsed = yaml.safe_load(compose)
    except yaml.YAMLError:
        return False
    if not isinstance(parsed, dict):
        return False
    services = parsed.get("services")
    # The key alone is not enough: ``services: null``, ``services: []`` and
    # ``services: {}`` all contain it but define nothing that could be started.
    return isinstance(services, dict) and bool(services)
def _strip_code_fence(text: str) -> str:
    """Return *text* without one surrounding Markdown code fence, if present.

    Text that does not start with a fence (after leading whitespace) is
    returned unchanged.
    """
    stripped = text.strip()
    if not stripped.startswith("```"):
        return text
    # Drop the opening fence line; it may carry a language tag such as "yaml".
    lines = stripped.splitlines()[1:]
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]
    return "\n".join(lines) + "\n"


def _fix_compose(service: str, error: str, status: str) -> str:
    """Request a corrected docker-compose document after a failed deployment.

    Builds a prompt from the service name, the deployment error and the
    status output, passes it to ``ask`` (defined elsewhere in this file) and
    validates the reply with ``_is_valid_compose``.

    Args:
        service: Name of the service whose deployment failed.
        error: Error text captured from the failed deployment attempt.
        status: Status output captured after the failure.

    Returns:
        The corrected compose YAML on success. If ``ask`` returns a string
        starting with ``"ERROR:"`` it is returned unchanged; if the reply does
        not validate, ``"ERROR: invalid docker-compose"`` is returned.
    """
    prompt = (
        "Deployment failed.\n\n"
        f"Service: {service}\n\n"
        f"Error: {error}\n\n"
        f"Status: {status}\n\n"
        "Fix the docker-compose YAML. Return ONLY corrected YAML."
    )
    response = ask(prompt)
    if response.startswith("ERROR:"):
        return response
    # Replies are sometimes wrapped in a Markdown code fence even when only
    # YAML was requested; a fence is not valid YAML and would otherwise make
    # a usable answer fail validation and waste the retry.
    candidate = _strip_code_fence(response)
    if not _is_valid_compose(candidate):
        return "ERROR: invalid docker-compose"
    return candidate
def get_service_status(path: Path) -> str: def get_service_status(path: Path) -> str:
try: try:
result = subprocess.run( result = subprocess.run(
@ -61,6 +85,23 @@ def get_service_status(path: Path) -> str:
return output return output
def _run_compose_up(path: Path) -> tuple[bool, str]:
    """Run ``docker compose up -d`` in *path* and report the outcome.

    Args:
        path: Directory used as the working directory for the command.

    Returns:
        ``(True, stdout)`` when the command exits with status 0, otherwise
        ``(False, message)``. The message is the stripped stderr, falling
        back to stripped stdout and then to a fixed text. If the command
        cannot be run at all, the message is the text of the exception.
    """
    command = ["docker", "compose", "up", "-d"]
    try:
        completed = subprocess.run(
            command,
            cwd=path,
            check=False,  # the exit status is inspected below instead of raising
            capture_output=True,
            text=True,
        )
    except Exception as exc:
        # Never raise from here: callers expect a (success, message) pair.
        return False, str(exc)

    stdout = completed.stdout.strip()
    if completed.returncode == 0:
        return True, stdout
    return False, completed.stderr.strip() or stdout or "docker compose up failed"
def deploy_service(service: str) -> str: def deploy_service(service: str) -> str:
prompt_extra = "" prompt_extra = ""
try: try:
@ -91,19 +132,21 @@ def deploy_service(service: str) -> str:
compose_path = target_dir / "docker-compose.yml" compose_path = target_dir / "docker-compose.yml"
compose_path.write_text(compose, encoding="utf-8") compose_path.write_text(compose, encoding="utf-8")
try: ok, error = _run_compose_up(target_dir)
subprocess.run( if not ok:
["docker", "compose", "up", "-d"], status = get_service_status(target_dir)
cwd=target_dir, fixed_compose = _fix_compose(service, error, status)
check=True, if fixed_compose.startswith("ERROR:"):
capture_output=True, if status and not status.startswith("ERROR:"):
text=True, return f"ERROR: {error}\n{status}"
) return f"ERROR: {error}"
except subprocess.CalledProcessError as exc: compose_path.write_text(fixed_compose, encoding="utf-8")
message = exc.stderr.strip() or exc.stdout.strip() or str(exc) ok, error = _run_compose_up(target_dir)
return f"ERROR: {message}" if not ok:
except Exception as exc: status = get_service_status(target_dir)
return f"ERROR: {exc}" if status and not status.startswith("ERROR:"):
return f"ERROR: {error}\n{status}"
return f"ERROR: {error}"
status = get_service_status(target_dir) status = get_service_status(target_dir)
if status.startswith("ERROR:"): if status.startswith("ERROR:"):