supervisor: also cancel pending actions for services removed from desired state
Previously, _cancel_resolved_pending_actions() only cancelled pending actions whose service had become healthy. This left orphaned actions behind when a service was removed from services.yaml or marked monitor:false.

Add Case 1: if the action's svc_key is no longer in desired_state (either removed entirely or skipped due to monitor:false), cancel it with reason service_removed_from_desired_state. The existing healthy-service check becomes Case 2 and keeps the reason drift_resolved_auto.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
410bfe7065
commit
46ae92b5c1
|
|
@ -414,20 +414,30 @@ class Supervisor:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
svc_key = f"{node}/{service}"
|
svc_key = f"{node}/{service}"
|
||||||
actual_info = self.actual_state["services"].get(svc_key)
|
|
||||||
if actual_info and actual_info.get("status") == "healthy":
|
cancel_reason = None
|
||||||
# Drift resolved — move to cancelled/
|
|
||||||
|
# Case 1: service is no longer in desired state (removed from services.yaml
|
||||||
|
# or marked monitor:false). The action was generated under old config.
|
||||||
|
if svc_key not in self.desired_state["services"]:
|
||||||
|
cancel_reason = "service_removed_from_desired_state"
|
||||||
|
|
||||||
|
# Case 2: drift resolved — service is now healthy in actual state.
|
||||||
|
elif self.actual_state["services"].get(svc_key, {}).get("status") == "healthy":
|
||||||
|
cancel_reason = "drift_resolved_auto"
|
||||||
|
|
||||||
|
if cancel_reason:
|
||||||
dest = cancelled_dir / action_file.name
|
dest = cancelled_dir / action_file.name
|
||||||
try:
|
try:
|
||||||
action["status"] = "cancelled"
|
action["status"] = "cancelled"
|
||||||
action["cancelled_reason"] = "drift_resolved_auto"
|
action["cancelled_reason"] = cancel_reason
|
||||||
action["cancelled_at"] = time.time()
|
action["cancelled_at"] = time.time()
|
||||||
with open(dest, "w") as f:
|
with open(dest, "w") as f:
|
||||||
json.dump(action, f, indent=2)
|
json.dump(action, f, indent=2)
|
||||||
action_file.unlink()
|
action_file.unlink()
|
||||||
logger.info(
|
logger.info(
|
||||||
f"Auto-cancelled {action_file.name}: "
|
f"Auto-cancelled {action_file.name}: "
|
||||||
f"{svc_key} is now healthy"
|
f"{svc_key} — {cancel_reason}"
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to cancel action {action_file.name}: {e}")
|
logger.error(f"Failed to cancel action {action_file.name}: {e}")
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue