From 4707e6aacc43c15641d8edd55d1926b5abbe5857 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 10:02:53 -0700 Subject: [PATCH] sterilize bulk: respect external cancel mid-loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job 3 surfaced the bug — when I set state=cancelled in the DB, the daemon thread kept running and finalize() at the end overwrote it with 'done'. User cancellations were getting silently undone. Two changes: 1. Runners (run_bulk_preview, run_bulk_apply) now check the job's current state at the top of every iteration via the new lightweight db.get_sterilize_job_state. If the state has moved to a terminal value (cancelled, failed, done) externally, the loop returns without finalizing. 2. db.finalize_sterilize_job now refuses to overwrite a non-running state — added "AND state IN ('running','applying')" to the WHERE clause. Belt-and-suspenders for the same problem: even if a runner races past the state check and limps to its finalize call, the DB itself won't let the cancellation be replaced. Net: hitting cancel via the UI button (or a DB update) now actually stops the runner mid-flight. Polling roundtrip per recipe is one SELECT — negligible vs the multi-second clawdforge call that dominates each iteration. --- cauldron/bulk_sterilize.py | 16 ++++++++++++++++ cauldron/db.py | 19 ++++++++++++++++++- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/cauldron/bulk_sterilize.py b/cauldron/bulk_sterilize.py index da6efe9..c0054f4 100644 --- a/cauldron/bulk_sterilize.py +++ b/cauldron/bulk_sterilize.py @@ -89,6 +89,11 @@ def run_bulk_preview( """Walk all recipes; persist a proposal row per recipe that needs work. Skip already-clean recipes. Move job state on completion.""" log.info("[bulk-sterilize:%s] starting walk", job_id) + + def _cancelled() -> bool: + s = db.get_sterilize_job_state(job_id) + return s in ("cancelled", "failed", "done") + try: # Resolve the user's household once. The walk skips any recipe in a # different household — Mealie's group model lets all members read @@ -123,6 +128,9 @@ def run_bulk_preview( ) for slug, name in slugs: + if _cancelled(): + log.info("[bulk-sterilize:%s] walk aborted — job state changed externally", job_id) + return try: # Cheap pre-check: skip if every ingredient is already parsed recipe = sterilizer.mealie.get_recipe(slug) @@ -208,10 +216,18 @@ def run_bulk_apply( (caller transitions it). Each recipe runs through Sterilizer.apply_recipe; any per-recipe failure is recorded but doesn't stop the loop.""" log.info("[bulk-sterilize:%s] starting apply", job_id) + + def _cancelled() -> bool: + s = db.get_sterilize_job_state(job_id) + return s in ("cancelled", "failed", "done") + try: user_household = _user_household_id(sterilizer.mealie) approved = db.list_approved_unapplied_proposals(job_id) for row in approved: + if _cancelled(): + log.info("[bulk-sterilize:%s] apply aborted — job state changed externally", job_id) + return slug = row["recipe_slug"] try: db.update_sterilize_job_progress(job_id, current_slug=slug) diff --git a/cauldron/db.py b/cauldron/db.py index c03bac5..bc6fa68 100644 --- a/cauldron/db.py +++ b/cauldron/db.py @@ -927,6 +927,16 @@ class DB: ) return cur.fetchone() + def get_sterilize_job_state(self, job_id: int) -> str | None: + """Cheap state-only fetch used by the runner to detect external + cancels mid-loop without dragging the whole row over the wire.""" + with self.conn() as c, c.cursor() as cur: + cur.execute( + "SELECT state FROM cauldron_sterilize_jobs WHERE id=%s", (job_id,) + ) + row = cur.fetchone() + return row["state"] if row else None + def latest_sterilize_job_for_household(self, household_id: int) -> dict | None: """Most recent job (by started_at) for the household — used by the UI to figure out what to render on /sterilize.""" @@ -979,7 +989,13 @@ class DB: ) def finalize_sterilize_job(self, job_id: int, *, state: str) -> None: - """Move job to a terminal state (review/done/failed/cancelled).""" + """Move job to a terminal state (review/done/failed/cancelled). + + Will NOT overwrite a job that's already terminal — if a runner is + about to call finalize('done') but the row was set to 'cancelled' + externally, we leave the cancellation in place. This is the + anti-zombie guard that keeps user cancels from being silently + replaced when the daemon thread limps to the finish line.""" with self.conn() as c, c.cursor() as cur: cur.execute( """ @@ -990,6 +1006,7 @@ class DB: last_progress_at = NOW(), current_slug = NULL WHERE id=%s + AND state IN ('running','applying') """, (state, state, job_id), )