cauldron/cauldron/bulk_sterilize.py
Kayos 4707e6aacc sterilize bulk: respect external cancel mid-loop
Job 3 surfaced the bug — when I set state=cancelled in the DB, the
daemon thread kept running and finalize() at the end overwrote it
with 'done'. User cancellations were getting silently undone.

Two changes:

1. Runners (run_bulk_preview, run_bulk_apply) now check the job's
   current state at the top of every iteration via the new lightweight
   db.get_sterilize_job_state. If the state has moved to a terminal
   value (cancelled, failed, done) externally, the loop returns
   without finalizing.

2. db.finalize_sterilize_job now refuses to overwrite a non-running
   state — added "AND state IN ('running','applying')" to the WHERE
   clause. Belt-and-suspenders for the same problem: even if a runner
   races past the state check and limps to its finalize call, the DB
   itself won't let the cancellation be replaced.

Net: hitting cancel via the UI button (or a DB update) now actually
stops the runner mid-flight. Polling roundtrip per recipe is one
SELECT — negligible vs the multi-second clawdforge call that
dominates each iteration.
2026-04-30 10:02:53 -07:00

302 lines
12 KiB
Python

"""Background runner for bulk-sterilize jobs.
Architecture: a daemon thread per job. The thread holds a Mealie client
built from the starting user's decrypted token (passed in at start), walks
all recipes in the user's household, and for each one calls
Sterilizer.preview_recipe to get a structured proposal. Proposals are
written to cauldron_sterilize_proposals as we go; the job row tracks
overall progress.
Failure modes:
- clawdforge times out / errors → record `preview_error` on the proposal
row, increment `error_count`, keep going
- whole-job crash → an unhandled exception in the runner is logged and the
job is finalized as 'failed'; if the process itself dies first, the row
stays in 'running' until DB.fail_stuck_sterilize_jobs() runs at next app boot
The thread does NOT apply changes. After the walk completes the job
moves to state='review'; user reviews proposals in the UI and POSTs
/api/sterilize/bulk-apply/<id> to actually write back to Mealie.
"""
import json
import logging
import threading
from typing import Optional
from .db import DB
from .forge import ForgeError
from .mealie import Mealie, MealieError
from .sterilizer import Sterilizer
log = logging.getLogger(__name__)
def _ingredient_needs_sterilizing(ing: dict) -> bool:
"""Heuristic: an ingredient row needs work if it has display/note content
but no resolved food. Already-parsed rows (food.id present) are skipped
so we don't waste clawdforge calls or risk regressing Cobb's manual
cleanup."""
food = ing.get("food") or {}
food_name = food.get("name") if isinstance(food, dict) else None
if food_name:
return False
has_content = bool(ing.get("display") or ing.get("note") or ing.get("originalText"))
return has_content
def _recipe_needs_sterilizing(recipe: dict) -> bool:
ings = recipe.get("recipeIngredient") or []
if not ings:
return False
return any(_ingredient_needs_sterilizing(i) for i in ings)
def _recipe_household_id(recipe: dict) -> str | None:
"""Mealie 1.x returns householdId at the top level. Older shapes used
nested household.id — try both."""
hid = recipe.get("householdId") or recipe.get("household_id")
if hid:
return hid
h = recipe.get("household")
if isinstance(h, dict):
return h.get("id")
return None
def _user_household_id(mealie) -> str | None:
"""Resolve the authenticated user's householdId from /api/users/self.
Cached on the Mealie client instance to avoid hitting the endpoint
once per recipe."""
cache_attr = "_cached_household_id"
cached = getattr(mealie, cache_attr, None)
if cached is not None:
return cached
me = mealie.who_am_i()
hid = me.get("householdId") or me.get("household_id")
if not hid:
h = me.get("household")
if isinstance(h, dict):
hid = h.get("id")
setattr(mealie, cache_attr, hid)
return hid
def run_bulk_preview(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
) -> None:
    """Preview every recipe that needs work and store one proposal row each.

    Steps, in order:

    1. Resolve the caller's household, page through the recipe listing to
       collect (slug, name) pairs, and write the discovered total onto the
       job row.
    2. For each slug: return without finalizing if the job was moved to a
       terminal state from outside; fetch the full recipe; skip recipes
       owned by another household or with nothing left to parse; otherwise
       ask the sterilizer for a preview and persist it.
    3. Finalize the job as 'review'.

    Per-recipe Mealie/forge failures are stored as a proposal row carrying
    ``preview_error`` and counted through ``error_delta``; the walk then
    continues. Any other exception is logged and the job is finalized as
    'failed'.
    """
    log.info("[bulk-sterilize:%s] starting walk", job_id)

    def _stopped_externally() -> bool:
        # One state read per iteration; a terminal state set by someone else
        # means this runner must stand down.
        return db.get_sterilize_job_state(job_id) in ("cancelled", "failed", "done")

    try:
        # Resolved once up front. Recipes from another household in the same
        # group are readable but not writable for this user, so proposing
        # changes for them would only fail at apply time; they are skipped
        # below and no orphan proposals are created.
        own_household = _user_household_id(sterilizer.mealie)

        # The listing yields slug + name only; full recipes are fetched one
        # at a time further down.
        pending: list[tuple[str, str]] = []
        page = 0
        more_pages = True
        while more_pages:
            page += 1
            listing = sterilizer.mealie.list_recipes(page=page, per_page=100)
            for entry in listing.get("items") or []:
                entry_slug = entry.get("slug")
                if not entry_slug:
                    continue
                pending.append((entry_slug, entry.get("name") or entry_slug))
            page_count = listing.get("total_pages") or listing.get("totalPages") or 1
            more_pages = page < page_count

        # The job row was created from the caller's page-1 fetch; overwrite
        # its total with what the full walk found.
        with db.conn() as connection, connection.cursor() as cursor:
            cursor.execute(
                "UPDATE cauldron_sterilize_jobs SET total_recipes=%s WHERE id=%s",
                (len(pending), job_id),
            )

        for slug, name in pending:
            if _stopped_externally():
                log.info("[bulk-sterilize:%s] walk aborted — job state changed externally", job_id)
                return

            # Fetch the full recipe; a Mealie failure is recorded against
            # this slug and the walk moves on.
            try:
                recipe = sterilizer.mealie.get_recipe(slug)
            except MealieError as exc:
                log.warning("[bulk-sterilize:%s] mealie get_recipe(%s): %s", job_id, slug, exc)
                fetch_error = str(exc)[:500]
                db.update_sterilize_job_progress(
                    job_id,
                    error_delta=1,
                    current_slug=slug,
                    last_error=fetch_error,
                )
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=name,
                    ingredient_count=0,
                    proposal_json=None,
                    preview_error=fetch_error,
                )
                continue

            # Two reasons to skip without creating a proposal row: the recipe
            # belongs to a different household, or every ingredient is
            # already parsed.
            foreign = False
            if own_household:
                owner = _recipe_household_id(recipe)
                foreign = bool(owner) and owner != own_household
            if foreign or not _recipe_needs_sterilizing(recipe):
                db.update_sterilize_job_progress(
                    job_id, skipped_delta=1, current_slug=slug
                )
                continue

            db.update_sterilize_job_progress(job_id, current_slug=slug)
            try:
                proposal = sterilizer.preview_recipe(slug)
                proposal_name = proposal.get("name") or name
                ingredient_total = proposal.get("ingredient_count") or len(
                    proposal.get("proposals") or []
                )
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=proposal_name,
                    ingredient_count=ingredient_total,
                    proposal_json=json.dumps(proposal, ensure_ascii=False),
                    preview_error=None,
                )
                db.update_sterilize_job_progress(job_id, processed_delta=1)
            except (ForgeError, RuntimeError, MealieError) as exc:
                preview_error = str(exc)[:500]
                log.warning("[bulk-sterilize:%s] preview(%s): %s", job_id, slug, preview_error)
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=name,
                    ingredient_count=0,
                    proposal_json=None,
                    preview_error=preview_error,
                )
                db.update_sterilize_job_progress(
                    job_id, error_delta=1, current_slug=slug, last_error=preview_error
                )

        db.finalize_sterilize_job(job_id, state="review")
        log.info("[bulk-sterilize:%s] walk complete; awaiting review", job_id)
    except Exception:
        # Top-level boundary of the worker thread: record the crash and make
        # a best-effort attempt to mark the job failed.
        log.exception("[bulk-sterilize:%s] unhandled crash", job_id)
        try:
            db.finalize_sterilize_job(job_id, state="failed")
        except Exception:
            log.exception("[bulk-sterilize:%s] could not mark failed", job_id)
def run_bulk_apply(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
) -> None:
    """Write approved, not-yet-applied proposals back to Mealie.

    The caller is expected to have moved the job to state='applying'
    before starting this runner. Each proposal goes through
    ``sterilizer.apply_recipe``; a failure on one recipe is stored on its
    proposal row and counted, and the loop carries on with the next one.
    If the job is moved to a terminal state from outside, the runner
    returns without finalizing. Otherwise the job ends as 'done', or as
    'failed' when an unexpected exception escapes the loop.
    """
    log.info("[bulk-sterilize:%s] starting apply", job_id)

    foreign_household_msg = "skipped: recipe belongs to a different household — sterilize from that household's account"

    def _stopped_externally() -> bool:
        # One state read per iteration; a terminal state set by someone else
        # means this runner must stand down.
        return db.get_sterilize_job_state(job_id) in ("cancelled", "failed", "done")

    try:
        own_household = _user_household_id(sterilizer.mealie)

        for row in db.list_approved_unapplied_proposals(job_id):
            if _stopped_externally():
                log.info("[bulk-sterilize:%s] apply aborted — job state changed externally", job_id)
                return

            slug = row["recipe_slug"]
            try:
                db.update_sterilize_job_progress(job_id, current_slug=slug)

                # Proposals created before the walk-side household filter
                # existed may point at another household's recipe. Mealie
                # would reject the PUT with a 403 only after food/unit
                # creates had already landed in our own household, so the
                # ownership check happens first.
                if own_household:
                    owner = _recipe_household_id(sterilizer.mealie.get_recipe(slug))
                    if owner and owner != own_household:
                        db.mark_proposal_applied(job_id, slug, error=foreign_household_msg)
                        db.update_sterilize_job_progress(
                            job_id,
                            error_delta=1,
                            current_slug=slug,
                            last_error=foreign_household_msg,
                        )
                        continue

                sterilizer.apply_recipe(slug, create_missing=True)
                db.mark_proposal_applied(job_id, slug)
            except (ForgeError, RuntimeError, MealieError) as exc:
                detail = str(exc)
                # NOTE(review): plain substring match — any error text that
                # happens to contain "403" is reported as a household
                # mismatch; confirm whether the exception exposes a status
                # code that could be checked instead.
                failure = foreign_household_msg if "403" in detail else detail[:500]
                log.warning("[bulk-sterilize:%s] apply(%s): %s", job_id, slug, failure)
                db.mark_proposal_applied(job_id, slug, error=failure)
                db.update_sterilize_job_progress(
                    job_id, error_delta=1, current_slug=slug, last_error=failure
                )

        db.finalize_sterilize_job(job_id, state="done")
        log.info("[bulk-sterilize:%s] apply complete", job_id)
    except Exception:
        # Top-level boundary of the worker thread: record the crash and make
        # a best-effort attempt to mark the job failed.
        log.exception("[bulk-sterilize:%s] apply unhandled crash", job_id)
        try:
            db.finalize_sterilize_job(job_id, state="failed")
        except Exception:
            log.exception("[bulk-sterilize:%s] could not mark failed", job_id)
def spawn_preview_thread(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
    name: Optional[str] = None,
) -> threading.Thread:
    """Start ``run_bulk_preview`` for *job_id* on a daemon thread.

    The thread is named *name* when given, otherwise
    ``bulk-sterilize-preview-<job_id>``. The already-started thread is
    returned.
    """
    thread_name = name if name else f"bulk-sterilize-preview-{job_id}"
    worker = threading.Thread(
        target=run_bulk_preview,
        name=thread_name,
        kwargs=dict(db=db, job_id=job_id, sterilizer=sterilizer),
        daemon=True,
    )
    worker.start()
    return worker
def spawn_apply_thread(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
    name: Optional[str] = None,
) -> threading.Thread:
    """Start ``run_bulk_apply`` for *job_id* on a daemon thread.

    The thread is named *name* when given, otherwise
    ``bulk-sterilize-apply-<job_id>``. The already-started thread is
    returned.
    """
    thread_name = name if name else f"bulk-sterilize-apply-{job_id}"
    worker = threading.Thread(
        target=run_bulk_apply,
        name=thread_name,
        kwargs=dict(db=db, job_id=job_id, sterilizer=sterilizer),
        daemon=True,
    )
    worker.start()
    return worker