"""Background runner for bulk-sterilize jobs.

Architecture: a daemon thread per job. The thread holds a Mealie client built
from the starting user's decrypted token (passed in at start), walks all
recipes visible to the user, skips those outside the user's household, and for
each remaining one calls Sterilizer.preview_recipe to get a structured
proposal. Proposals are written to cauldron_sterilize_proposals as we go; the
job row tracks overall progress.

Failure modes:
  - clawdforge times out / errors → record `preview_error` on the proposal
    row, increment `error_count`, keep going
  - unhandled exception in the walk → logged, job is moved to 'failed'
  - hard crash (thread/process dies before the job can be marked failed) →
    row stays in 'running' until DB.fail_stuck_sterilize_jobs() runs at next
    app boot

The thread does NOT apply changes. After the walk completes the job moves to
state='review'; user reviews proposals in the UI and POSTs
/api/sterilize/bulk-apply/ to actually write back to Mealie.
"""
import json
import logging
import threading
from typing import Optional

from .db import DB
from .forge import ForgeError
from .mealie import Mealie, MealieError
from .sterilizer import Sterilizer

log = logging.getLogger(__name__)

# Job states that mean the job was ended from outside this thread. Once one of
# these is observed the runner stops and must not overwrite it.
_TERMINAL_JOB_STATES = ("cancelled", "failed", "done")

# Error strings persisted to the DB are truncated to this many characters.
_MAX_ERROR_LEN = 500

# Recorded on a proposal when the recipe lives in another household.
_FOREIGN_HOUSEHOLD_MSG = (
    "skipped: recipe belongs to a different household — "
    "sterilize from that household's account"
)

# Attribute used to memoize the user's household id on the Mealie client.
_HOUSEHOLD_CACHE_ATTR = "_cached_household_id"


def _ingredient_needs_sterilizing(ing: dict) -> bool:
    """Heuristic: an ingredient row needs work if it has display/note/
    originalText content but no resolved food.

    A row counts as already parsed when its ``food`` is a dict with a
    non-empty ``name``; such rows are skipped so we don't waste clawdforge
    calls or risk regressing Cobb's manual cleanup.
    """
    food = ing.get("food") or {}
    food_name = food.get("name") if isinstance(food, dict) else None
    if food_name:
        return False
    return bool(ing.get("display") or ing.get("note") or ing.get("originalText"))


def _recipe_needs_sterilizing(recipe: dict) -> bool:
    """Return True if at least one ingredient of *recipe* needs sterilizing.

    Recipes with no ``recipeIngredient`` entries never need work.
    """
    ingredients = recipe.get("recipeIngredient") or []
    return any(_ingredient_needs_sterilizing(i) for i in ingredients)


def _extract_household_id(payload: dict) -> str | None:
    """Pull a household id out of a Mealie payload (recipe or user).

    Tries the top-level ``householdId`` / ``household_id`` keys first, then
    falls back to a nested ``household.id``. Returns None if none is set.
    """
    hid = payload.get("householdId") or payload.get("household_id")
    if hid:
        return hid
    nested = payload.get("household")
    if isinstance(nested, dict):
        return nested.get("id")
    return None


def _recipe_household_id(recipe: dict) -> str | None:
    """Mealie 1.x returns householdId at the top level.
    Older shapes used nested household.id — try both."""
    return _extract_household_id(recipe)


def _user_household_id(mealie: Mealie) -> str | None:
    """Resolve the authenticated user's household id via ``mealie.who_am_i()``.

    The result is cached on the Mealie client instance to avoid hitting the
    endpoint once per recipe. A None result is stored too, but is not treated
    as a cache hit, so it will be looked up again on the next call.
    """
    cached = getattr(mealie, _HOUSEHOLD_CACHE_ATTR, None)
    if cached is not None:
        return cached
    hid = _extract_household_id(mealie.who_am_i())
    setattr(mealie, _HOUSEHOLD_CACHE_ATTR, hid)
    return hid


def _job_is_terminal(db: DB, job_id: int) -> bool:
    """Return True if the job row is already in a state set from outside."""
    return db.get_sterilize_job_state(job_id) in _TERMINAL_JOB_STATES


def _list_recipe_slugs(mealie: Mealie) -> list[tuple[str, str]]:
    """Page through ``mealie.list_recipes`` and collect ``(slug, name)`` pairs.

    Items without a slug are dropped; a missing name falls back to the slug.
    Paging stops once the current page reaches the reported page count
    (``total_pages`` / ``totalPages``, defaulting to 1 when absent).
    """
    slugs: list[tuple[str, str]] = []
    page = 1
    while True:
        resp = mealie.list_recipes(page=page, per_page=100)
        for item in resp.get("items") or []:
            slug = item.get("slug")
            if slug:
                slugs.append((slug, item.get("name") or slug))
        total_pages = resp.get("total_pages") or resp.get("totalPages") or 1
        if page >= total_pages:
            break
        page += 1
    return slugs


def run_bulk_preview(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
) -> None:
    """Walk all recipes; persist a proposal row per recipe that needs work.

    Recipes that are already clean, or that belong to a different household
    than the authenticated user, are counted as skipped and get no proposal
    row. Per-recipe Mealie/forge failures are recorded as an error proposal
    and the walk continues. On completion the job moves to state='review',
    unless its state was changed externally (cancelled/failed/done), in which
    case the walk stops and leaves that state untouched. Any other exception
    is logged and the job is moved to 'failed'.
    """
    log.info("[bulk-sterilize:%s] starting walk", job_id)
    mealie = sterilizer.mealie

    try:
        # Resolve the user's household once. The walk skips any recipe in a
        # different household — Mealie's group model lets all members read
        # everything but write only within their own household, so trying
        # to sterilize a foreign recipe would 403 at apply time. Skipping
        # at preview means no orphan proposals.
        user_household = _user_household_id(mealie)

        # Mealie's listing returns items with slug + name; full recipes are
        # resolved one at a time inside the loop below.
        slugs = _list_recipe_slugs(mealie)

        # The job row was created with the recipe count from the caller's
        # initial Mealie page-1 fetch. If we discovered more, update.
        # NOTE(review): no explicit commit here — presumably db.conn() commits
        # on context exit; confirm against DB.conn().
        with db.conn() as c, c.cursor() as cur:
            cur.execute(
                "UPDATE cauldron_sterilize_jobs SET total_recipes=%s WHERE id=%s",
                (len(slugs), job_id),
            )

        for slug, name in slugs:
            if _job_is_terminal(db, job_id):
                log.info(
                    "[bulk-sterilize:%s] walk aborted — job state changed externally",
                    job_id,
                )
                return

            try:
                # Cheap pre-check: skip if every ingredient is already parsed
                recipe = mealie.get_recipe(slug)
            except MealieError as e:
                log.warning(
                    "[bulk-sterilize:%s] mealie get_recipe(%s): %s", job_id, slug, e
                )
                err = str(e)[:_MAX_ERROR_LEN]
                db.update_sterilize_job_progress(
                    job_id, error_delta=1, current_slug=slug, last_error=err
                )
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=name,
                    ingredient_count=0,
                    proposal_json=None,
                    preview_error=err,
                )
                continue

            if user_household:
                rec_hh = _recipe_household_id(recipe)
                if rec_hh and rec_hh != user_household:
                    # Different household within the group — read-only for
                    # this user. Skip silently; no proposal row created.
                    db.update_sterilize_job_progress(
                        job_id, skipped_delta=1, current_slug=slug
                    )
                    continue

            if not _recipe_needs_sterilizing(recipe):
                db.update_sterilize_job_progress(
                    job_id, skipped_delta=1, current_slug=slug
                )
                continue

            db.update_sterilize_job_progress(job_id, current_slug=slug)
            try:
                proposal = sterilizer.preview_recipe(slug)
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=proposal.get("name") or name,
                    ingredient_count=proposal.get("ingredient_count")
                    or len(proposal.get("proposals") or []),
                    proposal_json=json.dumps(proposal, ensure_ascii=False),
                    preview_error=None,
                )
                db.update_sterilize_job_progress(job_id, processed_delta=1)
            except (ForgeError, RuntimeError, MealieError) as e:
                msg = str(e)[:_MAX_ERROR_LEN]
                log.warning("[bulk-sterilize:%s] preview(%s): %s", job_id, slug, msg)
                db.insert_sterilize_proposal(
                    job_id=job_id,
                    recipe_slug=slug,
                    recipe_name=name,
                    ingredient_count=0,
                    proposal_json=None,
                    preview_error=msg,
                )
                db.update_sterilize_job_progress(
                    job_id, error_delta=1, current_slug=slug, last_error=msg
                )

        # A cancel can arrive while the last recipe is being previewed; the
        # loop-top check never sees it, so re-check before moving to review.
        if _job_is_terminal(db, job_id):
            log.info(
                "[bulk-sterilize:%s] walk aborted — job state changed externally",
                job_id,
            )
            return

        db.finalize_sterilize_job(job_id, state="review")
        log.info("[bulk-sterilize:%s] walk complete; awaiting review", job_id)
    except Exception:
        # Top-level thread boundary: log and try to mark the job failed so it
        # does not sit in 'running'.
        log.exception("[bulk-sterilize:%s] unhandled crash", job_id)
        try:
            db.finalize_sterilize_job(job_id, state="failed")
        except Exception:
            log.exception("[bulk-sterilize:%s] could not mark failed", job_id)


def run_bulk_apply(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
) -> None:
    """Apply approved proposals back to Mealie.

    Job must be in state='applying' (caller transitions it). Each recipe runs
    through Sterilizer.apply_recipe; any per-recipe failure is recorded on
    the proposal but doesn't stop the loop. On completion the job moves to
    state='done', unless its state was changed externally, in which case the
    loop stops and leaves that state untouched. Any other exception is logged
    and the job is moved to 'failed'.
    """
    log.info("[bulk-sterilize:%s] starting apply", job_id)
    mealie = sterilizer.mealie

    try:
        user_household = _user_household_id(mealie)
        approved = db.list_approved_unapplied_proposals(job_id)
        for row in approved:
            if _job_is_terminal(db, job_id):
                log.info(
                    "[bulk-sterilize:%s] apply aborted — job state changed externally",
                    job_id,
                )
                return

            slug = row["recipe_slug"]
            try:
                db.update_sterilize_job_progress(job_id, current_slug=slug)
                # Pre-check household: if this proposal was created before
                # the walk-side filter (e.g. legacy job 1), Mealie would
                # 403 on the PUT and the food/unit creates would have
                # already polluted our own household. Guard early.
                if user_household:
                    rec_hh = _recipe_household_id(mealie.get_recipe(slug))
                    if rec_hh and rec_hh != user_household:
                        db.mark_proposal_applied(
                            job_id, slug, error=_FOREIGN_HOUSEHOLD_MSG
                        )
                        db.update_sterilize_job_progress(
                            job_id,
                            error_delta=1,
                            current_slug=slug,
                            last_error=_FOREIGN_HOUSEHOLD_MSG,
                        )
                        continue
                sterilizer.apply_recipe(slug, create_missing=True)
                db.mark_proposal_applied(job_id, slug)
            except (ForgeError, RuntimeError, MealieError) as e:
                raw = str(e)
                # NOTE(review): substring match on "403" — any error text that
                # merely contains "403" is also reported as a household
                # mismatch. Prefer a status-code attribute if MealieError
                # exposes one; confirm against .mealie.
                if "403" in raw:
                    msg = _FOREIGN_HOUSEHOLD_MSG
                else:
                    msg = raw[:_MAX_ERROR_LEN]
                log.warning("[bulk-sterilize:%s] apply(%s): %s", job_id, slug, msg)
                db.mark_proposal_applied(job_id, slug, error=msg)
                db.update_sterilize_job_progress(
                    job_id, error_delta=1, current_slug=slug, last_error=msg
                )

        # A cancel can arrive while the last recipe is being applied; don't
        # overwrite an externally-set state with 'done'.
        if _job_is_terminal(db, job_id):
            log.info(
                "[bulk-sterilize:%s] apply aborted — job state changed externally",
                job_id,
            )
            return

        db.finalize_sterilize_job(job_id, state="done")
        log.info("[bulk-sterilize:%s] apply complete", job_id)
    except Exception:
        # Top-level thread boundary: log and try to mark the job failed.
        log.exception("[bulk-sterilize:%s] apply unhandled crash", job_id)
        try:
            db.finalize_sterilize_job(job_id, state="failed")
        except Exception:
            log.exception("[bulk-sterilize:%s] could not mark failed", job_id)


def _spawn_job_thread(
    target,
    default_name: str,
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
    name: Optional[str],
) -> threading.Thread:
    """Start *target* on a daemon thread with the standard runner kwargs."""
    t = threading.Thread(
        target=target,
        kwargs={"db": db, "job_id": job_id, "sterilizer": sterilizer},
        name=name or default_name,
        daemon=True,
    )
    t.start()
    return t


def spawn_preview_thread(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
    name: Optional[str] = None,
) -> threading.Thread:
    """Start run_bulk_preview for *job_id* on a daemon thread and return it.

    The thread is named *name*, or ``bulk-sterilize-preview-<job_id>`` when
    no name is given.
    """
    return _spawn_job_thread(
        run_bulk_preview,
        f"bulk-sterilize-preview-{job_id}",
        db=db,
        job_id=job_id,
        sterilizer=sterilizer,
        name=name,
    )


def spawn_apply_thread(
    *,
    db: DB,
    job_id: int,
    sterilizer: Sterilizer,
    name: Optional[str] = None,
) -> threading.Thread:
    """Start run_bulk_apply for *job_id* on a daemon thread and return it.

    The thread is named *name*, or ``bulk-sterilize-apply-<job_id>`` when no
    name is given.
    """
    return _spawn_job_thread(
        run_bulk_apply,
        f"bulk-sterilize-apply-{job_id}",
        db=db,
        job_id=job_id,
        sterilizer=sterilizer,
        name=name,
    )