From f7b30d3b65678b265292ef63644d55d1da618d98 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 06:05:19 -0700 Subject: [PATCH] sterilize: search-then-create + retry-on-UNIQUE-400 + don't mark errored as applied MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job 1's bulk run applied 184 recipes and 182 of them failed with the same error: POST /api/foods -> 400 UNIQUE constraint failed: ingredient_foods.name, ingredient_foods.group_id. Cause: Mealie's name_normalized strips punctuation/whitespace/case more aggressively than our local _build_name_index's plain .lower(), so the cache misses, the create_food fires blindly, and Mealie's UNIQUE constraint kills the call. Whole-recipe apply was wrapped in try/except at the bulk runner so the recipe got marked errored — but applied_at was still set to NOW(), making the rerun think we'd already tried. We had, but the recipe's still unparsed. Two fixes: 1. sterilizer._resolve_food / _resolve_unit replace the inline create-on-miss block. Order: local cache → Mealie search-endpoint tie-break → create. On any UNIQUE-flavored 400 from create, fall back to one more search to adopt whatever Mealie has under the normalized form. Mealie's search endpoint applies its own name_normalized internally so we don't have to mirror its rules. _search_for_match takes "foods" or "units" and looks for an exact case-insensitive match against name or pluralName, with a fallback to "trust Mealie's ranker" when there's exactly one hit. 2. db.mark_proposal_applied no longer sets applied_at on error. On success: applied_at=NOW(), apply_error=NULL. On error: applied_at stays NULL, apply_error gets the message. list_approved_unapplied_proposals keys off applied_at IS NULL, so a rerun naturally retries only the failed recipes. Net effect: rerun can now successfully apply the 182 failed recipes without re-walking them, and won't waste calls on the 2 that did go through. 
--- cauldron/db.py | 27 ++++++--- cauldron/sterilizer.py | 128 +++++++++++++++++++++++++++++++++++++---- 2 files changed, 136 insertions(+), 19 deletions(-) diff --git a/cauldron/db.py b/cauldron/db.py index 8aceb65..c03bac5 100644 --- a/cauldron/db.py +++ b/cauldron/db.py @@ -1069,13 +1069,26 @@ class DB: def mark_proposal_applied( self, job_id: int, recipe_slug: str, *, error: str | None = None ) -> None: - with self.conn() as c, c.cursor() as cur: - cur.execute( - """UPDATE cauldron_sterilize_proposals - SET applied_at=NOW(), apply_error=%s - WHERE job_id=%s AND recipe_slug=%s""", - ((error or "")[:500] or None, job_id, recipe_slug), - ) + """On success: applied_at=NOW(), apply_error=NULL. On error: leave + applied_at NULL so a rerun can retry, but record the error for + review. The list_approved_unapplied_proposals query keys off + applied_at IS NULL, so this directly drives retryability.""" + if error: + with self.conn() as c, c.cursor() as cur: + cur.execute( + """UPDATE cauldron_sterilize_proposals + SET apply_error=%s + WHERE job_id=%s AND recipe_slug=%s""", + (error[:500], job_id, recipe_slug), + ) + else: + with self.conn() as c, c.cursor() as cur: + cur.execute( + """UPDATE cauldron_sterilize_proposals + SET applied_at=NOW(), apply_error=NULL + WHERE job_id=%s AND recipe_slug=%s""", + (job_id, recipe_slug), + ) def list_approved_unapplied_proposals(self, job_id: int) -> list[dict]: with self.conn() as c, c.cursor() as cur: diff --git a/cauldron/sterilizer.py b/cauldron/sterilizer.py index fe0ebcd..ff7e263 100644 --- a/cauldron/sterilizer.py +++ b/cauldron/sterilizer.py @@ -116,6 +116,14 @@ class Sterilizer: For each ingredient we resolve (or create) Mealie food/unit by name, then assemble the new recipeIngredient list and PUT the recipe. + + Mealie normalizes food/unit names more aggressively than .lower() + (its name_normalized strips punctuation + collapses whitespace + + unicode-folds). 
So a local-cache miss followed by a blind create + can hit Mealie's UNIQUE constraint on (name, group_id). We + ALWAYS try the search endpoint as a tie-break before creating, + and on a UNIQUE-violation 400 we re-search and adopt whatever + Mealie has under that normalized form. """ preview = self.preview_recipe(slug) proposals = preview["proposals"] @@ -137,12 +145,11 @@ class Sterilizer: food_name = (parsed.get("food") or "").strip() if food_name: - food_id = food_index.get(food_name.lower()) - if not food_id and create_missing: - created = self.mealie.create_food(food_name) - food_id = created.get("id") - food_index[food_name.lower()] = food_id - created_foods.append(food_name) + food_id = self._resolve_food( + food_name, food_index, + create_missing=create_missing, + created_log=created_foods, + ) if food_id: new_ing["food"] = {"id": food_id, "name": food_name} new_ing["isFood"] = True @@ -153,12 +160,11 @@ class Sterilizer: unit_name = (parsed.get("unit") or "").strip() if unit_name: - unit_id = unit_index.get(unit_name.lower()) - if not unit_id and create_missing: - created = self.mealie.create_unit(unit_name) - unit_id = created.get("id") - unit_index[unit_name.lower()] = unit_id - created_units.append(unit_name) + unit_id = self._resolve_unit( + unit_name, unit_index, + create_missing=create_missing, + created_log=created_units, + ) if unit_id: new_ing["unit"] = {"id": unit_id, "name": unit_name} else: @@ -177,6 +183,104 @@ class Sterilizer: "created_units": created_units, } + # --- food/unit resolution helpers -------------------------------------- + + def _resolve_food( + self, + name: str, + index: dict[str, str], + *, + create_missing: bool, + created_log: list[str], + ) -> str | None: + """Find or create a Mealie food row, robust to normalization gaps.""" + key = name.lower() + + # Step 1: local cache hit (covers name + pluralName from list_foods) + if key in index: + return index[key] + + # Step 2: server-side search — Mealie does proper normalization 
here + existing_id = self._search_for_match(name, "foods") + if existing_id: + index[key] = existing_id + return existing_id + + # Step 3: create. If Mealie races us with a UNIQUE-constraint 400, + # search again and use whatever it has under the normalized form. + if not create_missing: + return None + try: + created = self.mealie.create_food(name) + food_id = created.get("id") + except MealieError as e: + msg = str(e) + if "UNIQUE constraint" in msg or "400" in msg: + food_id = self._search_for_match(name, "foods") + if not food_id: + raise # truly couldn't reconcile — let caller record error + else: + raise + if food_id: + index[key] = food_id + created_log.append(name) + return food_id + + def _resolve_unit( + self, + name: str, + index: dict[str, str], + *, + create_missing: bool, + created_log: list[str], + ) -> str | None: + key = name.lower() + if key in index: + return index[key] + existing_id = self._search_for_match(name, "units") + if existing_id: + index[key] = existing_id + return existing_id + if not create_missing: + return None + try: + created = self.mealie.create_unit(name) + unit_id = created.get("id") + except MealieError as e: + msg = str(e) + if "UNIQUE constraint" in msg or "400" in msg: + unit_id = self._search_for_match(name, "units") + if not unit_id: + raise + else: + raise + if unit_id: + index[key] = unit_id + created_log.append(name) + return unit_id + + def _search_for_match(self, name: str, kind: str) -> str | None: + """Use Mealie's search endpoint to find a foods/units row matching + `name`. 
Returns the id of the first item whose name or pluralName + matches (case-insensitive) the query, else None.""" + target = name.strip().lower() + if not target: + return None + listing = (self.mealie.list_foods(search=name) + if kind == "foods" + else self.mealie.list_units(search=name)) + items = listing.get("items") or listing.get("data") or [] + # Mealie's search returns ranked results; take the first exact-ish match + for item in items: + for field in ("name", "pluralName"): + v = (item.get(field) or "").strip().lower() + if v and v == target: + return item.get("id") + # Fallback: if there's exactly one search hit, trust Mealie's ranker + if len(items) == 1 and items[0].get("id"): + return items[0]["id"] + return None + # --- private ------------------------------------------------------------ def _parse_batch(self, strings: list[str]) -> list[IngredientParse]: