sterilize: search-then-create + retry-on-UNIQUE-400 + don't mark errored as applied

Job 1's bulk run tried to apply 184 recipes and 182 of them failed with the
same error: POST /api/foods -> 400 UNIQUE constraint failed:
ingredient_foods.name, ingredient_foods.group_id. Cause: Mealie's
name_normalized strips punctuation/whitespace/case more aggressively
than our local _build_name_index's plain .lower(), so the cache misses,
the create_food fires blindly, and Mealie's UNIQUE constraint kills
the call. The whole-recipe apply was wrapped in try/except in the bulk
runner, so the recipe got marked as errored — but applied_at was still
set to NOW(), so a rerun skipped it as already applied even though the
recipe was still unparsed.

Two fixes:

1. sterilizer._resolve_food / _resolve_unit replace the inline
   create-on-miss block. Order: local cache → Mealie search-endpoint
   tie-break → create. On any UNIQUE-flavored 400 from create, fall
   back to one more search to adopt whatever Mealie has under the
   normalized form. Mealie's search endpoint applies its own
   name_normalized internally so we don't have to mirror its rules.
   _search_for_match takes "foods" or "units" and looks for an exact
   case-insensitive match against name or pluralName, with a fallback
   to "trust Mealie's ranker" when there's exactly one hit.

2. db.mark_proposal_applied no longer sets applied_at on error. On
   success: applied_at=NOW(), apply_error=NULL. On error: applied_at
   stays NULL, apply_error gets the message. list_approved_unapplied_
   proposals keys off applied_at IS NULL, so a rerun naturally retries
   only the failed recipes.

Net effect: rerun can now successfully apply the 182 failed recipes
without re-walking them, and won't waste calls on the 2 that did go
through.
This commit is contained in:
Kayos 2026-04-30 06:05:19 -07:00
parent 9368b64a81
commit f7b30d3b65
2 changed files with 136 additions and 19 deletions

View file

@ -1069,12 +1069,25 @@ class DB:
def mark_proposal_applied(
self, job_id: int, recipe_slug: str, *, error: str | None = None
) -> None:
"""On success: applied_at=NOW(), apply_error=NULL. On error: leave
applied_at NULL so a rerun can retry, but record the error for
review. The list_approved_unapplied_proposals query keys off
applied_at IS NULL, so this directly drives retryability."""
if error:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""UPDATE cauldron_sterilize_proposals
SET applied_at=NOW(), apply_error=%s
SET apply_error=%s
WHERE job_id=%s AND recipe_slug=%s""",
((error or "")[:500] or None, job_id, recipe_slug),
(error[:500], job_id, recipe_slug),
)
else:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""UPDATE cauldron_sterilize_proposals
SET applied_at=NOW(), apply_error=NULL
WHERE job_id=%s AND recipe_slug=%s""",
(job_id, recipe_slug),
)
def list_approved_unapplied_proposals(self, job_id: int) -> list[dict]:

View file

@ -116,6 +116,14 @@ class Sterilizer:
For each ingredient we resolve (or create) Mealie food/unit by name,
then assemble the new recipeIngredient list and PUT the recipe.
Mealie normalizes food/unit names more aggressively than .lower()
(its name_normalized strips punctuation + collapses whitespace +
unicode-folds). So a local-cache miss followed by a blind create
can hit Mealie's UNIQUE constraint on (name, group_id). We
ALWAYS try the search endpoint as a tie-break before creating,
and on a UNIQUE-violation 400 we re-search and adopt whatever
Mealie has under that normalized form.
"""
preview = self.preview_recipe(slug)
proposals = preview["proposals"]
@ -137,12 +145,11 @@ class Sterilizer:
food_name = (parsed.get("food") or "").strip()
if food_name:
food_id = food_index.get(food_name.lower())
if not food_id and create_missing:
created = self.mealie.create_food(food_name)
food_id = created.get("id")
food_index[food_name.lower()] = food_id
created_foods.append(food_name)
food_id = self._resolve_food(
food_name, food_index,
create_missing=create_missing,
created_log=created_foods,
)
if food_id:
new_ing["food"] = {"id": food_id, "name": food_name}
new_ing["isFood"] = True
@ -153,12 +160,11 @@ class Sterilizer:
unit_name = (parsed.get("unit") or "").strip()
if unit_name:
unit_id = unit_index.get(unit_name.lower())
if not unit_id and create_missing:
created = self.mealie.create_unit(unit_name)
unit_id = created.get("id")
unit_index[unit_name.lower()] = unit_id
created_units.append(unit_name)
unit_id = self._resolve_unit(
unit_name, unit_index,
create_missing=create_missing,
created_log=created_units,
)
if unit_id:
new_ing["unit"] = {"id": unit_id, "name": unit_name}
else:
@ -177,6 +183,104 @@ class Sterilizer:
"created_units": created_units,
}
# --- food/unit resolution helpers --------------------------------------
def _resolve_food(
self,
name: str,
index: dict[str, str],
*,
create_missing: bool,
created_log: list[str],
) -> str | None:
"""Find or create a Mealie food row, robust to normalization gaps."""
key = name.lower()
# Step 1: local cache hit (covers name + pluralName from list_foods)
if key in index:
return index[key]
# Step 2: server-side search — Mealie does proper normalization here
existing_id = self._search_for_match(name, "foods")
if existing_id:
index[key] = existing_id
return existing_id
# Step 3: create. If Mealie races us with a UNIQUE-constraint 400,
# search again and use whatever it has under the normalized form.
if not create_missing:
return None
try:
created = self.mealie.create_food(name)
food_id = created.get("id")
except MealieError as e:
msg = str(e)
if "UNIQUE constraint" in msg or "400" in msg:
food_id = self._search_for_match(name, "foods")
if not food_id:
raise # truly couldn't reconcile — let caller record error
else:
raise
if food_id:
index[key] = food_id
created_log.append(name)
return food_id
def _resolve_unit(
self,
name: str,
index: dict[str, str],
*,
create_missing: bool,
created_log: list[str],
) -> str | None:
key = name.lower()
if key in index:
return index[key]
existing_id = self._search_for_match(name, "units")
if existing_id:
index[key] = existing_id
return existing_id
if not create_missing:
return None
try:
created = self.mealie.create_unit(name)
unit_id = created.get("id")
except MealieError as e:
msg = str(e)
if "UNIQUE constraint" in msg or "400" in msg:
unit_id = self._search_for_match(name, "units")
if not unit_id:
raise
else:
raise
if unit_id:
index[key] = unit_id
created_log.append(name)
return unit_id
def _search_for_match(self, name: str, kind: str) -> str | None:
"""Use Mealie's search endpoint to find a foods/units row matching
`name`. Returns the id of the first item whose name or pluralName
matches (case-insensitive) the query, else None."""
target = name.strip().lower()
if not target:
return None
listing = (self.mealie.list_foods(search=name)
if kind == "foods"
else self.mealie.list_units(search=name))
items = listing.get("items") or listing.get("data") or []
# Mealie's search returns ranked results; take the first exact-ish match
for item in items:
for field in ("name", "pluralName"):
v = (item.get(field) or "").strip().lower()
if v and v == target:
return item.get("id")
# Fallback: if there's exactly one search hit, trust Mealie's ranker
if len(items) == 1 and items[0].get("id"):
return items[0]["id"]
return None
# --- private ------------------------------------------------------------
def _parse_batch(self, strings: list[str]) -> list[IngredientParse]: