Job 1's bulk run applied 184 recipes and 182 of them failed with the same error: POST /api/foods -> 400 UNIQUE constraint failed: ingredient_foods.name, ingredient_foods.group_id. Cause: Mealie's name_normalized strips punctuation/whitespace/case more aggressively than our local _build_name_index's plain .lower(), so the cache misses, the create_food fires blindly, and Mealie's UNIQUE constraint kills the call. The whole-recipe apply was wrapped in try/except at the bulk runner, so the recipe got marked errored — but applied_at was still set to NOW(), making the rerun think we'd already tried. We had, but the recipe is still unparsed. Two fixes: 1. sterilizer._resolve_food / _resolve_unit replace the inline create-on-miss block. Order: local cache → Mealie search-endpoint tie-break → create. On any UNIQUE-flavored 400 from create, fall back to one more search to adopt whatever Mealie has under the normalized form. Mealie's search endpoint applies its own name_normalized internally, so we don't have to mirror its rules. _search_for_match takes "foods" or "units" and looks for an exact case-insensitive match against name or pluralName, with a fallback to "trust Mealie's ranker" when there's exactly one hit. 2. db.mark_proposal_applied no longer sets applied_at on error. On success: applied_at=NOW(), apply_error=NULL. On error: applied_at stays NULL, apply_error gets the message. list_approved_unapplied_proposals keys off applied_at IS NULL, so a rerun naturally retries only the failed recipes. Net effect: a rerun can now successfully apply the 182 failed recipes without re-walking them, and won't waste calls on the 2 that did go through.
369 lines
14 KiB
Python
369 lines
14 KiB
Python
"""Ingredient sterilizer — turn Mealie's free-form ingredient strings into
structured (qty, unit, food, note) so shopping-list aggregation works.

Why this exists: Mealie has its own CRF parser, but it's mediocre and produces
inconsistent results. Cobb's hand-typed recipes have lots of "about 2 cups
cooked white rice" / "1 small handful kale" / "a pinch of salt" etc. that
slip past the parser. We send these to Sonnet via clawdforge and get back
clean structured form.

Flow:
  1. Fetch the recipe from Mealie
  2. Build a single batched prompt with all ingredients (one Sonnet call/recipe)
  3. Get back a parallel array of {quantity, unit, food, note}
  4. (preview) return the proposal
  5. (apply) link each parse to existing Mealie food/unit (create if missing),
     then PUT the updated recipe back
"""
|
|
import json
|
|
from dataclasses import dataclass, asdict
|
|
|
|
from .forge import Forge, ForgeError
|
|
from .mealie import Mealie, MealieError
|
|
|
|
|
|
# System prompt for the ingredient-parsing model. It fixes the per-item output
# schema (quantity, unit, food, note, approx), the parsing rules, and the
# request/response envelope ({"ingredients": [...]} in, {"parses": [...]} out,
# same length and order). Passed as `system=` to forge.run in
# Sterilizer._parse_batch.
STERILIZE_SYSTEM = """You are a precise recipe ingredient parser. You ONLY output valid JSON.
|
|
You receive a list of free-form ingredient strings and must return a parallel
|
|
array where each item is parsed into structured form.
|
|
|
|
Output schema (per item):
|
|
{
|
|
"quantity": <number or null>, # numeric amount, fractions converted to decimals (1/2 -> 0.5)
|
|
"unit": <string or null>, # singular canonical form: "cup", "tbsp", "tsp", "oz", "lb", "g", "kg", "ml", "l", "clove", "slice", "can", "package", "piece", "pinch", "dash", "handful". null if no unit (e.g. "1 onion").
|
|
"food": <string or null>, # the core food noun in singular canonical form: "onion", "garlic", "rice", "olive oil". Strip prep state ("chopped", "diced") -- those go in note.
|
|
"note": <string or null>, # prep state, brand, color, modifier: "chopped", "extra virgin", "yellow", "to taste"
|
|
"approx": <bool> # true if the input said "about" / "a pinch" / "to taste" / vague qty
|
|
}
|
|
|
|
Rules:
|
|
- Convert fractions: "1/2" -> 0.5, "1 1/4" -> 1.25
|
|
- "a pinch", "a dash", "to taste" -> {quantity: null, approx: true, note: "to taste"}
|
|
- "1 small onion" -> {quantity: 1, unit: null, food: "onion", note: "small"}
|
|
- "2 cloves garlic, minced" -> {quantity: 2, unit: "clove", food: "garlic", note: "minced"}
|
|
- Section headers like "For the sauce:" -> all fields null EXCEPT note: "<header text>"
|
|
- If you genuinely cannot parse, set all fields null and put the original in note.
|
|
- DO NOT add fields not in the schema.
|
|
- DO NOT wrap output in markdown fences.
|
|
- DO NOT include any prose before or after the JSON.
|
|
|
|
You will be given a JSON object: {"ingredients": ["str", "str", ...]}
|
|
You return: {"parses": [{...}, {...}, ...]} -- same length, same order.
|
|
"""
|
|
|
|
|
|
@dataclass
|
|
class IngredientParse:
|
|
quantity: float | None
|
|
unit: str | None
|
|
food: str | None
|
|
note: str | None
|
|
approx: bool
|
|
|
|
|
|
@dataclass
class IngredientProposal:
    """Before/after view of a single recipe ingredient."""

    # Position of the ingredient within the recipe's ingredient list.
    index: int
    # The ingredient's "display" value ("" when absent).
    original_display: str
    # Quantity currently stored on the ingredient.
    original_quantity: float | None
    # Name of the currently linked unit, if any.
    original_unit_name: str | None
    # Name of the currently linked food, if any.
    original_food_name: str | None
    # Note currently stored on the ingredient.
    original_note: str | None
    # What the parser proposes instead.
    parsed: IngredientParse
|
|
|
|
|
|
class Sterilizer:
    """Parse a Mealie recipe's free-form ingredients into structured form.

    ``preview_recipe`` is a dry run that returns before/after proposals;
    ``apply_recipe`` additionally links every parse to a Mealie food/unit
    (creating missing rows on request) and writes the recipe back.
    """

    def __init__(self, *, mealie: Mealie, forge: Forge, model: str = "sonnet"):
        """Keep references to the Mealie client, the forge client and the model name."""
        self.mealie = mealie
        self.forge = forge
        self.model = model

    # --- public -------------------------------------------------------------

    def preview_recipe(self, slug: str) -> dict:
        """Dry-run: parse all ingredients, return proposals without writing.

        Args:
            slug: Mealie recipe slug.

        Returns:
            A dict with ``slug``, ``name``, ``ingredient_count`` and
            ``proposals`` (one dict per ingredient, in recipe order).

        Raises:
            RuntimeError: if the model call fails or its reply is malformed.
        """
        recipe = self.mealie.get_recipe(slug)
        ingredients = recipe.get("recipeIngredient") or []
        if not ingredients:
            # Same keys as the populated result so callers need no special case.
            return {
                "slug": slug,
                "name": recipe.get("name"),
                "ingredient_count": 0,
                "proposals": [],
            }

        strings = [_render_ingredient_for_parse(ing) for ing in ingredients]
        parses = self._parse_batch(strings)

        proposals = [
            IngredientProposal(
                index=i,
                original_display=ing.get("display") or "",
                original_quantity=ing.get("quantity"),
                original_unit_name=(ing.get("unit") or {}).get("name"),
                original_food_name=(ing.get("food") or {}).get("name"),
                original_note=ing.get("note"),
                parsed=parse,
            )
            for i, (ing, parse) in enumerate(zip(ingredients, parses))
        ]

        return {
            "slug": slug,
            "name": recipe.get("name"),
            "ingredient_count": len(ingredients),
            "proposals": [_proposal_to_dict(p) for p in proposals],
        }

    def apply_recipe(self, slug: str, *, create_missing: bool = True) -> dict:
        """Run preview, then write changes back to Mealie.

        For each ingredient we resolve (or create) Mealie food/unit by name,
        then assemble the new recipeIngredient list and PUT the recipe.

        Mealie normalizes food/unit names more aggressively than .lower()
        (its name_normalized strips punctuation + collapses whitespace +
        unicode-folds). So a local-cache miss followed by a blind create
        can hit Mealie's UNIQUE constraint on (name, group_id). We
        ALWAYS try the search endpoint as a tie-break before creating,
        and on a UNIQUE-violation 400 we re-search and adopt whatever
        Mealie has under that normalized form.

        An ingredient whose parsed food or unit cannot be resolved (possible
        when ``create_missing`` is false) is left exactly as it was and
        counted under ``skipped``: rewriting only its quantity and note
        would drop the text that still carries the unresolved name.

        Args:
            slug: Mealie recipe slug.
            create_missing: create foods/units that Mealie does not have yet.

        Returns:
            A dict with ``slug``, ``updated`` (ingredients rewritten),
            ``skipped`` (ingredients left untouched), ``created_foods`` and
            ``created_units`` (names actually created by this call).

        Raises:
            RuntimeError: if parsing fails, or if the recipe's ingredient
                list changed size between the preview and the write.
            MealieError: if a create fails and cannot be reconciled.
        """
        preview = self.preview_recipe(slug)
        proposals = preview["proposals"]
        if not proposals:
            return {"slug": slug, "updated": 0, "skipped": 0, "created_foods": [], "created_units": []}

        recipe = self.mealie.get_recipe(slug)
        current = recipe.get("recipeIngredient") or []
        if len(current) != len(proposals):
            # zip() would silently truncate and the PUT would then delete the
            # surplus ingredients; refuse instead so the caller can retry.
            raise RuntimeError(
                f"recipe {slug!r} changed during apply: "
                f"{len(current)} ingredients now, {len(proposals)} parsed"
            )

        food_index = self._build_name_index(self.mealie.list_foods())
        unit_index = self._build_name_index(self.mealie.list_units())
        created_foods: list[str] = []
        created_units: list[str] = []

        new_ingredients: list[dict] = []
        updated = 0
        skipped = 0
        for orig_ing, prop in zip(current, proposals):
            parsed = prop["parsed"]

            food_name = (parsed.get("food") or "").strip()
            food_id = None
            if food_name:
                food_id = self._resolve_food(
                    food_name, food_index,
                    create_missing=create_missing,
                    created_log=created_foods,
                )

            unit_name = (parsed.get("unit") or "").strip()
            unit_id = None
            if unit_name:
                unit_id = self._resolve_unit(
                    unit_name, unit_index,
                    create_missing=create_missing,
                    created_log=created_units,
                )

            if (food_name and not food_id) or (unit_name and not unit_id):
                # Could not link everything the parse refers to: keep the
                # ingredient as-is rather than writing a half-applied parse.
                new_ingredients.append(orig_ing)
                skipped += 1
                continue

            new_ing = dict(orig_ing)  # preserve id, refId, original_text
            new_ing["quantity"] = parsed["quantity"]
            if food_name:
                new_ing["food"] = {"id": food_id, "name": food_name}
                new_ing["isFood"] = True
            else:
                # Section header style — clear food, mark not-food
                new_ing["food"] = None
                new_ing["isFood"] = False
            new_ing["unit"] = {"id": unit_id, "name": unit_name} if unit_name else None
            new_ing["note"] = parsed.get("note") or ""
            new_ingredients.append(new_ing)
            updated += 1

        if updated:
            recipe["recipeIngredient"] = new_ingredients
            self.mealie.update_recipe(slug, recipe)

        return {
            "slug": slug,
            "updated": updated,
            "skipped": skipped,
            "created_foods": created_foods,
            "created_units": created_units,
        }

    # --- food/unit resolution helpers --------------------------------------

    def _resolve_food(
        self,
        name: str,
        index: dict[str, str],
        *,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Find or create a Mealie food row, robust to normalization gaps.

        Returns the food id, or None when nothing matches and
        ``create_missing`` is false. ``index`` is updated in place;
        ``created_log`` receives ``name`` only when a row was really created.
        """
        return self._resolve_entity(
            "foods", name, index,
            create_missing=create_missing,
            created_log=created_log,
        )

    def _resolve_unit(
        self,
        name: str,
        index: dict[str, str],
        *,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Find or create a Mealie unit row; same contract as ``_resolve_food``."""
        return self._resolve_entity(
            "units", name, index,
            create_missing=create_missing,
            created_log=created_log,
        )

    def _resolve_entity(
        self,
        kind: str,
        name: str,
        index: dict[str, str],
        *,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Shared lookup for foods and units: cache -> search -> create.

        Raises:
            MealieError: when the create call fails and a follow-up search
                cannot find a row to adopt.
        """
        key = self._name_key(name)

        # Step 1: local cache hit (covers name + pluralName from the listing)
        if key in index:
            return index[key]

        # Step 2: server-side search — Mealie does proper normalization here
        existing_id = self._search_for_match(name, kind)
        if existing_id:
            index[key] = existing_id
            return existing_id

        # Step 3: create. If Mealie answers with a UNIQUE-constraint 400,
        # search again and use whatever it has under the normalized form.
        if not create_missing:
            return None
        create = self.mealie.create_food if kind == "foods" else self.mealie.create_unit
        try:
            created = create(name)
        except MealieError as e:
            if not self._is_conflict_error(e):
                raise
            adopted_id = self._search_for_match(name, kind)
            if not adopted_id:
                raise  # truly couldn't reconcile — let caller record error
            # Adopted an existing row: cache it, but do not report it as created.
            index[key] = adopted_id
            return adopted_id

        new_id = (created or {}).get("id")
        if new_id:
            index[key] = new_id
            created_log.append(name)
        return new_id

    @staticmethod
    def _is_conflict_error(exc: Exception) -> bool:
        """True when the error text looks like a duplicate-row / 400 rejection."""
        msg = str(exc)
        return "UNIQUE constraint" in msg or "400" in msg

    @staticmethod
    def _name_key(name) -> str:
        """Cache/comparison key: surrounding and repeated whitespace removed, case-folded."""
        return " ".join(str(name).split()).casefold()

    def _search_for_match(self, name: str, kind: str) -> str | None:
        """Use Mealie's search endpoint to find a foods/units row matching
        `name`. Returns the id of the first item whose name or pluralName
        matches (case-insensitive) the query, else None.

        ``kind`` is "foods" for the food listing; any other value queries units.
        """
        target = self._name_key(name)
        if not target:
            return None
        fetch = self.mealie.list_foods if kind == "foods" else self.mealie.list_units
        listing = fetch(search=name) or {}
        items = listing.get("items") or listing.get("data") or []
        # Mealie's search returns ranked results; take the first exact-ish match
        for item in items:
            for field in ("name", "pluralName"):
                if self._name_key(item.get(field) or "") == target and item.get("id"):
                    return item["id"]
        # Fallback: if there's exactly one search hit, trust Mealie's ranker
        if len(items) == 1 and items[0].get("id"):
            return items[0]["id"]
        return None

    # --- private ------------------------------------------------------------

    def _parse_batch(self, strings: list[str]) -> list[IngredientParse]:
        """Send all ingredient strings to the model in one call.

        Returns one ``IngredientParse`` per input string, in order.

        Raises:
            RuntimeError: if the forge call fails, or the reply is not
                ``{"parses": [<object>, ...]}`` with one object per input.
        """
        prompt = json.dumps({"ingredients": strings}, ensure_ascii=False)
        try:
            resp = self.forge.run(
                prompt=prompt,
                model=self.model,
                system=STERILIZE_SYSTEM,
                timeout_secs=120,
            )
        except ForgeError as e:
            raise RuntimeError(f"clawdforge failed: {e}") from e

        result = resp.get("result")
        if not isinstance(result, dict) or "parses" not in result:
            raise RuntimeError(f"unexpected response shape: {str(result)[:200]}")

        parses_raw = result["parses"]
        if not isinstance(parses_raw, list):
            # Checked separately from the length so len() is never called on
            # a non-sequence (which would raise TypeError, not RuntimeError).
            raise RuntimeError(f"unexpected response shape: {str(result)[:200]}")
        if len(parses_raw) != len(strings):
            raise RuntimeError(
                f"parse count mismatch: got {len(parses_raw)}, expected {len(strings)}"
            )

        out: list[IngredientParse] = []
        for i, p in enumerate(parses_raw):
            if not isinstance(p, dict):
                raise RuntimeError(f"parse {i} is not an object: {str(p)[:200]}")
            out.append(
                IngredientParse(
                    quantity=_coerce_float(p.get("quantity")),
                    unit=_clean_str(p.get("unit")),
                    food=_clean_str(p.get("food")),
                    note=_clean_str(p.get("note")),
                    approx=bool(p.get("approx")),
                )
            )
        return out

    @staticmethod
    def _build_name_index(listing: dict) -> dict[str, str]:
        """Map normalized name and pluralName of every listed row to its id.

        Rows without an id are ignored. An exact ``name`` always wins over
        another row's ``pluralName`` that normalizes to the same key.
        """
        items = (listing or {}).get("items") or (listing or {}).get("data") or []
        index: dict[str, str] = {}
        for item in items:
            item_id = item.get("id")
            if item_id and (name := item.get("name")):
                index[Sterilizer._name_key(name)] = item_id
        for item in items:
            item_id = item.get("id")
            if item_id and (plural := item.get("pluralName")):
                index.setdefault(Sterilizer._name_key(plural), item_id)
        return index
|
|
|
|
|
|
def _render_ingredient_for_parse(ing: dict) -> str:
|
|
"""Best string representation of a Mealie ingredient for sending to Claude."""
|
|
if ing.get("originalText"):
|
|
return ing["originalText"]
|
|
if ing.get("display"):
|
|
return ing["display"]
|
|
parts: list[str] = []
|
|
if (q := ing.get("quantity")) is not None:
|
|
parts.append(str(q))
|
|
if u := ing.get("unit"):
|
|
parts.append(u.get("name") or "")
|
|
if f := ing.get("food"):
|
|
parts.append(f.get("name") or "")
|
|
if note := ing.get("note"):
|
|
parts.append(note)
|
|
return " ".join(p for p in parts if p).strip() or "(empty)"
|
|
|
|
|
|
def _coerce_float(v) -> float | None:
|
|
if v is None:
|
|
return None
|
|
try:
|
|
return float(v)
|
|
except (TypeError, ValueError):
|
|
return None
|
|
|
|
|
|
def _clean_str(v) -> str | None:
|
|
if v is None:
|
|
return None
|
|
s = str(v).strip()
|
|
return s or None
|
|
|
|
|
|
def _proposal_to_dict(p: IngredientProposal) -> dict:
|
|
d = asdict(p)
|
|
return d
|