"""Ingredient sterilizer — turn Mealie's free-form ingredient strings into
structured (qty, unit, food, note) so shopping-list aggregation works.

Why this exists:
    Mealie has its own CRF parser, but it's mediocre and produces
    inconsistent results. Cobb's hand-typed recipes have lots of
    "about 2 cups cooked white rice" / "1 small handful kale" /
    "a pinch of salt" etc. that slip past the parser. We send these to
    Sonnet via clawdforge and get back clean structured form.

Flow:
    1. Fetch the recipe from Mealie
    2. Build a single batched prompt with all ingredients
       (one Sonnet call/recipe)
    3. Get back a parallel array of {quantity, unit, food, note}
    4. (preview) return the proposal
    5. (apply) link each parse to existing Mealie food/unit (create if
       missing), then PUT the updated recipe back
"""

import json
import math
from dataclasses import asdict, dataclass

from .forge import Forge, ForgeError
from .mealie import Mealie, MealieError

# System prompt sent with every batch.  The <...> placeholders describe the
# JSON type expected for each field; the trailing "#" remarks are guidance for
# the model, not part of the output.
STERILIZE_SYSTEM = """You are a precise recipe ingredient parser. You ONLY output valid JSON.

You receive a list of free-form ingredient strings and must return a parallel
array where each item is parsed into structured form.

Output schema (per item):
{
  "quantity": <number|null>,  # numeric amount, fractions converted to decimals (1/2 -> 0.5)
  "unit": <string|null>,      # singular canonical form: "cup", "tbsp", "tsp", "oz", "lb", "g", "kg", "ml", "l", "clove", "slice", "can", "package", "piece", "pinch", "dash", "handful". null if no unit (e.g. "1 onion").
  "food": <string|null>,      # the core food noun in singular canonical form: "onion", "garlic", "rice", "olive oil". Strip prep state ("chopped", "diced") -- those go in note.
  "note": <string|null>,      # prep state, brand, color, modifier: "chopped", "extra virgin", "yellow", "to taste"
  "approx": <boolean>         # true if the input said "about" / "a pinch" / "to taste" / vague qty
}

Rules:
- Convert fractions: "1/2" -> 0.5, "1 1/4" -> 1.25
- "a pinch", "a dash", "to taste" -> {quantity: null, approx: true, note: "to taste"}
- "1 small onion" -> {quantity: 1, unit: null, food: "onion", note: "small"}
- "2 cloves garlic, minced" -> {quantity: 2, unit: "clove", food: "garlic", note: "minced"}
- Section headers like "For the sauce:" -> all fields null EXCEPT note: "<the header text>"
- If you genuinely cannot parse, set all fields null and put the original in note.
- DO NOT add fields not in the schema.
- DO NOT wrap output in markdown fences.
- DO NOT include any prose before or after the JSON.

You will be given a JSON object: {"ingredients": ["str", "str", ...]}
You return: {"parses": [{...}, {...}, ...]} -- same length, same order.
"""


@dataclass
class IngredientParse:
    """One ingredient as returned by the model, after local coercion."""

    quantity: float | None
    unit: str | None
    food: str | None
    note: str | None
    approx: bool


@dataclass
class IngredientProposal:
    """One ingredient before vs after."""

    index: int
    original_display: str
    original_quantity: float | None
    original_unit_name: str | None
    original_food_name: str | None
    original_note: str | None
    parsed: IngredientParse


class Sterilizer:
    """Parse a recipe's ingredients through the forge and write them back.

    ``mealie`` is used to read/update recipes and to list/create foods and
    units; ``forge`` runs the batched parsing prompt with ``model``.
    """

    def __init__(self, *, mealie: Mealie, forge: Forge, model: str = "sonnet"):
        self.mealie = mealie
        self.forge = forge
        self.model = model

    # --- public -------------------------------------------------------------

    def preview_recipe(self, slug: str) -> dict:
        """Dry-run: parse all ingredients, return proposals without writing.

        Returns a dict with ``slug``, ``name``, ``ingredient_count`` and
        ``proposals`` (one dict per ingredient, in recipe order).

        Raises:
            RuntimeError: if the forge call fails or its response does not
                have the expected shape (see ``_parse_batch``).
        """
        recipe = self.mealie.get_recipe(slug)
        ingredients = recipe.get("recipeIngredient") or []
        if not ingredients:
            return {
                "slug": slug,
                "name": recipe.get("name"),
                "ingredient_count": 0,
                "proposals": [],
            }

        strings = [_render_ingredient_for_parse(ing) for ing in ingredients]
        parses = self._parse_batch(strings)

        # _parse_batch guarantees len(parses) == len(ingredients).
        proposals = [
            IngredientProposal(
                index=i,
                original_display=ing.get("display") or "",
                original_quantity=ing.get("quantity"),
                original_unit_name=(ing.get("unit") or {}).get("name"),
                original_food_name=(ing.get("food") or {}).get("name"),
                original_note=ing.get("note"),
                parsed=parse,
            )
            for i, (ing, parse) in enumerate(zip(ingredients, parses))
        ]
        return {
            "slug": slug,
            "name": recipe.get("name"),
            "ingredient_count": len(ingredients),
            "proposals": [_proposal_to_dict(p) for p in proposals],
        }

    def apply_recipe(self, slug: str, *, create_missing: bool = True) -> dict:
        """Run preview, then write changes back to Mealie.

        For each ingredient we resolve (or create) Mealie food/unit by name,
        then assemble the new recipeIngredient list and PUT the recipe.

        Mealie normalizes food/unit names more aggressively than .lower()
        (its name_normalized strips punctuation + collapses whitespace +
        unicode-folds). So a local-cache miss followed by a blind create can
        hit Mealie's UNIQUE constraint on (name, group_id). We ALWAYS try the
        search endpoint as a tie-break before creating, and on a
        UNIQUE-violation 400 we re-search and adopt whatever Mealie has under
        that normalized form.

        Returns a dict with ``slug``, ``updated`` (ingredients written),
        ``skipped`` (ingredients where a parsed food or unit name could not
        be linked, so the original link was left in place), ``created_foods``
        and ``created_units``.

        Raises:
            RuntimeError: if parsing fails, or if the recipe's ingredient
                count changed between the preview and the write.
            MealieError: if a food/unit create fails and cannot be reconciled.
        """
        preview = self.preview_recipe(slug)
        proposals = preview["proposals"]
        if not proposals:
            return {
                "slug": slug,
                "updated": 0,
                "skipped": 0,
                "created_foods": [],
                "created_units": [],
            }

        # Re-fetch so the PUT is based on the freshest copy of the recipe.
        recipe = self.mealie.get_recipe(slug)
        current = recipe.get("recipeIngredient") or []
        if len(current) != len(proposals):
            # zip() would silently truncate and the PUT would then drop
            # ingredients from the recipe; refuse instead.
            raise RuntimeError(
                f"recipe changed during parse: got {len(current)} ingredients, "
                f"expected {len(proposals)}"
            )

        food_index = self._build_name_index(self.mealie.list_foods())
        unit_index = self._build_name_index(self.mealie.list_units())

        created_foods: list[str] = []
        created_units: list[str] = []
        new_ingredients: list[dict] = []
        skipped = 0

        for orig_ing, prop in zip(current, proposals):
            parsed = prop["parsed"]
            new_ing = dict(orig_ing)  # preserve id, refId, original_text
            new_ing["quantity"] = parsed["quantity"]
            fully_linked = True

            food_name = (parsed.get("food") or "").strip()
            if food_name:
                food_id = self._resolve_food(
                    food_name,
                    food_index,
                    create_missing=create_missing,
                    created_log=created_foods,
                )
                if food_id:
                    new_ing["food"] = {"id": food_id, "name": food_name}
                    new_ing["isFood"] = True
                else:
                    fully_linked = False
            else:
                # Section header style — clear food, mark not-food
                new_ing["food"] = None
                new_ing["isFood"] = False

            unit_name = (parsed.get("unit") or "").strip()
            if unit_name:
                unit_id = self._resolve_unit(
                    unit_name,
                    unit_index,
                    create_missing=create_missing,
                    created_log=created_units,
                )
                if unit_id:
                    new_ing["unit"] = {"id": unit_id, "name": unit_name}
                else:
                    fully_linked = False
            else:
                new_ing["unit"] = None

            new_ing["note"] = parsed.get("note") or ""
            new_ingredients.append(new_ing)
            if not fully_linked:
                skipped += 1

        recipe["recipeIngredient"] = new_ingredients
        self.mealie.update_recipe(slug, recipe)

        return {
            "slug": slug,
            "updated": len(new_ingredients),
            "skipped": skipped,
            "created_foods": created_foods,
            "created_units": created_units,
        }

    # --- food/unit resolution helpers --------------------------------------

    def _resolve_food(
        self,
        name: str,
        index: dict[str, str],
        *,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Find or create a Mealie food row, robust to normalization gaps."""
        return self._resolve_named(
            name,
            index,
            kind="foods",
            create_missing=create_missing,
            created_log=created_log,
        )

    def _resolve_unit(
        self,
        name: str,
        index: dict[str, str],
        *,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Find or create a Mealie unit row, robust to normalization gaps."""
        return self._resolve_named(
            name,
            index,
            kind="units",
            create_missing=create_missing,
            created_log=created_log,
        )

    def _resolve_named(
        self,
        name: str,
        index: dict[str, str],
        *,
        kind: str,
        create_missing: bool,
        created_log: list[str],
    ) -> str | None:
        """Shared find-or-create for ``kind`` in {"foods", "units"}.

        ``index`` is the lower-cased name -> id cache and is updated in place.
        ``created_log`` receives ``name`` only when a row was actually
        created, not when an existing row was adopted.  Returns the row id,
        or None when nothing matched and ``create_missing`` is false.
        """
        key = name.lower()

        # Step 1: local cache hit (covers name + pluralName from the listing)
        if key in index:
            return index[key]

        # Step 2: server-side search — Mealie does proper normalization here
        existing_id = self._search_for_match(name, kind)
        if existing_id:
            index[key] = existing_id
            return existing_id

        # Step 3: create. If Mealie answers with a UNIQUE-constraint 400,
        # search again and use whatever it has under the normalized form.
        if not create_missing:
            return None
        create = (
            self.mealie.create_food if kind == "foods" else self.mealie.create_unit
        )
        try:
            row_id = create(name).get("id")
        except MealieError as e:
            msg = str(e)
            if "UNIQUE constraint" not in msg and "400" not in msg:
                raise
            row_id = self._search_for_match(name, kind)
            if not row_id:
                raise  # truly couldn't reconcile — let caller record error
            # Adopted an existing row: cache it, but do not report it as created.
            index[key] = row_id
            return row_id

        if row_id:
            index[key] = row_id
            created_log.append(name)
        return row_id

    def _search_for_match(self, name: str, kind: str) -> str | None:
        """Use Mealie's search endpoint to find a foods/units row matching `name`.

        Returns the id of the first item whose name or pluralName matches
        (case-insensitive) the query.  If nothing matches exactly but the
        search returned exactly one item, that item's id is returned.
        Otherwise returns None.
        """
        target = name.strip().lower()
        if not target:
            return None

        if kind == "foods":
            listing = self.mealie.list_foods(search=name)
        else:
            listing = self.mealie.list_units(search=name)
        items = listing.get("items") or listing.get("data") or []

        # Mealie's search returns ranked results; take the first exact-ish match
        for item in items:
            for field in ("name", "pluralName"):
                value = (item.get(field) or "").strip().lower()
                if value and value == target:
                    return item.get("id")

        # Fallback: if there's exactly one search hit, trust Mealie's ranker
        if len(items) == 1 and items[0].get("id"):
            return items[0]["id"]
        return None

    # --- private ------------------------------------------------------------

    def _parse_batch(self, strings: list[str]) -> list[IngredientParse]:
        """Send all ingredient strings in one forge call and coerce the reply.

        Returns one IngredientParse per input string, in the same order.

        Raises:
            RuntimeError: if the forge call fails, the response is not a dict
                with a "parses" list of dicts, or the list length differs
                from ``len(strings)``.
        """
        prompt = json.dumps({"ingredients": strings}, ensure_ascii=False)
        try:
            resp = self.forge.run(
                prompt=prompt,
                model=self.model,
                system=STERILIZE_SYSTEM,
                timeout_secs=120,
            )
        except ForgeError as e:
            raise RuntimeError(f"clawdforge failed: {e}") from e

        result = resp.get("result")
        if not isinstance(result, dict) or "parses" not in result:
            raise RuntimeError(f"unexpected response shape: {str(result)[:200]}")

        parses_raw = result["parses"]
        # Check the type before calling len(): a non-list "parses" (e.g. null)
        # must surface as RuntimeError, not TypeError.
        if not isinstance(parses_raw, list):
            raise RuntimeError(f"unexpected response shape: {str(result)[:200]}")
        if len(parses_raw) != len(strings):
            raise RuntimeError(
                f"parse count mismatch: got {len(parses_raw)}, expected {len(strings)}"
            )

        out: list[IngredientParse] = []
        for p in parses_raw:
            if not isinstance(p, dict):
                raise RuntimeError(f"unexpected response shape: {str(p)[:200]}")
            out.append(
                IngredientParse(
                    quantity=_coerce_float(p.get("quantity")),
                    unit=_clean_str(p.get("unit")),
                    food=_clean_str(p.get("food")),
                    note=_clean_str(p.get("note")),
                    approx=bool(p.get("approx")),
                )
            )
        return out

    @staticmethod
    def _build_name_index(listing: dict) -> dict[str, str]:
        """Map lower-cased name and pluralName of each listed row to its id.

        Rows are read from ``listing["items"]`` or, failing that,
        ``listing["data"]``.  Rows without an id are ignored.
        """
        index: dict[str, str] = {}
        for item in listing.get("items") or listing.get("data") or []:
            item_id = item.get("id")
            if not item_id:
                continue
            for field in ("name", "pluralName"):
                label = (item.get(field) or "").strip().lower()
                if label:
                    index[label] = item_id
        return index


def _render_ingredient_for_parse(ing: dict) -> str:
    """Best string representation of a Mealie ingredient for sending to Claude.

    Prefers ``originalText``, then ``display``, then a space-joined
    quantity/unit/food/note; falls back to "(empty)".
    """
    if ing.get("originalText"):
        return ing["originalText"]
    if ing.get("display"):
        return ing["display"]

    parts: list[str] = []
    if (q := ing.get("quantity")) is not None:
        parts.append(str(q))
    if u := ing.get("unit"):
        parts.append(u.get("name") or "")
    if f := ing.get("food"):
        parts.append(f.get("name") or "")
    if note := ing.get("note"):
        parts.append(note)
    return " ".join(p for p in parts if p).strip() or "(empty)"


def _coerce_float(v) -> float | None:
    """Return ``v`` as a finite float, or None if it cannot be one.

    Booleans, NaN and +/-inf are rejected: they are not meaningful
    quantities, and non-finite floats do not serialize to valid JSON.
    """
    if v is None or isinstance(v, bool):
        return None
    try:
        number = float(v)
    except (TypeError, ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def _clean_str(v) -> str | None:
    """Return ``str(v)`` stripped, or None when ``v`` is None or blank."""
    if v is None:
        return None
    s = str(v).strip()
    return s or None


def _proposal_to_dict(p: IngredientProposal) -> dict:
    """Convert a proposal (including its nested parse) to plain dicts."""
    return asdict(p)