From 6bcf79e5dc6c81c3b8555cdca8c4bf4c0788ee51 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 12:27:17 -0700 Subject: [PATCH] sterilize: recipe context + spell cleanup + defensive food.id preservation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three improvements driven by Cobb's review of the fan-out output: 1. Recipe context. _parse_batch now accepts an optional recipe_context dict carrying recipe_name, recipe_description, and recipe_steps. preview_recipe builds the context from the Mealie recipe and passes it through. The Sonnet prompt has new USE RECIPE CONTEXT WHEN AMBIGUOUS rules: "1 cup flour" is ambiguous (AP / bread / cake); the cooking steps usually disambiguate ("knead until elastic" → bread flour, "sift with cocoa powder" + cake recipe → cake flour). Step text capped to 3000 chars so the user prompt stays modest; defaults to all-purpose flour when steps don't disambiguate. Brand/style hints in the description carry through too. 2. Spell + grammar cleanup. New SPELL/GRAMMAR CLEANUP rules in the prompt: silently fix typos in food and note ("tomatos" → "tomatoes", "chopped finly" → "chopped finely", "heavy cram" → "heavy cream"). Normalize spacing. Critically: preserve EVERY semantic value — numeric quantities verbatim, every prep state, brand, color. When uncertain whether something is a typo or intentional ("yellow squash" is a real food, not a typo), keep it. Original strings stay in originalText for audit / rollback. 3. Defensive food.id preservation in apply_recipe. Three new safeguards protect against Sonnet hallucinations dropping live recipe data: a) If Sonnet returns a single all-null parsed item but the original Mealie row had a real food.id, pass the original through verbatim. (Sonnet probably parse-failed; never blank a real link.) b) When Sonnet returns a food name that we can't resolve in Mealie's catalog AND the original had a food.id, preserve the original link rather than emit food=null. c) When Sonnet explicitly returns food=null on the first child of an ingredient that originally had a food.id, treat that as a misread and preserve the original. Real section headers — where the original was ALREADY foodless — still pass through cleanly. Net effect: no apply path can drop a recipe's existing food reference. Sonnet can ADD food links (good), CHANGE them (good), or fail to parse (we keep what was there). It cannot remove them. The is_new_food field also benefits from recipe context — Sonnet has more evidence to set is_new_food=false (matched a known canonical) when the steps confirm the ingredient identity. --- cauldron/sterilizer.py | 136 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 127 insertions(+), 9 deletions(-) diff --git a/cauldron/sterilizer.py b/cauldron/sterilizer.py index a75ab6d..f1554eb 100644 --- a/cauldron/sterilizer.py +++ b/cauldron/sterilizer.py @@ -87,7 +87,37 @@ PARSE RULES (for the common 1→1 case): - DO NOT wrap output in markdown fences. - DO NOT include any prose before or after the JSON. -Input shape: {{"ingredients": ["str", "str", ...]}} +SPELL/GRAMMAR CLEANUP (no info loss): +- Silently fix obvious typos in food and note: "tomatos" → "tomatoes", + "all-purpose flouur" → "all-purpose flour", "chopped finly" → "chopped finely", + "1 cup heavy cram" → food: "heavy cream". +- Normalize spacing: "1 cup rice" → quantity 1 / unit cup / food rice. +- Preserve EVERY semantic value: numeric quantities verbatim, every prep state, + brand, color, cooking method. If you're unsure whether something is a typo + or intentional (e.g. "yellow squash" is a real food, not a typo of "squash"), + keep it. + +USE RECIPE CONTEXT WHEN AMBIGUOUS: +- The user prompt may include `recipe_name`, `recipe_description`, and + `recipe_steps` alongside the ingredients. When an ingredient is ambiguous + (e.g. just "1 cup flour" — could be all-purpose, bread, cake, etc.), use + the cooking steps to disambiguate. + - Steps say "knead until smooth and elastic" → bread flour + - Steps say "sift with cocoa powder" + recipe is a cake → cake flour + - Steps don't disambiguate → default to all-purpose flour +- If the recipe description names a brand/style, use it. +- DO NOT add ingredients that aren't in the input list. Steps may mention + garnishes that weren't in the ingredient list — leave them out. +- When the steps confirm what an ingredient is, set is_new_food=false + (you have evidence it matches a known canonical) when possible. + +Input shape: + {{"recipe_name": "...", "recipe_description": "...", + "recipe_steps": ["step text", ...], + "ingredients": ["str", "str", ...]}} +The recipe_name/description/steps fields are optional context — they may +be empty or missing; ignore them in that case. + Output shape: {{"parses": [[{{...}}, {{...}}], [{{...}}], [{{...}}, {{...}}, {{...}}], ...]}} The outer list MUST have the same length as the input list. Each inner list MUST contain at least 1 item (use the all-null junk-fallback if needed). @@ -135,14 +165,19 @@ class Sterilizer: """Dry-run: parse all ingredients, return proposals without writing. Each input ingredient produces one IngredientProposal whose - parsed_items list has length 1 (normal case) or N (fan-out).""" + parsed_items list has length 1 (normal case) or N (fan-out). + + Recipe context (name + description + cooking steps) is bundled + into the Sonnet call so ambiguous ingredients ("1 cup flour") + can be disambiguated by what the recipe actually does with them.""" recipe = self.mealie.get_recipe(slug) ingredients = recipe.get("recipeIngredient") or [] if not ingredients: return {"slug": slug, "name": recipe.get("name"), "proposals": []} strings = [_render_ingredient_for_parse(ing) for ing in ingredients] - parses_per_input = self._parse_batch(strings) + recipe_context = _build_recipe_context(recipe) + parses_per_input = self._parse_batch(strings, recipe_context=recipe_context) proposals: list[IngredientProposal] = [] for i, (ing, items) in enumerate(zip(ingredients, parses_per_input)): @@ -205,6 +240,25 @@ class Sterilizer: new_ingredients.append(dict(orig_ing)) continue + # Defensive: if Sonnet returned a single all-null item but the + # original ingredient had a real Mealie food link, this is + # almost certainly a hallucination/parse failure. Pass the + # original through unchanged rather than blank the row. + orig_food = (orig_ing.get("food") or {}) if isinstance(orig_ing.get("food"), dict) else {} + orig_food_id = orig_food.get("id") + orig_food_name = orig_food.get("name") + if ( + len(items) == 1 + and not (items[0].get("food") or "").strip() + and not (items[0].get("note") or "").strip() + and (items[0].get("quantity") in (None, "")) + and orig_food_id + ): + # Sonnet returned all-null for a row that already had data. + # Preserve the original verbatim — never lose a recipe link. + new_ingredients.append(dict(orig_ing)) + continue + for child_idx, parsed in enumerate(items): if child_idx == 0: # First child inherits id/refId/originalText from the @@ -236,13 +290,29 @@ class Sterilizer: if food_id: new_ing["food"] = {"id": food_id, "name": food_name} new_ing["isFood"] = True + elif child_idx == 0 and orig_food_id: + # Sonnet wanted to set a different food but we + # couldn't resolve. Preserve the original link + # rather than blank — never lose recipe data. + new_ing["food"] = {"id": orig_food_id, "name": orig_food_name or ""} + new_ing["isFood"] = True else: new_ing["food"] = None new_ing["isFood"] = False else: - # Section header style — clear food, mark not-food - new_ing["food"] = None - new_ing["isFood"] = False + # Sonnet returned no food. Distinguish two cases: + if child_idx == 0 and orig_food_id: + # Original had a Mealie food link → defensive: KEEP + # it. Sonnet probably misread the input as a section + # header but the recipe author already linked it. + new_ing["food"] = {"id": orig_food_id, "name": orig_food_name or ""} + new_ing["isFood"] = True + else: + # True section header (original was already foodless) + # OR fan-out child (these are net-new rows; if Sonnet + # didn't pick a food for them, drop quietly) + new_ing["food"] = None + new_ing["isFood"] = False unit_name = (parsed.get("unit") or "").strip() if unit_name: @@ -438,11 +508,26 @@ class Sterilizer: # --- per-batch Sonnet call --------------------------------------------- - def _parse_batch(self, strings: list[str]) -> list[list[IngredientParse]]: + def _parse_batch( + self, + strings: list[str], + *, + recipe_context: dict | None = None, + ) -> list[list[IngredientParse]]: """Returns list-of-lists matching the input length. Each inner list is the parses derived from one input string (1 in normal case, N - for fan-out, never 0).""" - prompt = json.dumps({"ingredients": strings}, ensure_ascii=False) + for fan-out, never 0). + + recipe_context is optional — when provided, includes recipe_name, + recipe_description, and recipe_steps so Sonnet can disambiguate + unclear ingredients via the cooking steps.""" + body: dict = {"ingredients": strings} + if recipe_context: + for k in ("recipe_name", "recipe_description", "recipe_steps"): + v = recipe_context.get(k) + if v: + body[k] = v + prompt = json.dumps(body, ensure_ascii=False) try: resp = self.forge.run( prompt=prompt, @@ -506,6 +591,39 @@ class Sterilizer: return index +def _build_recipe_context(recipe: dict) -> dict: + """Pull name + description + cooking steps off a Mealie recipe in a + Sonnet-friendly shape. Capped to ~3000 chars total of step text so + the user prompt doesn't blow past Sonnet's reasonable input size.""" + out: dict = {} + name = (recipe.get("name") or "").strip() + if name: + out["recipe_name"] = name[:200] + desc = (recipe.get("description") or "").strip() + if desc: + out["recipe_description"] = desc[:600] + + instructions = recipe.get("recipeInstructions") or [] + steps: list[str] = [] + char_budget = 3000 + for step in instructions: + if not isinstance(step, dict): + continue + text = (step.get("text") or "").strip() + if not text: + continue + if char_budget <= 0: + break + # Truncate individual step if needed + if len(text) > char_budget: + text = text[:char_budget] + "…" + steps.append(text) + char_budget -= len(text) + if steps: + out["recipe_steps"] = steps + return out + + def _render_ingredient_for_parse(ing: dict) -> str: """Best string representation of a Mealie ingredient for sending to Claude.""" if ing.get("originalText"):