From d359bed450516efa2f264e258ea7a997ad560502 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 11:07:20 -0700 Subject: [PATCH] sterilize: fan-out compound lines + filter identity rows in diff UI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cobb spotted job 4's proposals weren't actually doing useful work — "Toppings (Cinnamon Butter, Marshmallows, Ground Cinnamon, Butter, Etc)" came back unchanged because the prompt was rigidly 1-in-1-out and treated the whole compound line as a section header. Same for "salt and ground black pepper to taste" — should be 2 separate shopping list items but the parser kept them as one note. Three changes: 1. STERILIZE_SYSTEM rewritten to allow fan-out. New return shape is list-of-lists: outer list mirrors input length, each inner list has 1 item (normal case) or N items (fan-out). Explicit fan-out rules cover the two patterns Cobb cares about: - "salt and pepper" / "X and Y to taste" → 2 items - "Toppings (a, b, c, etc)" / "Optional: A, B, C" → N items, wrapper word dropped, filler ("etc") skipped Plus a heuristic against accidentally splitting compound food names ("salt and vinegar chips", "macaroni and cheese" → keep). 2. _parse_batch + IngredientProposal + apply_recipe all updated for the new shape. IngredientProposal.parsed → parsed_items: list. apply_recipe iterates each child: - First child inherits the original Mealie row's id/refId/originalText so existing references stay live - Additional children are fresh dicts that carry over only the original's referenceId and originalText (no id); Mealie generates ids on save when none provided Backward-compat fallback in apply_recipe accepts the legacy single-parsed shape so any in-flight job 4 proposals still apply cleanly. 3. 
/sterilize UI was→becomes table now: - Renders one row per parsed child (rowspan'd "was" cell when fanning out, with a "→³" superscript marker on the arrow) - Drops identity rows (1→1 case where parse matches original verbatim) so the diff shows only ACTUAL changes — fixes Cobb's "this doesn't look sterilized at all" complaint where every diff was identical - Cards with all-identity proposals show "no changes proposed (all ingredients already matched)" instead of an empty table Job 4's stored proposals use the legacy 1→1 shape so won't show fan-out until re-walked. Recommend cancelling job 4 and starting a fresh job 5 with the new prompt to see the toppings line break out properly. --- cauldron/sterilizer.py | 204 +++++++++++++++++++++--------- cauldron/templates/sterilize.html | 83 ++++++++---- 2 files changed, 203 insertions(+), 84 deletions(-) diff --git a/cauldron/sterilizer.py b/cauldron/sterilizer.py index ff7e263..16c31f4 100644 --- a/cauldron/sterilizer.py +++ b/cauldron/sterilizer.py @@ -23,31 +23,52 @@ from .mealie import Mealie, MealieError STERILIZE_SYSTEM = """You are a precise recipe ingredient parser. You ONLY output valid JSON. -You receive a list of free-form ingredient strings and must return a parallel -array where each item is parsed into structured form. +You receive a list of free-form ingredient strings and return a parallel +list of LISTS — one inner list per input. Most inputs map 1→1 (single item +inside the list). Compound lines that name multiple distinct foods MUST +fan out into multiple items so each food gets its own row on the shopping +list. -Output schema (per item): +Per-item schema: { - "quantity": , # numeric amount, fractions converted to decimals (1/2 -> 0.5) - "unit": , # singular canonical form: "cup", "tbsp", "tsp", "oz", "lb", "g", "kg", "ml", "l", "clove", "slice", "can", "package", "piece", "pinch", "dash", "handful". null if no unit (e.g. "1 onion"). 
- "food": , # the core food noun in singular canonical form: "onion", "garlic", "rice", "olive oil". Strip prep state ("chopped", "diced") -- those go in note. + "quantity": , # numeric amount; fractions → decimals (1/2 → 0.5) + "unit": , # singular canonical: "cup", "tbsp", "tsp", "oz", "lb", "g", "kg", "ml", "l", "clove", "slice", "can", "package", "piece", "pinch", "dash", "handful". null if no unit (e.g. "1 onion"). + "food": , # core food noun, singular canonical lowercase: "onion", "garlic", "rice", "olive oil". Strip prep state ("chopped", "diced") into note. "note": , # prep state, brand, color, modifier: "chopped", "extra virgin", "yellow", "to taste" - "approx": # true if the input said "about" / "a pinch" / "to taste" / vague qty + "approx": # true if input said "about", "a pinch", "to taste", or otherwise vague } -Rules: -- Convert fractions: "1/2" -> 0.5, "1 1/4" -> 1.25 -- "a pinch", "a dash", "to taste" -> {quantity: null, approx: true, note: "to taste"} -- "1 small onion" -> {quantity: 1, unit: null, food: "onion", note: "small"} -- "2 cloves garlic, minced" -> {quantity: 2, unit: "clove", food: "garlic", note: "minced"} -- Section headers like "For the sauce:" -> all fields null EXCEPT note: "
" -- If you genuinely cannot parse, set all fields null and put the original in note. +FAN-OUT RULES — return MULTIPLE items for one input when: +- "salt and pepper" / "salt and ground black pepper to taste" → split into 2 items, each + {quantity: 1, unit: "dash", food: "salt"|"black pepper", note: "to taste", approx: true} +- "Toppings (cinnamon butter, marshmallows, ground cinnamon, butter, etc)" or + "Optional: cilantro, lime, queso fresco" → one item per food in the comma list. + Drop the wrapper word ("Toppings", "Optional"); leave it OUT of food/note. Skip + filler words like "etc". Each item: quantity=null, unit=null, food=, note=null, approx=true. +- "1 lemon, juice and zest" → 2 items: {qty:1, unit:null, food:"lemon juice"} and {qty:1, unit:null, food:"lemon zest"} +- DO NOT split "salt and vinegar chips" or "macaroni and cheese" — those are + compound food names, not multi-food lines. Heuristic: if the words on either + side of "and" are a recognized standalone food, split; otherwise keep as one. + +PARSE RULES (for the common 1→1 case): +- Convert fractions: "1/2" → 0.5, "1 1/4" → 1.25 +- "a pinch" / "a dash" alone → {quantity: 1, unit: "pinch"|"dash", approx: true} +- "to taste" alone → {quantity: null, unit: null, food: , note: "to taste", approx: true} +- "1 small onion" → {quantity: 1, unit: null, food: "onion", note: "small"} +- "2 cloves garlic, minced" → {quantity: 2, unit: "clove", food: "garlic", note: "minced"} +- "1.5 cups broccoli (coarsely chopped florets)" → {quantity: 1.5, unit: "cup", food: "broccoli", note: "coarsely chopped florets"} +- Section headers like "For the sauce:" → 1 item with all fields null EXCEPT + note: "
" (so Mealie can preserve the header row) +- If you genuinely cannot parse (junk input), return 1 item with all fields null + and the original string in note. - DO NOT add fields not in the schema. - DO NOT wrap output in markdown fences. - DO NOT include any prose before or after the JSON. -You will be given a JSON object: {"ingredients": ["str", "str", ...]} -You return: {"parses": [{...}, {...}, ...]} -- same length, same order. +Input shape: {"ingredients": ["str", "str", ...]} +Output shape: {"parses": [[{...}, {...}], [{...}], [{...}, {...}, {...}], ...]} +The outer list MUST have the same length as the input list. Each inner list +MUST contain at least 1 item (use the all-null junk-fallback if needed). """ @@ -62,14 +83,16 @@ class IngredientParse: @dataclass class IngredientProposal: - """One ingredient before vs after.""" + """One original ingredient → one or more parsed children. parsed_items + has length 1 in normal cases; >1 when a compound line was fanned out + ("Toppings (a, b, c)" → 3 children, "salt and pepper" → 2 children).""" index: int original_display: str original_quantity: float | None original_unit_name: str | None original_food_name: str | None original_note: str | None - parsed: IngredientParse + parsed_items: list[IngredientParse] class Sterilizer: @@ -81,17 +104,20 @@ class Sterilizer: # --- public ------------------------------------------------------------- def preview_recipe(self, slug: str) -> dict: - """Dry-run: parse all ingredients, return proposals without writing.""" + """Dry-run: parse all ingredients, return proposals without writing. 
+ + Each input ingredient produces one IngredientProposal whose + parsed_items list has length 1 (normal case) or N (fan-out).""" recipe = self.mealie.get_recipe(slug) ingredients = recipe.get("recipeIngredient") or [] if not ingredients: return {"slug": slug, "name": recipe.get("name"), "proposals": []} strings = [_render_ingredient_for_parse(ing) for ing in ingredients] - parses = self._parse_batch(strings) + parses_per_input = self._parse_batch(strings) proposals: list[IngredientProposal] = [] - for i, (ing, parse) in enumerate(zip(ingredients, parses)): + for i, (ing, items) in enumerate(zip(ingredients, parses_per_input)): proposals.append( IngredientProposal( index=i, @@ -100,7 +126,7 @@ class Sterilizer: original_unit_name=(ing.get("unit") or {}).get("name") if ing.get("unit") else None, original_food_name=(ing.get("food") or {}).get("name") if ing.get("food") else None, original_note=ing.get("note"), - parsed=parse, + parsed_items=items, ) ) @@ -138,40 +164,74 @@ class Sterilizer: new_ingredients: list[dict] = [] for orig_ing, prop in zip(recipe.get("recipeIngredient") or [], proposals): - parsed = prop["parsed"] - new_ing = dict(orig_ing) # preserve id, refId, original_text + # Each proposal can produce 1+ parsed children (fan-out for + # compound inputs like "Toppings (a, b, c)" or "salt and pepper"). + # Keep the proposal_json key flexible: prefer parsed_items but + # fall back to a single 'parsed' for backward-compat. 
+ items = prop.get("parsed_items") + if not isinstance(items, list) or not items: + legacy = prop.get("parsed") + items = [legacy] if isinstance(legacy, dict) else [] + if not items: + # Nothing to write — pass the original through unchanged + new_ingredients.append(dict(orig_ing)) + continue - new_ing["quantity"] = parsed["quantity"] + for child_idx, parsed in enumerate(items): + if child_idx == 0: + # First child inherits id/refId/originalText from the + # original Mealie row, so existing references stay live + new_ing = dict(orig_ing) + else: + # Additional children are fresh rows. Mealie generates + # ids on save when none provided. + new_ing = { + # Inherit refId so all fan-out children belong to + # the same logical group as the original. Some + # Mealie versions tolerate dup refIds; others + # generate one if missing. + "referenceId": orig_ing.get("referenceId"), + "title": None, + "originalText": orig_ing.get("originalText") or orig_ing.get("display"), + "disableAmount": False, + } - food_name = (parsed.get("food") or "").strip() - if food_name: - food_id = self._resolve_food( - food_name, food_index, - create_missing=create_missing, - created_log=created_foods, - ) - if food_id: - new_ing["food"] = {"id": food_id, "name": food_name} - new_ing["isFood"] = True - else: - # Section header style — clear food, mark not-food - new_ing["food"] = None - new_ing["isFood"] = False + new_ing["quantity"] = parsed.get("quantity") - unit_name = (parsed.get("unit") or "").strip() - if unit_name: - unit_id = self._resolve_unit( - unit_name, unit_index, - create_missing=create_missing, - created_log=created_units, - ) - if unit_id: - new_ing["unit"] = {"id": unit_id, "name": unit_name} - else: - new_ing["unit"] = None + food_name = (parsed.get("food") or "").strip() + if food_name: + food_id = self._resolve_food( + food_name, food_index, + create_missing=create_missing, + created_log=created_foods, + ) + if food_id: + new_ing["food"] = {"id": food_id, "name": food_name} + 
new_ing["isFood"] = True + else: + new_ing["food"] = None + new_ing["isFood"] = False + else: + # Section header style — clear food, mark not-food + new_ing["food"] = None + new_ing["isFood"] = False - new_ing["note"] = parsed.get("note") or "" - new_ingredients.append(new_ing) + unit_name = (parsed.get("unit") or "").strip() + if unit_name: + unit_id = self._resolve_unit( + unit_name, unit_index, + create_missing=create_missing, + created_log=created_units, + ) + if unit_id: + new_ing["unit"] = {"id": unit_id, "name": unit_name} + else: + new_ing["unit"] = None + else: + new_ing["unit"] = None + + new_ing["note"] = parsed.get("note") or "" + new_ingredients.append(new_ing) recipe["recipeIngredient"] = new_ingredients self.mealie.update_recipe(slug, recipe) @@ -283,7 +343,10 @@ class Sterilizer: # --- private ------------------------------------------------------------ - def _parse_batch(self, strings: list[str]) -> list[IngredientParse]: + def _parse_batch(self, strings: list[str]) -> list[list[IngredientParse]]: + """Returns list-of-lists matching the input length. Each inner list + is the parses derived from one input string (1 in normal case, N + for fan-out, never 0).""" prompt = json.dumps({"ingredients": strings}, ensure_ascii=False) try: resp = self.forge.run( @@ -305,17 +368,34 @@ class Sterilizer: f"parse count mismatch: got {len(parses_raw)}, expected {len(strings)}" ) - out: list[IngredientParse] = [] + out: list[list[IngredientParse]] = [] for p in parses_raw: - out.append( - IngredientParse( - quantity=_coerce_float(p.get("quantity")), - unit=_clean_str(p.get("unit")), - food=_clean_str(p.get("food")), - note=_clean_str(p.get("note")), - approx=bool(p.get("approx")), + # Defensive: accept either a single dict (legacy 1→1 shape) or + # a list of dicts (fan-out shape). Normalize to list-of-dicts. 
+ items_raw = p if isinstance(p, list) else [p] + if not items_raw: + # Empty list — substitute a fallback all-null item so we + # never lose track of an input slot + items_raw = [{"quantity": None, "unit": None, "food": None, + "note": None, "approx": False}] + inner: list[IngredientParse] = [] + for it in items_raw: + if not isinstance(it, dict): + continue + inner.append( + IngredientParse( + quantity=_coerce_float(it.get("quantity")), + unit=_clean_str(it.get("unit")), + food=_clean_str(it.get("food")), + note=_clean_str(it.get("note")), + approx=bool(it.get("approx")), + ) ) - ) + if not inner: + inner.append(IngredientParse( + quantity=None, unit=None, food=None, note=None, approx=False + )) + out.append(inner) return out @staticmethod diff --git a/cauldron/templates/sterilize.html b/cauldron/templates/sterilize.html index 86343c5..e90f260 100644 --- a/cauldron/templates/sterilize.html +++ b/cauldron/templates/sterilize.html @@ -369,22 +369,43 @@ card.appendChild(head); if (!p.preview_error && p.proposal_json && p.proposal_json.proposals) { - const tbl = document.createElement('table'); - tbl.className = 'diff-table'; - tbl.innerHTML = ` - - was→becomes - - - ${p.proposal_json.proposals.map(rowProposal).join('')} - `; - card.appendChild(tbl); + const rows = p.proposal_json.proposals.map(rowProposal).join(''); + if (rows.trim()) { + const tbl = document.createElement('table'); + tbl.className = 'diff-table'; + tbl.innerHTML = ` + was→becomes + ${rows}`; + card.appendChild(tbl); + } else { + // All rows were identity matches — sonnet thinks this recipe is + // already clean. Show a marker; user can still skip/approve as + // a no-op apply (which will be cheap, just refreshes food.id + // resolution if any food row got renamed in Mealie). 
+ const note = document.createElement('div'); + note.className = 'proposal-meta'; + note.style.marginTop = '6px'; + note.textContent = 'no changes proposed (all ingredients already matched)'; + card.appendChild(note); + } } return card; } + function renderItem(pa) { + const parts = []; + if (pa.quantity !== null && pa.quantity !== undefined) parts.push(pa.quantity); + if (pa.unit) parts.push(pa.unit); + if (pa.food) parts.push(pa.food); + if (pa.note) parts.push(`(${pa.note})`); + return parts.length ? parts.join(' ') : '—'; + } + function rowProposal(rp) { - const pa = rp.parsed || {}; + // Render one or more rows for a single proposal. Fan-out shows + // multiple "becomes" rows under one "was" cell; identity rows + // (parse matches original verbatim) are dropped so the table + // only displays actual changes. const wasParts = []; if (rp.original_quantity !== null && rp.original_quantity !== undefined) wasParts.push(rp.original_quantity); if (rp.original_unit_name) wasParts.push(rp.original_unit_name); @@ -392,18 +413,36 @@ if (rp.original_note) wasParts.push(`(${rp.original_note})`); const wasStr = wasParts.length ? wasParts.join(' ') : (rp.original_display || '—'); - const newParts = []; - if (pa.quantity !== null && pa.quantity !== undefined) newParts.push(pa.quantity); - if (pa.unit) newParts.push(pa.unit); - if (pa.food) newParts.push(pa.food); - if (pa.note) newParts.push(`(${pa.note})`); - const newStr = newParts.length ? newParts.join(' ') : '—'; + const items = Array.isArray(rp.parsed_items) ? rp.parsed_items + : (rp.parsed ? [rp.parsed] : []); + const fanout = items.length > 1; + const renderedNew = items.map(renderItem); - return ` - ${escapeHtml(wasStr)} - → - ${escapeHtml(newStr)} - `; + // Decide which rows to keep. In the 1→1 case, drop if was==new. + // In the fan-out case, always show every child (even if one happens + // to match a piece of the original). 
+ let rows; + if (!fanout) { + if (renderedNew.length === 0 || renderedNew[0] === wasStr) return ''; + rows = renderedNew; + } else { + rows = renderedNew; + } + if (!rows.length) return ''; + + const arrow = fanout + ? `→${rows.length}` + : '→'; + + return rows.map((newStr, idx) => { + const wasCell = (idx === 0) + ? `${escapeHtml(wasStr)}` + : ''; + const arrowCell = (idx === 0) + ? `${arrow}` + : ''; + return `${wasCell}${arrowCell}${escapeHtml(newStr)}`; + }).join(''); } function escapeHtml(s) {