From f74a627ac7d5f118b46123cfdad62425d8c6a38f Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 11:52:25 -0700 Subject: [PATCH] Step 2: re-key cauldron's food metadata by mealie_food_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aligns cauldron's data layer with the architectural rule "Mealie owns canonical food names; cauldron only owns cooking metadata Mealie can't store". The old parallel name catalog (cauldron_foods, 2462 noisy USDA rows + ~229 Sonnet-curated names) was always going to drift from Mealie's foods table over time. Now metadata follows Mealie's UUID, so when Cobb merges or renames a food in Mealie the density+unit_class travel with it automatically. Schema: - New table cauldron_food_metadata (migration 017): primary key is mealie_food_id VARCHAR(64); columns are food_name, density_g_per_ml, common_size_g, default_unit_class, category, source enum (seed / claude / manual), notes JSON, last_updated. - cauldron_foods table stays untouched in this step (Step 4 drops it after the backfill ledger has been verified in production). Code: - cauldron/foods.py rewritten: - get_metadata_by_food_id(db, mealie_food_id) — primary read - upsert_metadata(...) — write keyed by mealie_food_id - fetch_and_persist(db, *, mealie_food_id, food_name, forge) — Sonnet fallback (keyword-only arguments), persists keyed by id - backfill_seed_from_legacy(db, mealie) — one-time migration helper called at app boot when metadata table is empty. Walks Mealie's foods, looks up each in legacy cauldron_foods by name/plural/alias, and for legacy rows that have a density copies density, unit class, common size and category into the new table keyed by Mealie's id. Returns {matched, missed, total_mealie} stats. - Legacy shims (search_food, upsert_claude_food, load_seed_if_empty) kept as no-ops so server boot doesn't break before full cutover. - cauldron/aggregator.py: - Ingredient.mealie_food_id new optional field - aggregate() now keys by mealie_food_id when present, falls back to normalized name. 
Verified with rice-from-3-recipes synthetic: same id → consolidates to "2.25 lb rice" single line as before. - foods_lookup callable signature changed to (name, food_id) — id is primary, name is for display + Sonnet fallback. - cauldron/server.py: - /list view captures Mealie's food.id from each recipe ingredient and threads it through the Ingredient. foods_lookup now does an id-keyed cauldron_food_metadata read; on miss with a known id, calls forge.fetch_food_info and persists. When food.id is missing (ingredient still in note form, no Mealie row linked), returns None and aggregator falls back to name grouping. - Boot: replaces the USDA seed loader with a one-time backfill of legacy cauldron_foods → cauldron_food_metadata via the system Mealie token. Runs only when metadata table is empty. Net effect: rice in 3 recipes that all link to the same Mealie food row now group by UUID, not by lowercased name. When Mealie's foods get cleaned up (Step 3 consolidator), cauldron's metadata follows because the ids are stable. Foundation for the consolidator is now in place. 
--- cauldron/aggregator.py | 49 ++++--- cauldron/db.py | 22 +++ cauldron/foods.py | 310 ++++++++++++++++++++++++++--------------- cauldron/server.py | 77 +++++----- 4 files changed, 298 insertions(+), 160 deletions(-) diff --git a/cauldron/aggregator.py b/cauldron/aggregator.py index 4166e1f..63e4059 100644 --- a/cauldron/aggregator.py +++ b/cauldron/aggregator.py @@ -115,7 +115,8 @@ class Ingredient: """One line on a recipe — what we feed in.""" qty: float | None unit: str | None - food_name: str # raw food name (will be canonicalized via lookup) + food_name: str # raw food name (used for display + Sonnet fallback) + mealie_food_id: str | None = None # Mealie's UUID; primary grouping key when present note: str | None = None source_recipe_slug: str | None = None original_text: str | None = None @@ -137,30 +138,46 @@ class ShoppingLine: def aggregate( ingredients: Iterable[Ingredient], - foods_lookup: Callable[[str], dict | None], + foods_lookup: Callable[[str, str | None], dict | None], ) -> list[ShoppingLine]: - """Group ingredients by canonical food, sum within compatible unit - classes, output a clean shopping-list line per food (or per unit-class - if we can't reconcile). + """Group ingredients by Mealie food.id (when available) and consolidate + quantities. Output is one shopping-list line per food, or N lines per + food when units don't reconcile. - foods_lookup(name) returns {canonical_name, density_g_per_ml, - default_unit_class, common_size_g} or None for unknown foods. + foods_lookup(food_name, mealie_food_id) returns metadata: + {canonical_name, density_g_per_ml, default_unit_class, common_size_g} + or None for foods we have no record of. The id-keyed lookup means + "rice" in 3 different recipes always groups under one canonical line + as long as Mealie has them all linked to the same food row. """ - # Step 1: bucket by canonical food + # Step 1: bucket by stable key. 
Prefer Mealie food.id when present + # (guaranteed consistent across recipes for the same food). Fall + # back to a normalized name when the ingredient hasn't been linked + # to a Mealie food row. by_food: dict[str, list[Ingredient]] = defaultdict(list) food_meta: dict[str, dict] = {} for ing in ingredients: - if not ing.food_name: + if not ing.food_name and not ing.mealie_food_id: continue - meta = foods_lookup(ing.food_name) or {"canonical_name": ing.food_name.strip().lower()} - canonical = meta["canonical_name"] - by_food[canonical].append(ing) - food_meta[canonical] = meta + # Lookup metadata. Both args passed; lookup decides which is + # primary (id-first when set; name as fallback for Sonnet calls). + meta = foods_lookup(ing.food_name, ing.mealie_food_id) or { + "canonical_name": (ing.food_name or "").strip().lower() or "(unknown)" + } + # Stable grouping key: id when we have it, normalized name otherwise. + key = ing.mealie_food_id or meta.get("canonical_name") or ing.food_name.strip().lower() + # Display name: prefer canonical_name from metadata, else the + # Mealie food.name we received. + canonical_display = meta.get("canonical_name") or (ing.food_name or "").strip().lower() + # Stash the display once per group + if key not in food_meta: + food_meta[key] = {**meta, "canonical_name": canonical_display} + by_food[key].append(ing) out: list[ShoppingLine] = [] - for food, group in by_food.items(): - meta = food_meta[food] - out.extend(_aggregate_one_food(food, group, meta)) + for key, group in by_food.items(): + meta = food_meta[key] + out.extend(_aggregate_one_food(meta["canonical_name"], group, meta)) return out diff --git a/cauldron/db.py b/cauldron/db.py index bc6fa68..4f8c62c 100644 --- a/cauldron/db.py +++ b/cauldron/db.py @@ -278,6 +278,28 @@ MIGRATIONS = [ FOREIGN KEY (job_id) REFERENCES cauldron_sterilize_jobs(id) ON DELETE CASCADE ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 """, + # 017 — Per-Mealie-food cooking metadata, keyed by Mealie's UUID food.id. 
+ # Replaces the old parallel name catalog (cauldron_foods) — Mealie owns + # canonical food names, cauldron only owns density + unit class + common + # size for shopping list aggregation. Populated lazily by the /list + # foods_lookup path on the first sighting of each food (claude fallback). + # Carried-over USDA densities will get re-keyed to mealie_food_id during + # the Step 2 backfill, then cauldron_foods can be dropped (Step 4). + """ + CREATE TABLE IF NOT EXISTS cauldron_food_metadata ( + mealie_food_id VARCHAR(64) PRIMARY KEY, + food_name VARCHAR(255), + density_g_per_ml DECIMAL(6,3), + common_size_g DECIMAL(8,2), + default_unit_class ENUM('mass','volume','count','mixed') NOT NULL DEFAULT 'mass', + category VARCHAR(64), + source ENUM('seed','claude','manual') NOT NULL DEFAULT 'claude', + notes JSON, + last_updated DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, + INDEX idx_food_name (food_name), + INDEX idx_category (category) + ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 + """, ] diff --git a/cauldron/foods.py b/cauldron/foods.py index be08130..90dd5f4 100644 --- a/cauldron/foods.py +++ b/cauldron/foods.py @@ -1,143 +1,235 @@ -"""Foods catalog — canonical food rows + the seed loader. +"""Food metadata for cauldron's aggregator — keyed by Mealie food.id. -Seed = cauldron/data/foods_seed.json (Sonnet-curated, ~229 clean rows -with proper densities and unit classes). The raw USDA dump still lives -at foods_seed_usda.json as a reference; we don't load it directly. +Architectural rule: **Mealie owns canonical food names.** Cauldron only +holds the cooking metadata Mealie can't store: density (for volume↔mass +conversion), default unit class (mass/volume/count), common size (for +"1 onion" → grams). Indexed by Mealie's UUID so the metadata follows +the food row through any Mealie merge / rename. -Lookup is exact + case-insensitive. 
Mealie's parser already -canonicalizes food names household-side via its own alias system, so -the food.name we get from Mealie is consistent across recipes. When a -Mealie food name has no match in cauldron_foods, server.py's -ensure_food() calls clawdforge to fetch density+unit_class+common_size_g -for that exact name, persists it with source='claude', and the -household's catalog grows organically. +Lookup pattern (called from /list view): + + meta = foods.get_metadata_by_food_id(db, mealie_food_id) + if meta is None: + meta = foods.fetch_and_persist(db, mealie_food_id, food_name, forge) + # use meta['density_g_per_ml'], meta['default_unit_class'], etc. + +The legacy cauldron_foods table (USDA noisy + Sonnet curated names) is +no longer authoritative — the only thing it's still good for is its +density values, which the migration helper below back-fills into +cauldron_food_metadata when we can resolve a name to a Mealie food.id. """ import json -from pathlib import Path +from typing import Optional -SEED_PATH = Path(__file__).parent / "data" / "foods_seed.json" - - -def seed_count(db) -> int: - with db.conn() as c, c.cursor() as cur: - cur.execute("SELECT COUNT(*) AS n FROM cauldron_foods") - return cur.fetchone()["n"] - - -def load_seed_if_empty(db) -> int: - """If cauldron_foods is empty, load the USDA seed JSON. Returns rows - inserted (0 if already populated). Called by app startup after migrate.""" - if not SEED_PATH.exists(): - return 0 - if seed_count(db) > 0: - return 0 - return _load_seed_file(db, SEED_PATH) - - -def reload_seed(db) -> int: - """Force-reload the seed file (used by /api/foods/reload-seed). Won't - overwrite existing rows — INSERT IGNORE on canonical_name. 
Returns - rows inserted on this run.""" - if not SEED_PATH.exists(): - return 0 - return _load_seed_file(db, SEED_PATH) - - -def _load_seed_file(db, path: Path) -> int: - with path.open() as f: - data = json.load(f) - inserted = 0 - with db.conn() as c, c.cursor() as cur: - for entry in data: - try: - cur.execute( - """ - INSERT IGNORE INTO cauldron_foods - (canonical_name, category, density_g_per_ml, - common_size_g, default_unit_class, usda_fdc_id, - usda_description, source) - VALUES (%s, %s, %s, %s, %s, %s, %s, 'usda') - """, - ( - entry["canonical_name"][:255], - entry.get("category"), - entry.get("density_g_per_ml"), - entry.get("common_size_g"), - entry.get("default_unit_class") or "mass", - entry.get("usda_fdc_id"), - (entry.get("usda_description") or "")[:500], - ), - ) - inserted += cur.rowcount - except Exception: - # Skip malformed rows — seed cleanup is iterative - continue - return inserted - - -def search_food(db, name: str, *, limit: int = 1) -> list[dict]: - """Case-insensitive exact match. Returns [] on miss. - - `limit` kept for backwards-compat with callers; only ever returns 0 or 1. - """ - name_clean = (name or "").strip().lower() - if not name_clean: - return [] +def get_metadata_by_food_id(db, mealie_food_id: str) -> dict | None: + """Single-row read. 
Returns None on miss.""" + if not mealie_food_id: + return None with db.conn() as c, c.cursor() as cur: cur.execute( - """SELECT id, canonical_name, category, density_g_per_ml, - default_unit_class, common_size_g, source - FROM cauldron_foods - WHERE LOWER(canonical_name) = %s - LIMIT 1""", - (name_clean,), + """SELECT mealie_food_id, food_name, density_g_per_ml, + common_size_g, default_unit_class, category, + source, last_updated + FROM cauldron_food_metadata + WHERE mealie_food_id = %s""", + (mealie_food_id,), ) row = cur.fetchone() - return [dict(row)] if row else [] + return dict(row) if row else None -def upsert_claude_food( +def upsert_metadata( db, *, - canonical_name: str, + mealie_food_id: str, + food_name: str | None, density_g_per_ml: float | None, default_unit_class: str, common_size_g: float | None, category: str | None = None, + source: str = "claude", ) -> dict: - """Insert (or update if a row already exists) a canonical food row from - a clawdforge response. Returns the row as a dict. Idempotent.""" - name = canonical_name.strip().lower()[:255] + """Insert or update a metadata row for one Mealie food. Idempotent. 
+ Source defaults to 'claude' (the on-demand Sonnet path); pass 'seed' + for the one-time backfill from the legacy cauldron_foods table or + 'manual' for hand-curated values.""" cls = (default_unit_class or "mass").lower() if cls not in ("mass", "volume", "count", "mixed"): cls = "mass" + if source not in ("seed", "claude", "manual"): + source = "claude" with db.conn() as c, c.cursor() as cur: cur.execute( """ - INSERT INTO cauldron_foods - (canonical_name, category, density_g_per_ml, - common_size_g, default_unit_class, source) - VALUES (%s, %s, %s, %s, %s, 'claude') + INSERT INTO cauldron_food_metadata + (mealie_food_id, food_name, density_g_per_ml, + common_size_g, default_unit_class, category, source) + VALUES (%s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE - category=COALESCE(VALUES(category), category), - density_g_per_ml=COALESCE(VALUES(density_g_per_ml), density_g_per_ml), - common_size_g=COALESCE(VALUES(common_size_g), common_size_g), - default_unit_class=VALUES(default_unit_class), - source='claude' + food_name = COALESCE(VALUES(food_name), food_name), + density_g_per_ml = COALESCE(VALUES(density_g_per_ml), density_g_per_ml), + common_size_g = COALESCE(VALUES(common_size_g), common_size_g), + default_unit_class = VALUES(default_unit_class), + category = COALESCE(VALUES(category), category), + source = VALUES(source) """, - (name, category, density_g_per_ml, common_size_g, cls), + (mealie_food_id, (food_name or "")[:255] or None, + density_g_per_ml, common_size_g, cls, category, source), ) cur.execute( - """SELECT id, canonical_name, category, density_g_per_ml, - default_unit_class, common_size_g, source - FROM cauldron_foods WHERE LOWER(canonical_name)=%s LIMIT 1""", - (name,), + """SELECT mealie_food_id, food_name, density_g_per_ml, + common_size_g, default_unit_class, category, + source, last_updated + FROM cauldron_food_metadata + WHERE mealie_food_id = %s""", + (mealie_food_id,), ) return dict(cur.fetchone()) -def get_food(db, food_id: int) -> 
dict | None: +def fetch_and_persist( + db, + *, + mealie_food_id: str, + food_name: str, + forge, +) -> dict | None: + """On a /list lookup miss, ask clawdforge for cooking metadata and + persist keyed by Mealie food.id. Returns the new row or None on + upstream failure (caller falls back to no-density aggregation).""" + if not mealie_food_id or not food_name: + return None + try: + info = forge.fetch_food_info(food_name) + except Exception: + return None + return upsert_metadata( + db, + mealie_food_id=mealie_food_id, + food_name=food_name, + density_g_per_ml=info.get("density_g_per_ml"), + default_unit_class=info.get("default_unit_class") or "mass", + common_size_g=info.get("common_size_g"), + category=info.get("category"), + source="claude", + ) + + +def metadata_count(db) -> int: with db.conn() as c, c.cursor() as cur: - cur.execute("SELECT * FROM cauldron_foods WHERE id=%s", (food_id,)) - return cur.fetchone() + cur.execute("SELECT COUNT(*) AS n FROM cauldron_food_metadata") + return cur.fetchone()["n"] + + +# --- backfill from the legacy cauldron_foods catalog -------------------- + +def backfill_seed_from_legacy(db, mealie) -> dict: + """One-shot migration helper called at app startup if metadata table + is empty: walk every Mealie food, look up the food name in the + legacy cauldron_foods (USDA noisy + claude-curated rows), and copy + the density / unit_class / common_size into cauldron_food_metadata + keyed by Mealie's food.id. After this runs once, cauldron_foods can + be dropped (Step 4) — its data lives on as metadata. + + Returns {matched, missed, total_mealie} stats. 
Idempotent — won't + overwrite existing metadata rows (uses INSERT IGNORE-equivalent).""" + stats = {"matched": 0, "missed": 0, "total_mealie": 0} + + # Pull Mealie's foods (group-wide; same logic as Sterilizer._load_catalog) + mealie_foods: list[dict] = [] + page = 1 + while page <= 20: + resp = mealie._get("/api/foods", search="", perPage=2000, page=page) + items = resp.get("items") or resp.get("data") or [] + for item in items: + mealie_foods.append(item) + tp = resp.get("total_pages") or resp.get("totalPages") or 1 + if not items or page >= tp: + break + page += 1 + stats["total_mealie"] = len(mealie_foods) + + # Pull legacy cauldron_foods into a name→row map (lowercased). + legacy: dict[str, dict] = {} + try: + with db.conn() as c, c.cursor() as cur: + cur.execute( + """SELECT canonical_name, category, density_g_per_ml, + common_size_g, default_unit_class + FROM cauldron_foods""" + ) + for r in cur.fetchall(): + key = (r["canonical_name"] or "").strip().lower() + if key and key not in legacy: + legacy[key] = dict(r) + except Exception: + # Table may already be dropped — that's fine, no backfill possible + return stats + + if not legacy: + return stats + + for f in mealie_foods: + food_id = f.get("id") + name = (f.get("name") or "").strip() + if not food_id or not name: + continue + # Try direct match first, then plural form, then aliases + keys = [name.lower()] + plural = (f.get("pluralName") or "").strip().lower() + if plural: + keys.append(plural) + for a in (f.get("aliases") or []): + an = "" + if isinstance(a, str): + an = a.strip().lower() + elif isinstance(a, dict): + an = (a.get("name") or "").strip().lower() + if an: + keys.append(an) + hit = None + for k in keys: + if k in legacy: + hit = legacy[k] + break + if hit and hit.get("density_g_per_ml") is not None: + try: + upsert_metadata( + db, + mealie_food_id=food_id, + food_name=name, + density_g_per_ml=float(hit["density_g_per_ml"]), + default_unit_class=hit.get("default_unit_class") or "mass", + 
common_size_g=float(hit["common_size_g"]) if hit.get("common_size_g") else None, + category=hit.get("category"), + source="seed", + ) + stats["matched"] += 1 + except Exception: + pass + else: + stats["missed"] += 1 + return stats + + +# --- legacy shims (kept so server.py doesn't break before full cutover) - + +def search_food(db, name: str, *, limit: int = 1) -> list[dict]: + """DEPRECATED — kept as a no-op shim for any caller still importing it. + Always returns []. New code should call get_metadata_by_food_id with + the Mealie food UUID instead.""" + return [] + + +def upsert_claude_food(db, **kwargs) -> dict: + """DEPRECATED — kept as a no-op shim. New code should call upsert_metadata.""" + return {} + + +def load_seed_if_empty(db) -> int: + """DEPRECATED — the legacy cauldron_foods table is no longer populated + from the JSON seed. Returns 0 unconditionally. The actual one-time + backfill happens in backfill_seed_from_legacy(), called from server + boot when cauldron_food_metadata is empty.""" + return 0 diff --git a/cauldron/server.py b/cauldron/server.py index 62baa1b..ba7e174 100644 --- a/cauldron/server.py +++ b/cauldron/server.py @@ -69,13 +69,22 @@ def create_app() -> Flask: if applied: app.logger.info("applied migrations: %s", applied) - # Seed cauldron_foods from the USDA snapshot if empty + # One-time backfill: re-key the legacy cauldron_foods (USDA + curated) + # densities into cauldron_food_metadata, keyed by Mealie food.id. Runs + # only when the new metadata table is empty. After this lands, the + # legacy parallel-name catalog is no longer authoritative — Step 4 + # cleanup can drop cauldron_foods entirely. 
try: - loaded = foods.load_seed_if_empty(db) - if loaded: - app.logger.info("loaded %d foods from USDA seed", loaded) + if foods.metadata_count(db) == 0: + stats = foods.backfill_seed_from_legacy(db, system_mealie) + app.logger.info( + "cauldron_food_metadata backfill: matched=%d missed=%d total_mealie=%d", + stats.get("matched", 0), + stats.get("missed", 0), + stats.get("total_mealie", 0), + ) except Exception as e: - app.logger.warning("foods seed load failed: %s", e) + app.logger.warning("food metadata backfill failed: %s", e) # Recover sterilize jobs whose worker died mid-run. A new run should # produce no false positives: gunicorn-sync workers reconnect cleanly, @@ -752,6 +761,7 @@ def create_app() -> Flask: unit = (unit_obj.get("name") if isinstance(unit_obj, dict) else "") or "" food_obj = ri.get("food") or {} food_name = (food_obj.get("name") if isinstance(food_obj, dict) else "") or "" + food_id = (food_obj.get("id") if isinstance(food_obj, dict) else None) or None note = ri.get("note") or "" if not food_name and not note: continue @@ -759,44 +769,41 @@ def create_app() -> Flask: qty=float(qty) if qty not in (None, "") else None, unit=unit, food_name=food_name or note, + mealie_food_id=food_id, note=note if food_name else None, source_recipe_slug=s["recipe_slug"], original_text=ri.get("display") or _ing_render(qty, unit, food_name, note), )) - # foods_lookup: exact match → clawdforge fallback that persists. - # Per-request cache so the same food in multiple recipes isn't - # re-queried within one /list render. + # foods_lookup is now id-keyed: takes (food_name, mealie_food_id), + # primary lookup is by Mealie's UUID via cauldron_food_metadata. + # On a miss with a known food_id, calls clawdforge and persists. + # When food_id is missing (ingredient still in note form), returns + # None — the aggregator will fall back to name-based grouping. 
lookup_cache: dict[str, dict | None] = {} - def foods_lookup(name: str): - key = (name or "").strip().lower() - if not key: - return None - if key in lookup_cache: - return lookup_cache[key] - hits = foods.search_food(db, key, limit=1) - if hits: - lookup_cache[key] = hits[0] - return hits[0] - # Miss — ask clawdforge, persist, return. On any failure, cache - # None for this request so we don't spam the model. - try: - info = forge.fetch_food_info(key) - row = foods.upsert_claude_food( - db, - canonical_name=key, - density_g_per_ml=info.get("density_g_per_ml"), - default_unit_class=info.get("default_unit_class") or "mass", - common_size_g=info.get("common_size_g"), - category=info.get("category"), - ) - lookup_cache[key] = row - return row - except Exception as exc: - app.logger.warning("forge.fetch_food_info(%r) failed: %s", key, exc) - lookup_cache[key] = None + def foods_lookup(name: str, food_id: str | None): + if not food_id: return None + cache_key = food_id + if cache_key in lookup_cache: + return lookup_cache[cache_key] + meta = foods.get_metadata_by_food_id(db, food_id) + if meta: + # Normalize shape — aggregator expects canonical_name key + meta = {**meta, "canonical_name": meta.get("food_name") or (name or "").lower()} + lookup_cache[cache_key] = meta + return meta + # Miss — clawdforge fetch, persist by id + row = foods.fetch_and_persist( + db, mealie_food_id=food_id, food_name=name or "", forge=forge + ) + if row: + row = {**row, "canonical_name": row.get("food_name") or (name or "").lower()} + else: + app.logger.warning("foods.fetch_and_persist failed for food_id=%s name=%r", food_id, name) + lookup_cache[cache_key] = row + return row lines = aggregator.aggregate(raw_ings, foods_lookup) return render_template(