cauldron/cauldron/db.py
Kayos c7ee84d70a search: local fuzzy recipe index — way smarter than Mealie's lexical default
Cobb: 'searching recipes is a bit off. lets make that way way more on
point. need to be the google of recipe searching.'

Architecture:
- New cauldron_recipe_index table mirrors enough of Mealie's recipe shape
  to fuzzy-rank locally without round-tripping. Migrations 008+009.
- Refresh on first /recipes load + every 5 minutes + on-demand button.
  Single page-200 pull from Mealie covers Cobb's 226 recipes in one trip.
- recipe_index.py — flatten_recipe(), refresh_household_index(),
  search_index().

Search algorithm (rapidfuzz):
- Multi-field weighted: name×1.00, tags×0.85, cats×0.80, foods×0.70,
  ings×0.55, description×0.45 (max-of wins, not sum, to avoid noise spike)
- Three scorers per field: WRatio (overall), partial_token_set_ratio
  (handles 'spag bol' → 'Spaghetti Bolognese'), token_set_ratio
  (order-independent)
- Substring-of-query in title bonus +20
- Floor 50 to filter junk
- Top-80 returned

API:
- /api/recipes.json now uses local index for both search and browse
- /recipes route same — first-page server-render from index
- POST /api/index/refresh — manual refresh button (admin-y)
- ?q=...  → ranked fuzzy results, paginated
- no q   → ordered browse from index, paginated, has_next via lookahead

Performance:
- Local index query: ~5ms for browse
- Search across 226 rows × 6 fields × 3 scorers: ~60ms
- Should feel instant compared to Mealie's network round-trip
2026-04-28 21:37:12 -07:00

608 lines
25 KiB
Python

"""DB access + migrations against sulkta-mariadb.
Uses PyMySQL with a tiny per-request connection (no pool) — Cauldron is
LAN-only family-internal, traffic is single-digit qps. If load ever grows
swap in DBUtils.PooledDB or SQLAlchemy.
"""
from contextlib import contextmanager
from pathlib import Path
import pymysql
import pymysql.cursors
# Ordered list of schema migrations, applied by DB.migrate().
#
# The version recorded in schema_migrations is the statement's 1-based
# position in this list, zero-padded to three digits ("001", "002", ...).
# Consequences:
#   * only ever APPEND to this list — inserting or reordering would shift
#     the version of every later statement;
#   * each list element is one version, so the household + membership pair
#     below occupies two versions (006 and 007). The numbers in the comments
#     here match what is stored in schema_migrations; older notes that call
#     the recipe-index tables "008+009" predate this renumbering of the
#     comments and refer to what is recorded as 009 and 010.
# Every statement uses CREATE TABLE IF NOT EXISTS, so re-running one is
# harmless.
MIGRATIONS = [
# 001 — bookkeeping: the schema_migrations table itself
"""
CREATE TABLE IF NOT EXISTS schema_migrations (
version VARCHAR(16) PRIMARY KEY,
applied_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 002 — users (Authentik subject is the PK)
"""
CREATE TABLE IF NOT EXISTS cauldron_users (
authentik_sub VARCHAR(190) PRIMARY KEY,
email VARCHAR(255) NOT NULL,
display_name VARCHAR(255),
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_seen DATETIME,
INDEX idx_email (email)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 003 — per-user encrypted Mealie tokens (removed with the user via
# ON DELETE CASCADE)
"""
CREATE TABLE IF NOT EXISTS cauldron_user_mealie_tokens (
authentik_sub VARCHAR(190) PRIMARY KEY,
encrypted_token BLOB NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
last_validated DATETIME,
last_failure_at DATETIME,
last_failure_reason VARCHAR(500),
FOREIGN KEY (authentik_sub) REFERENCES cauldron_users(authentik_sub)
ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 004 — chat / AI run log (joins to clawdforge runs server-side)
"""
CREATE TABLE IF NOT EXISTS cauldron_chat_log (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
authentik_sub VARCHAR(190) NOT NULL,
ts DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
intent VARCHAR(64),
forge_duration_ms INT,
forge_model VARCHAR(64),
prompt_chars INT,
result_chars INT,
ok BOOLEAN NOT NULL DEFAULT TRUE,
error VARCHAR(500),
INDEX idx_user_ts (authentik_sub, ts)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 005 — meal picks: per-user list of recipes the user wants in the next
# AI meal plan run. Pre-populated wishlist that the planner respects.
# One row per (user, recipe slug); no foreign key to cauldron_users.
"""
CREATE TABLE IF NOT EXISTS cauldron_meal_picks (
authentik_sub VARCHAR(190) NOT NULL,
recipe_slug VARCHAR(255) NOT NULL,
recipe_name VARCHAR(500) NOT NULL,
added_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (authentik_sub, recipe_slug),
INDEX idx_user_added (authentik_sub, added_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 006 — households (cached mirror of Mealie's household).
# Uniquely identified by mealie_household_id (Mealie's UUID); the local
# primary key is a separate auto-increment id that other tables reference.
# Multiple cauldron users join via the same Mealie household to share
# picks/plans.
"""
CREATE TABLE IF NOT EXISTS cauldron_households (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
mealie_household_id VARCHAR(64) UNIQUE NOT NULL,
name VARCHAR(255) NOT NULL,
created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 007 — household membership: links users to households with a role.
# Rows disappear when either the household or the user is deleted.
"""
CREATE TABLE IF NOT EXISTS cauldron_household_members (
household_id BIGINT NOT NULL,
authentik_sub VARCHAR(190) NOT NULL,
role VARCHAR(32) NOT NULL DEFAULT 'member',
joined_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (household_id, authentik_sub),
FOREIGN KEY (household_id) REFERENCES cauldron_households(id) ON DELETE CASCADE,
FOREIGN KEY (authentik_sub) REFERENCES cauldron_users(authentik_sub) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 008 — meal plans (per household per week). Lock state + race metadata.
# week_start = Monday (date) of the week. At most one plan per
# (household, week) thanks to uk_household_week.
"""
CREATE TABLE IF NOT EXISTS cauldron_meal_plans (
id BIGINT PRIMARY KEY AUTO_INCREMENT,
household_id BIGINT NOT NULL,
week_start DATE NOT NULL,
generated_by_sub VARCHAR(190),
generated_at DATETIME,
locked_by_sub VARCHAR(190),
locked_at DATETIME,
locked_reason ENUM('user','auto') DEFAULT NULL,
UNIQUE KEY uk_household_week (household_id, week_start),
INDEX idx_locked_by (locked_by_sub),
FOREIGN KEY (household_id) REFERENCES cauldron_households(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 009 — local recipe index for fast in-process search. Mirrors enough
# of Mealie's recipe shape to fuzzy-rank without round-tripping to
# Mealie on every keystroke. Refreshed on demand (on first /recipes
# load, after pin/unpin, every 5min, or on /me 'refresh' button).
# Note: the FULLTEXT key covers name/description/tags/cats/foods but
# not ings_text.
"""
CREATE TABLE IF NOT EXISTS cauldron_recipe_index (
household_id BIGINT NOT NULL,
slug VARCHAR(255) NOT NULL,
name VARCHAR(500) NOT NULL,
description TEXT,
tags_text TEXT,
cats_text TEXT,
foods_text TEXT,
ings_text TEXT,
date_updated DATETIME,
date_added DATETIME,
last_made DATETIME,
total_time VARCHAR(64),
recipe_yield VARCHAR(255),
raw_json JSON,
indexed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
PRIMARY KEY (household_id, slug),
FULLTEXT KEY ft_text (name, description, tags_text, cats_text, foods_text),
INDEX idx_household (household_id),
FOREIGN KEY (household_id) REFERENCES cauldron_households(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
# 010 — refresh state per household (when the index was last rebuilt and
# how many recipes it held)
"""
CREATE TABLE IF NOT EXISTS cauldron_recipe_index_state (
household_id BIGINT PRIMARY KEY,
last_refreshed_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP,
recipe_count INT NOT NULL DEFAULT 0,
FOREIGN KEY (household_id) REFERENCES cauldron_households(id) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
""",
]
class DB:
def __init__(self, *, host: str, port: int, name: str, user: str, password: str):
self.kwargs = dict(
host=host,
port=port,
user=user,
password=password,
database=name,
charset="utf8mb4",
cursorclass=pymysql.cursors.DictCursor,
autocommit=False,
)
@contextmanager
def conn(self):
c = pymysql.connect(**self.kwargs)
try:
yield c
c.commit()
except Exception:
c.rollback()
raise
finally:
c.close()
def migrate(self) -> list[str]:
"""Apply pending migrations. Returns list of versions applied."""
applied: list[str] = []
with self.conn() as c:
with c.cursor() as cur:
cur.execute(MIGRATIONS[0]) # bootstrap migrations table
cur.execute("SELECT version FROM schema_migrations")
done = {r["version"] for r in cur.fetchall()}
for i, sql in enumerate(MIGRATIONS, start=1):
ver = f"{i:03d}"
if ver in done:
continue
cur.execute(sql)
# IGNORE to tolerate the multi-worker boot race where two
# gunicorn workers both bootstrap an empty migrations table
cur.execute(
"INSERT IGNORE INTO schema_migrations (version) VALUES (%s)", (ver,)
)
applied.append(ver)
return applied
# --- user ops -----------------------------------------------------------
def upsert_user(self, *, sub: str, email: str, display_name: str | None) -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT INTO cauldron_users (authentik_sub, email, display_name, last_seen)
VALUES (%s, %s, %s, NOW())
ON DUPLICATE KEY UPDATE
email = VALUES(email),
display_name = COALESCE(VALUES(display_name), display_name),
last_seen = NOW()
""",
(sub, email, display_name),
)
def get_user(self, sub: str) -> dict | None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT authentik_sub, email, display_name, last_seen FROM cauldron_users WHERE authentik_sub=%s",
(sub,),
)
return cur.fetchone()
# --- mealie token ops ---------------------------------------------------
def get_user_mealie_token_blob(self, sub: str) -> bytes | None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT encrypted_token FROM cauldron_user_mealie_tokens WHERE authentik_sub=%s",
(sub,),
)
row = cur.fetchone()
return row["encrypted_token"] if row else None
def set_user_mealie_token_blob(self, sub: str, blob: bytes) -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT INTO cauldron_user_mealie_tokens (authentik_sub, encrypted_token, last_validated)
VALUES (%s, %s, NOW())
ON DUPLICATE KEY UPDATE
encrypted_token = VALUES(encrypted_token),
last_validated = NOW(),
last_failure_at = NULL,
last_failure_reason = NULL
""",
(sub, blob),
)
def delete_user_mealie_token(self, sub: str) -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"DELETE FROM cauldron_user_mealie_tokens WHERE authentik_sub=%s",
(sub,),
)
def mark_user_mealie_token_failure(self, sub: str, reason: str) -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
UPDATE cauldron_user_mealie_tokens
SET last_failure_at = NOW(), last_failure_reason = %s
WHERE authentik_sub = %s
""",
(reason[:500], sub),
)
# --- households ---------------------------------------------------------
def upsert_household(self, *, mealie_household_id: str, name: str) -> int:
"""Create or update a household record. Returns local PK (id)."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT INTO cauldron_households (mealie_household_id, name)
VALUES (%s, %s)
ON DUPLICATE KEY UPDATE name = VALUES(name), id = LAST_INSERT_ID(id)
""",
(mealie_household_id, name),
)
return cur.lastrowid
def add_household_member(self, household_id: int, sub: str, role: str = "member") -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT IGNORE INTO cauldron_household_members
(household_id, authentik_sub, role)
VALUES (%s, %s, %s)
""",
(household_id, sub, role),
)
def get_user_household_id(self, sub: str) -> int | None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT household_id FROM cauldron_household_members WHERE authentik_sub=%s LIMIT 1",
(sub,),
)
row = cur.fetchone()
return row["household_id"] if row else None
def list_household_member_subs(self, household_id: int) -> list[str]:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT authentik_sub FROM cauldron_household_members WHERE household_id=%s",
(household_id,),
)
return [r["authentik_sub"] for r in cur.fetchall()]
# --- meal plans (per household per week) -------------------------------
def get_or_create_plan(self, household_id: int, week_start) -> dict:
"""Get the plan record for a (household, week_start), creating an
empty one if it doesn't exist. week_start is a date (Monday)."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT IGNORE INTO cauldron_meal_plans (household_id, week_start)
VALUES (%s, %s)
""",
(household_id, week_start),
)
cur.execute(
"SELECT * FROM cauldron_meal_plans WHERE household_id=%s AND week_start=%s",
(household_id, week_start),
)
return dict(cur.fetchone())
def lock_plan(self, plan_id: int, *, sub: str, reason: str = "user") -> dict:
"""Lock a plan if not already locked. Returns updated plan dict."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
UPDATE cauldron_meal_plans
SET locked_by_sub = %s, locked_at = NOW(), locked_reason = %s
WHERE id = %s AND locked_at IS NULL
""",
(sub, reason, plan_id),
)
cur.execute("SELECT * FROM cauldron_meal_plans WHERE id=%s", (plan_id,))
return dict(cur.fetchone())
def auto_lock_past_unlocked_plans(self, household_id: int, before_date) -> int:
"""Mark any past unlocked plans as auto-locked. Returns count."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
UPDATE cauldron_meal_plans
SET locked_at = NOW(), locked_reason = 'auto'
WHERE household_id = %s AND week_start < %s AND locked_at IS NULL
""",
(household_id, before_date),
)
return cur.rowcount
def household_scoreboard(self, household_id: int) -> list[dict]:
"""Per-user lock counts + most recent lock time. Joins to users for
display name. Excludes auto-locks (those are no-one's win)."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
SELECT
u.authentik_sub AS sub,
u.email AS email,
u.display_name AS display_name,
COUNT(p.id) AS wins,
MAX(p.locked_at) AS last_win
FROM cauldron_household_members m
LEFT JOIN cauldron_users u
ON u.authentik_sub = m.authentik_sub
LEFT JOIN cauldron_meal_plans p
ON p.locked_by_sub = m.authentik_sub
AND p.household_id = m.household_id
AND p.locked_reason = 'user'
WHERE m.household_id = %s
GROUP BY u.authentik_sub, u.email, u.display_name
ORDER BY wins DESC, last_win DESC
""",
(household_id,),
)
return [dict(r) for r in cur.fetchall()]
def household_streak(self, household_id: int) -> dict | None:
"""Compute current win streak: walk back from most recent locked week,
counting consecutive weeks won by the same user. Returns
{sub, display_name, count} or None if no locks."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
SELECT p.week_start, p.locked_by_sub, u.display_name, u.email
FROM cauldron_meal_plans p
LEFT JOIN cauldron_users u ON u.authentik_sub = p.locked_by_sub
WHERE p.household_id = %s
AND p.locked_at IS NOT NULL
AND p.locked_reason = 'user'
ORDER BY p.week_start DESC
""",
(household_id,),
)
rows = cur.fetchall()
if not rows:
return None
leader = rows[0]["locked_by_sub"]
count = 0
for r in rows:
if r["locked_by_sub"] != leader:
break
count += 1
return {
"sub": leader,
"display_name": rows[0]["display_name"] or rows[0]["email"],
"count": count,
}
# --- meal picks ---------------------------------------------------------
def add_meal_pick(self, sub: str, slug: str, name: str) -> bool:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT IGNORE INTO cauldron_meal_picks (authentik_sub, recipe_slug, recipe_name)
VALUES (%s, %s, %s)
""",
(sub, slug, name[:500]),
)
return cur.rowcount > 0
def remove_meal_pick(self, sub: str, slug: str) -> bool:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"DELETE FROM cauldron_meal_picks WHERE authentik_sub=%s AND recipe_slug=%s",
(sub, slug),
)
return cur.rowcount > 0
def list_meal_picks(self, sub: str) -> list[dict]:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT recipe_slug, recipe_name, added_at FROM cauldron_meal_picks "
"WHERE authentik_sub=%s ORDER BY added_at DESC",
(sub,),
)
return [dict(r) for r in cur.fetchall()]
def list_meal_pick_slugs(self, sub: str) -> set[str]:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT recipe_slug FROM cauldron_meal_picks WHERE authentik_sub=%s",
(sub,),
)
return {r["recipe_slug"] for r in cur.fetchall()}
def list_household_pick_slugs(self, household_id: int) -> set[str]:
"""Union of picks across all members of the household."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
SELECT DISTINCT p.recipe_slug
FROM cauldron_meal_picks p
JOIN cauldron_household_members m ON m.authentik_sub = p.authentik_sub
WHERE m.household_id = %s
""",
(household_id,),
)
return {r["recipe_slug"] for r in cur.fetchall()}
def list_household_picks_with_pickers(self, household_id: int) -> list[dict]:
"""All picks across the household, grouped by slug, with the list of
members who picked each (so the UI can show 'pinned by Cobb · Abby').
Latest pick added_at per slug for ordering."""
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
SELECT
p.recipe_slug AS slug,
MIN(p.recipe_name) AS name,
GROUP_CONCAT(
DISTINCT COALESCE(NULLIF(u.display_name, ''),
SUBSTRING_INDEX(u.email, '@', 1))
ORDER BY p.added_at ASC
SEPARATOR '|'
) AS pickers,
GROUP_CONCAT(
DISTINCT u.authentik_sub
ORDER BY p.added_at ASC
SEPARATOR '|'
) AS picker_subs,
MAX(p.added_at) AS last_pick_at,
COUNT(*) AS pick_count
FROM cauldron_meal_picks p
JOIN cauldron_household_members m ON m.authentik_sub = p.authentik_sub
LEFT JOIN cauldron_users u ON u.authentik_sub = p.authentik_sub
WHERE m.household_id = %s
GROUP BY p.recipe_slug
ORDER BY last_pick_at DESC
""",
(household_id,),
)
out = []
for r in cur.fetchall():
d = dict(r)
d["pickers"] = (d["pickers"] or "").split("|") if d["pickers"] else []
d["picker_subs"] = (d["picker_subs"] or "").split("|") if d["picker_subs"] else []
out.append(d)
return out
# --- recipe index -------------------------------------------------------
def get_index_state(self, household_id: int) -> dict | None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"SELECT last_refreshed_at, recipe_count FROM cauldron_recipe_index_state WHERE household_id=%s",
(household_id,),
)
return cur.fetchone()
def replace_recipe_index(self, household_id: int, rows: list[dict]) -> int:
"""Atomic-ish replace of the index for one household. Drops + reinserts."""
import json as _json
with self.conn() as c, c.cursor() as cur:
cur.execute("DELETE FROM cauldron_recipe_index WHERE household_id=%s", (household_id,))
for r in rows:
cur.execute(
"""
INSERT INTO cauldron_recipe_index
(household_id, slug, name, description, tags_text, cats_text,
foods_text, ings_text, date_updated, date_added, last_made,
total_time, recipe_yield, raw_json)
VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
""",
(
household_id,
r["slug"],
r["name"][:500],
(r.get("description") or "")[:65000],
(r.get("tags_text") or "")[:65000],
(r.get("cats_text") or "")[:65000],
(r.get("foods_text") or "")[:65000],
(r.get("ings_text") or "")[:65000],
r.get("date_updated"),
r.get("date_added"),
r.get("last_made"),
(r.get("total_time") or "")[:64],
(r.get("recipe_yield") or "")[:255],
_json.dumps(r.get("raw") or {}, default=str),
),
)
cur.execute(
"""
INSERT INTO cauldron_recipe_index_state (household_id, last_refreshed_at, recipe_count)
VALUES (%s, NOW(), %s)
ON DUPLICATE KEY UPDATE last_refreshed_at=NOW(), recipe_count=VALUES(recipe_count)
""",
(household_id, len(rows)),
)
return len(rows)
def list_indexed_recipes(self, household_id: int, *, category: str | None = None,
order_by: str = "date_added", order_dir: str = "desc",
limit: int = 1000, offset: int = 0) -> list[dict]:
"""Pull the indexed recipe rows. Used both for non-search browse + as
the candidate set for in-process fuzzy ranking on search."""
order_col = {
"date_added": "date_added",
"date_updated": "date_updated",
"last_made": "last_made",
"name": "name",
}.get(order_by, "date_added")
order_dir_sql = "DESC" if order_dir.lower() != "asc" else "ASC"
sql = f"""
SELECT slug, name, description, tags_text, cats_text, foods_text,
date_updated, date_added, last_made, total_time, recipe_yield, raw_json
FROM cauldron_recipe_index
WHERE household_id = %s
"""
params: list = [household_id]
if category:
sql += " AND cats_text LIKE %s"
params.append(f"%{category}%")
sql += f" ORDER BY {order_col} {order_dir_sql} LIMIT %s OFFSET %s"
params += [limit, offset]
with self.conn() as c, c.cursor() as cur:
cur.execute(sql, params)
return [dict(r) for r in cur.fetchall()]
# --- chat log -----------------------------------------------------------
def log_chat(
self,
*,
sub: str,
intent: str,
duration_ms: int,
model: str,
prompt_chars: int,
result_chars: int,
ok: bool,
error: str | None = None,
) -> None:
with self.conn() as c, c.cursor() as cur:
cur.execute(
"""
INSERT INTO cauldron_chat_log
(authentik_sub, intent, forge_duration_ms, forge_model,
prompt_chars, result_chars, ok, error)
VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
""",
(sub, intent, duration_ms, model, prompt_chars, result_chars, ok, (error or "")[:500] or None),
)