cauldron/tests/test_plan_generator.py
Kayos 36aba73f66 v0.3 step 3+4: AI plan generator + /list shopping aggregation
- migrations 012 + 013: cauldron_meal_plan_slots + cauldron_pick_points
- db: list_plan_slots, save_plan_slots, delete_plan_slots, mark_plan_generated,
  clear_plan_generated, award_pick_points, enrich_plan_with_slots; scoreboard
  extended with points (sum from pick_points) and weeks_locked alias
- forge.generate_plan: sonnet prompt builds 7-day plan respecting picks,
  validates slot count + day uniqueness + slug-in-pool, fills picker_subs
  from ground-truth picks (model output is advisory)
- POST /api/plan/generate: race-safe (existing slots → 409 with plan),
  lock-aware (locked → 409), idempotent
- POST /api/plan/regenerate: re-roll for the original generator, gated by
  ownership + lock; wipes slots + pick_points then re-runs generate
- plan.html: generate CTA + 7 day cards with picker chips + AI reason +
  re-roll button (generator-only, pre-lock); scoreboard now shows points + wins
- /list: pulls plan slots, queries Mealie for ingredients, runs aggregator,
  renders 48px-tall checkbox shopping list with localStorage state per plan_id
- tests: 13 new tests across forge.generate_plan + /api/plan/generate routes
  + /list view + scoreboard SQL inspection. conftest+_testenv stub
  pymysql/oidc/foods at import time so tests run against module-level app
  without a live DB. Both pytest and `unittest discover` paths green (27/27).

Defers: bulk sterilizer admin (A1), foods dedupe (A2), Mealie shopping-list-
export (button rendered but disabled). 7-slot count is fixed at the
endpoint (no UI for slot-count selection yet).

Spec: memory/spec-cauldron-v0.3.md
2026-04-29 06:26:54 -07:00

381 lines
16 KiB
Python

"""Tests for the AI plan generator (forge.generate_plan + the
/api/plan/generate endpoint).
The endpoint tests use Flask's test client. db methods on the module-level
cauldron.server.db object are swapped out with MagicMocks per-test — this
avoids needing a real MariaDB to test routing + the orchestration logic.
"""
import json
import unittest
from unittest.mock import MagicMock, patch
# Run conftest's import-time patches BEFORE pulling in cauldron.server.
# pytest auto-loads conftest, but unittest doesn't, so do it explicitly.
# Import path is absolute so `unittest discover` (which doesn't treat tests/
# as a package) and pytest both resolve it.
import os, sys
sys.path.insert(0, os.path.dirname(__file__))
import _testenv # noqa: E402, F401
from cauldron import server as srv
from cauldron.forge import Forge, ForgeError
# ---------- forge.generate_plan unit tests --------------------------------
class TestForgeGeneratePlan(unittest.TestCase):
    """Exercise ``Forge.generate_plan`` with ``Forge.run`` patched to canned replies."""

    # Every test generates a plan for the same week.
    WEEK_START = "2026-04-27"

    def setUp(self):
        self.forge = Forge(
            base_url="http://forge.test",
            token="t",
            default_model="sonnet",
            default_timeout=60,
        )

    def _ok_run(self, slots_payload):
        """Patch self.forge.run to return a dict shaped like clawdforge's."""
        canned_reply = {"result": {"slots": slots_payload}}
        return patch.object(self.forge, "run", return_value=canned_reply)

    @staticmethod
    def _slot(day, slug, reason=""):
        """Build one model-output slot that carries no picker attribution."""
        return {"day": day, "recipe_slug": slug, "picker_subs": [], "reason": reason}

    def _generate(self, *, picks, recipes, slots):
        """Call generate_plan for the shared test week."""
        return self.forge.generate_plan(
            picks=picks, recipes=recipes, slots=slots, week_start=self.WEEK_START,
        )

    def test_validates_slot_count_matches(self):
        names = ("Stew", "Tacos", "Pasta", "Pie", "Curry", "Bowl", "Soup")
        recipes = [
            {"slug": f"r{idx}", "name": name}
            for idx, name in enumerate(names, start=1)
        ]
        # Model returns only 5 slots — must raise
        weekdays = ("monday", "tuesday", "wednesday", "thursday", "friday")
        too_few = [self._slot(day, "r1") for day in weekdays]
        with self._ok_run(too_few), self.assertRaises(ForgeError) as caught:
            self._generate(picks=[], recipes=recipes, slots=7)
        self.assertIn("expected 7", str(caught.exception))

    def test_rejects_unknown_slug(self):
        recipes = [{"slug": "r1", "name": "A"}]
        off_pool = [self._slot("monday", "r-not-real")]
        with self._ok_run(off_pool), self.assertRaises(ForgeError) as caught:
            self._generate(picks=[], recipes=recipes, slots=1)
        self.assertIn("unknown recipe_slug", str(caught.exception))

    def test_rejects_duplicate_day(self):
        recipes = [{"slug": "r1", "name": "A"}, {"slug": "r2", "name": "B"}]
        same_day_twice = [self._slot("monday", "r1"), self._slot("monday", "r2")]
        with self._ok_run(same_day_twice), self.assertRaises(ForgeError) as caught:
            self._generate(picks=[], recipes=recipes, slots=2)
        self.assertIn("duplicate day", str(caught.exception))

    def test_picker_attribution_uses_real_subs(self):
        """Even if the model omits picker_subs, our ground-truth pick map
        is what ends up on the slot."""
        recipes = [{"slug": "r1", "name": "Stew"}]
        picks = [
            {"slug": "r1", "name": "Stew", "picker_subs": ["sub-abby", "sub-cobb"]},
        ]
        # Model returns empty picker_subs — we should fill from the picks
        model_slots = [self._slot("monday", "r1", reason="honors picks")]
        with self._ok_run(model_slots):
            out = self._generate(picks=picks, recipes=recipes, slots=1)
        self.assertEqual(len(out), 1)
        only = out[0]
        self.assertEqual(only["picker_subs"], ["sub-abby", "sub-cobb"])
        self.assertEqual(only["source"], "pick")
        self.assertEqual(only["recipe_name"], "Stew")
        self.assertEqual(only["reason"], "honors picks")

    def test_string_response_is_parsed(self):
        """clawdforge sometimes returns the JSON as a string in `result`."""
        recipes = [{"slug": "r1", "name": "A"}]
        payload = {"slots": [self._slot("monday", "r1", reason="ai")]}
        as_text = {"result": json.dumps(payload)}
        with patch.object(self.forge, "run", return_value=as_text):
            out = self._generate(picks=[], recipes=recipes, slots=1)
        self.assertEqual(len(out), 1)
        self.assertEqual(out[0]["recipe_slug"], "r1")
        self.assertEqual(out[0]["source"], "mealie")  # no picks → mealie source

    def test_code_fenced_response_is_parsed(self):
        """A reply wrapped in a ```json fence is still decoded."""
        recipes = [{"slug": "r1", "name": "A"}]
        payload = {"slots": [self._slot("monday", "r1")]}
        fenced = "```json\n" + json.dumps(payload) + "\n```"
        with patch.object(self.forge, "run", return_value={"result": fenced}):
            out = self._generate(picks=[], recipes=recipes, slots=1)
        self.assertEqual(out[0]["recipe_slug"], "r1")
# ---------- /api/plan/generate route tests --------------------------------
def _make_db_stub(*, plan, picks=None, recipe_rows=None,
                  existing_slots=None, save_inserted=None):
    """Build a fake db with the methods the route uses.

    ``plan`` is the row handed back (as a copy) by ``get_or_create_plan``.
    ``picks``, ``recipe_rows`` and ``existing_slots`` seed the matching list
    queries and default to empty lists. ``save_inserted`` overrides the
    ``save_plan_slots`` side effect; by default it reports ``len(slots)``.
    """
    from contextlib import contextmanager
    from datetime import datetime

    stub = MagicMock()

    # Fixed return values, keyed by db method name.
    canned = {
        "list_household_picks_with_pickers": picks or [],
        "list_indexed_recipes": recipe_rows or [],
        "list_plan_slots": existing_slots or [],
        "get_or_create_plan": dict(plan),
        "auto_lock_past_unlocked_plans": 0,
        "list_household_member_subs": ["sub-1"],
        "get_user_household_id": 1,
        "list_household_pick_slugs": set(),
        "household_scoreboard": [],
        "household_streak": None,
        "upsert_user": None,
    }
    for method_name, value in canned.items():
        getattr(stub, method_name).return_value = value

    # save_plan_slots returns inserted count (1+ default, or override for race tests)
    def _count_inserted(plan_id, slots):
        return len(slots)

    stub.save_plan_slots.side_effect = (
        _count_inserted if save_inserted is None else save_inserted
    )

    # mark_plan_generated returns an updated copy of the plan dict
    def _mark(plan_id, sub):
        stamped = dict(plan)
        stamped["generated_by_sub"] = sub
        stamped["generated_at"] = datetime(2026, 4, 27, 12, 0, 0)
        return stamped

    stub.mark_plan_generated.side_effect = _mark

    # enrich_plan_with_slots attaches the stubbed slot list to the dict in-place
    def _enrich(target):
        target["slots"] = stub.list_plan_slots.return_value
        return target

    stub.enrich_plan_with_slots.side_effect = _enrich

    # conn() context manager stub for the display-name resolution
    @contextmanager
    def _conn():
        yield FakeConn()

    stub.conn.side_effect = _conn
    return stub
class FakeCursor:
    """Inert cursor stand-in: accepts any statement and yields no rows."""

    def __init__(self):
        self._rows = []

    def execute(self, *args, **kwargs):
        """Accept and ignore any statement and parameters."""
        return None

    def fetchone(self):
        """Report that there is no row."""
        return None

    def fetchall(self):
        """Report an empty result set."""
        return []

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # False: never suppress an exception raised inside the with-block.
        return False
class FakeConn:
    """Inert connection stand-in whose cursors are ``FakeCursor`` objects."""

    def cursor(self):
        """Hand out a fresh no-op cursor."""
        return FakeCursor()

    def commit(self):
        """Do nothing."""
        return None

    def rollback(self):
        """Do nothing."""
        return None

    def close(self):
        """Do nothing."""
        return None
class _RouteTestBase(unittest.TestCase):
    """Shared fixture for route tests: a Flask test client with a seeded session."""

    # The user record written into the session before each test.
    _SESSION_USER = {"sub": "sub-cobb", "email": "cobb@sulkta.com", "name": "Cobb"}

    def setUp(self):
        self.client = srv.app.test_client()
        # Seed the client's session with a user record before any request is made.
        with self.client.session_transaction() as session:
            session["user"] = dict(self._SESSION_USER)

    def _patch_db(self, fake_db):
        """Return a patcher that swaps the module-level ``srv.db`` for ``fake_db``."""
        return patch.object(srv, "db", fake_db)
class TestGenerateRoute(_RouteTestBase):
    """POST /api/plan/generate against a stubbed db and a patched forge."""

    DAYS = ("monday", "tuesday", "wednesday", "thursday",
            "friday", "saturday", "sunday")

    @staticmethod
    def _plan(plan_id, **overrides):
        """Build an ungenerated, unlocked plan row, then apply ``overrides``."""
        from datetime import date
        row = {
            "id": plan_id, "household_id": 1,
            "week_start": date(2026, 4, 27),
            "generated_by_sub": None, "generated_at": None,
            "locked_by_sub": None, "locked_at": None, "locked_reason": None,
        }
        row.update(overrides)
        return row

    def _post_generate(self, fake_db, **mock_kwargs):
        """POST to the endpoint with db and forge.generate_plan patched.

        Returns ``(response, generate_plan_mock)``.
        """
        with self._patch_db(fake_db), \
                patch.object(srv.forge, "generate_plan", **mock_kwargs) as gen_mock:
            response = self.client.post("/api/plan/generate")
        return response, gen_mock

    def test_generate_creates_slots(self):
        plan = self._plan(42)
        recipe_rows = [
            {"slug": f"r{n}", "name": f"Recipe {n}", "raw_json": "{}"}
            for n in range(1, 11)
        ]
        # One unique recipe per day for realism
        slots_returned = [
            {"day": day, "recipe_slug": f"r{n}", "recipe_name": f"Recipe {n}",
             "picker_subs": [], "reason": "ai", "source": "mealie"}
            for n, day in enumerate(self.DAYS, start=1)
        ]
        fake_db = _make_db_stub(plan=plan, recipe_rows=recipe_rows)
        resp, _ = self._post_generate(fake_db, return_value=slots_returned)
        self.assertEqual(resp.status_code, 200, resp.get_data(as_text=True))
        self.assertTrue(resp.get_json()["ok"])
        # save_plan_slots called with the plan id and the slots list
        fake_db.save_plan_slots.assert_called_once()
        positional, _ = fake_db.save_plan_slots.call_args
        saved_plan_id, saved_slots = positional[0], positional[1]
        self.assertEqual(saved_plan_id, 42)
        self.assertEqual(len(saved_slots), 7)
        self.assertEqual(saved_slots[0]["day"], "monday")
        # mark_plan_generated called with cobb's sub
        fake_db.mark_plan_generated.assert_called_once_with(42, "sub-cobb")

    def test_generate_when_locked_409(self):
        from datetime import datetime
        plan = self._plan(
            7,
            locked_by_sub="sub-abby",
            locked_at=datetime(2026, 4, 27, 18, 0),
            locked_reason="user",
        )
        fake_db = _make_db_stub(plan=plan)
        resp, gen_mock = self._post_generate(fake_db)
        self.assertEqual(resp.status_code, 409)
        self.assertEqual(resp.get_json()["error"], "plan_locked")
        gen_mock.assert_not_called()

    def test_generate_when_already_generated_409(self):
        plan = self._plan(9, generated_by_sub="sub-abby")
        existing = [{
            "id": 1, "plan_id": 9, "day": "monday",
            "recipe_slug": "r1", "recipe_name": "Stew",
            "source": "mealie", "picker_subs": [], "reason": "", "notes": None,
            "created_at": None,
        }]
        fake_db = _make_db_stub(plan=plan, existing_slots=existing)
        resp, gen_mock = self._post_generate(fake_db)
        self.assertEqual(resp.status_code, 409)
        body = resp.get_json()
        self.assertEqual(body["error"], "plan_already_generated")
        self.assertIn("plan", body)
        self.assertEqual(len(body["plan"]["slots"]), 1)
        gen_mock.assert_not_called()

    def test_pick_points_awarded_on_pick_use(self):
        plan = self._plan(11)
        recipe_rows = [
            {"slug": "stew", "name": "Stew", "raw_json": "{}"},
            {"slug": "tacos", "name": "Tacos", "raw_json": "{}"},
        ]
        picks = [
            {"slug": "stew", "name": "Stew",
             "pickers": ["abby"], "picker_subs": ["sub-abby"]},
            {"slug": "tacos", "name": "Tacos",
             "pickers": ["cobb", "abby"], "picker_subs": ["sub-cobb", "sub-abby"]},
        ]
        # Slot fixture: monday = stew (abby picks), tuesday = tacos (cobb +
        # abby picks), wed-sun = stew (ai-chosen, no pickers).
        picked_days = [
            {"day": "monday", "recipe_slug": "stew", "recipe_name": "Stew",
             "picker_subs": ["sub-abby"], "reason": "abby's pick",
             "source": "pick"},
            {"day": "tuesday", "recipe_slug": "tacos", "recipe_name": "Tacos",
             "picker_subs": ["sub-cobb", "sub-abby"], "reason": "co",
             "source": "pick"},
        ]
        ai_days = [
            {"day": day, "recipe_slug": "stew", "recipe_name": "Stew",
             "picker_subs": [], "reason": "ai", "source": "mealie"}
            for day in self.DAYS[2:]
        ]
        slots_full = picked_days + ai_days
        fake_db = _make_db_stub(plan=plan, picks=picks, recipe_rows=recipe_rows)
        resp, _ = self._post_generate(fake_db, return_value=slots_full)
        self.assertEqual(resp.status_code, 200, resp.get_data(as_text=True))
        # 1pt for sub-abby on monday + 1pt sub-cobb + 1pt sub-abby on tuesday
        # = 3 award_pick_points calls total
        self.assertEqual(fake_db.award_pick_points.call_count, 3)
        # All calls should be (1, 11, <sub>, 1, "pick_used")
        awards = fake_db.award_pick_points.call_args_list
        awarded_subs = sorted(award.args[2] for award in awards)
        self.assertEqual(awarded_subs, ["sub-abby", "sub-abby", "sub-cobb"])
        for award in awards:
            self.assertEqual(award.args[3], 1)  # points
            self.assertEqual(award.args[4], "pick_used")
# ---------- household_scoreboard SQL test --------------------------------
class TestScoreboardSchema(unittest.TestCase):
    """The scoreboard SELECT must reference cauldron_pick_points and return
    a `points` field. Verified by inspecting the generated SQL via a
    capturing fake cursor."""

    def test_scoreboard_query_includes_points(self):
        from cauldron.db import DB

        # Holds the most recent SQL text passed to execute().
        seen = {"sql": None}
        scoreboard_row = {
            "sub": "sub-cobb", "email": "cobb@x.com",
            "display_name": "Cobb",
            "wins": 2, "last_win": None, "points": 5,
        }

        class CapCursor:
            def execute(self, sql, params=None):
                seen["sql"] = sql

            def fetchall(self):
                # Fresh copy per call so the caller gets its own row dict.
                return [dict(scoreboard_row)]

            def __enter__(self):
                return self

            def __exit__(self, *exc_info):
                return False

        class CapConn:
            def cursor(self):
                return CapCursor()

            def commit(self):
                pass

            def rollback(self):
                pass

            def close(self):
                pass

        def _connect(**kw):
            return CapConn()

        db = DB(host="x", port=3306, name="x", user="x", password="x")
        with patch("pymysql.connect", _connect):
            rows = db.household_scoreboard(1)

        self.assertIn("cauldron_pick_points", seen["sql"])
        self.assertIn("points", seen["sql"])
        # And the row decoder coerces points to int + adds weeks_locked alias
        first = rows[0]
        self.assertEqual(first["points"], 5)
        self.assertEqual(first["weeks_locked"], 2)
# Allow running this file directly as a script; unittest.main() then runs
# the TestCase classes defined in this module.
if __name__ == "__main__":
    unittest.main()