diff --git a/.env.example b/.env.example index 256b160..57ae730 100644 --- a/.env.example +++ b/.env.example @@ -1,6 +1,5 @@ -# Cauldron — copy to /mnt/cache/appdata/secrets/cauldron.env on Lucy -# (chmod 600, root:root). Some values are already populated by the deploy -# bootstrap (CLAWDFORGE_*); fill in the rest before first start. +# Cauldron — copy to .env (chmod 600). Point compose at it via +# CAULDRON_ENV_FILE if you keep it elsewhere. # Flask SECRET_KEY=change-me-32-bytes-of-entropy @@ -9,12 +8,12 @@ SECRET_KEY=change-me-32-bytes-of-entropy BIND_HOST=0.0.0.0 BIND_PORT=7790 -# Mealie (recipes.sulkta.com is already wired with Authentik OIDC) -MEALIE_BASE_URL=https://recipes.sulkta.com +# Mealie +MEALIE_BASE_URL=https://mealie.example.com MEALIE_API_TOKEN= -# clawdforge (centralized claude-runner on Lucy) -CLAWDFORGE_URL=http://192.168.0.5:8800 +# clawdforge (claude-runner HTTP service) +CLAWDFORGE_URL=http://clawdforge:8800 CLAWDFORGE_TOKEN= DEFAULT_MODEL=sonnet DEFAULT_TIMEOUT_SECS=120 @@ -22,14 +21,15 @@ DEFAULT_TIMEOUT_SECS=120 # Admin bearer for batch ops (sterilize-all, etc.) 
— separate from user OIDC ADMIN_BEARER=change-me-this-is-the-cauldron-admin-batch-token -# Authentik OIDC (provisioned 2026-04-28; client_id + secret minted by Authentik) -OIDC_ISSUER=https://auth.sulkta.com/application/o/cauldron/ +# Authentik OIDC (or any OIDC provider that exposes +# /.well-known/openid-configuration) +OIDC_ISSUER=https://auth.example.com/application/o/cauldron/ OIDC_CLIENT_ID= OIDC_CLIENT_SECRET= -OIDC_REDIRECT_URI=http://192.168.0.5:7790/auth/callback +OIDC_REDIRECT_URI=http://localhost:7790/auth/callback -# DB (sulkta-mariadb on the sulkta bridge) -DB_HOST=sulkta-mariadb +# DB +DB_HOST=mariadb DB_PORT=3306 DB_NAME=cauldron DB_USER=cauldron_app @@ -39,28 +39,26 @@ DB_PASSWORD= # Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())" CAULDRON_FERNET_KEY= -# --- Public-deploy hardening (added 2026-05-02 CVE audit) --- -# Comma-separated list of authentik subjects who get the operator-tier -# /me admin tools panel (consolidate, discover scrape). Empty = nobody. -# Cobb's authentik sub goes here for production. +# --- Public-deploy hardening --- +# Comma-separated list of OIDC subjects who get the operator-tier /me +# admin tools panel (consolidate, discover scrape). Empty = nobody. CAULDRON_ADMIN_SUBS= -# External base URL where cauldron is reachable. Set to your public host -# (e.g. https://cauldron.sulkta.com) when going public; leave empty for -# LAN-only HTTP. When set: enables CSRF Origin guard, HSTS, secure cookie. +# External base URL where cauldron is reachable (e.g. https://cauldron.example.com). +# Leave empty for LAN-only HTTP. When set: enables CSRF Origin guard, +# HSTS, secure cookie. CAULDRON_BASE_URL= -# Whether the deploy is fronted by TLS (rackham apache → cauldron over -# OpenVPN). Independent toggle from base_url so dev/staging can override. -# When true: SESSION_COOKIE_SECURE=True, HSTS header emitted. +# Whether the deploy is fronted by TLS. 
Independent toggle from base_url +# so dev/staging can override. When true: SESSION_COOKIE_SECURE=True, +# HSTS header emitted. CAULDRON_BEHIND_TLS=false # Comma-separated CIDR list of trusted proxies whose X-Forwarded-* we # honor. Empty = trust nothing → ProxyFix is OFF and X-Forwarded-* are -# stripped from every request. For the rackham→OpenVPN→lucy:7790 deploy, -# set this to rackham's WireGuard-internal IP (e.g. 10.20.30.1/32). Any -# X-Forwarded-* from a peer outside this list gets dropped before -# ProxyFix sees it. +# stripped from every request. Set this to the reverse-proxy peer's +# address (e.g. 10.20.30.1/32). Any X-Forwarded-* from a peer outside +# this list gets dropped before ProxyFix sees it. CAULDRON_TRUSTED_PROXIES= # bugs.sulkta.com integration. Per-service key minted via: diff --git a/README.md b/README.md index 16a6835..1b8daad 100644 --- a/README.md +++ b/README.md @@ -1,100 +1,60 @@ # cauldron -Mealie-backed AI meal planner + shopping list for the family. LAN-only, -internal tool. Mealie at `recipes.sulkta.com` is the source of truth for -recipes / meal plans / shopping lists; cauldron is the AI layer + Abby's -branded UI on top. +Mealie-backed meal planner + shopping-list aggregator. Wraps a Mealie +instance with: an ingredient sterilizer (free-form quantities → structured +parses), a weekly meal-plan generator, and a household shopping list that +collapses cross-recipe duplicates. -## Status +## Stack -**v0.1 — backend bones (current).** Ingredient sterilizer endpoint working. -No UI yet; bearer-auth API only. Frontend + Authentik OIDC arrives in v0.2. -Native Kotlin Android in v0.5. +- Flask + gunicorn, Python 3.12 +- Authentik (or any OIDC provider) for sessions +- MariaDB for per-user prefs + Fernet-encrypted Mealie tokens +- [clawdforge](https://github.com/Sulkta-Coop/clawdforge) for the AI layer -## Surface (v0.1) +Mealie remains the source of truth for recipes / plans / shopping lists. 
+Cauldron stores per-user prefs + cached metadata only. + +## Run + +```bash +cp .env.example .env # fill in secrets +docker compose up -d --build +curl http://localhost:7790/healthz +``` + +`CAULDRON_ENV_FILE=/path/to/secrets.env docker compose up -d` if your env +file lives outside the repo. + +## Endpoints ``` GET /healthz liveness + clawdforge upstream -GET /api/recipes list Mealie recipes (paginated) -POST /api/sterilize/preview/<slug> dry-run AI parse, return proposals -POST /api/sterilize/apply/<slug> write parses back to Mealie +GET /login /auth/callback OIDC flow +GET /me account + integration status +GET /plan /list household plan + shopping list +GET /api/recipes (admin bearer) proxy Mealie list +POST /api/sterilize/preview/<slug> (admin bearer) dry-run parser +POST /api/sterilize/apply/<slug> (admin bearer) write parses back ``` -All routes except `/healthz` require `Authorization: Bearer `. +Admin-bearer endpoints expect `Authorization: Bearer $ADMIN_BEARER`. -## Architecture - -``` -Abby's phone (later: Kotlin app) - │ - ▼ - cauldron (Flask, port 7790, LAN-only) - ├─ Mealie API client ─── recipes.sulkta.com (source of truth) - ├─ clawdforge client ─── 192.168.0.5:8800 (claude -p runner) - └─ Authentik OIDC (v0.2) -``` - -cauldron does NOT hold its own database in v0.1 — all state lives in Mealie. -A small Postgres/MariaDB schema lands in v0.2 for Abby-specific prefs + -chat history. - -## Ingredient sterilizer - -Mealie's CRF parser is mediocre. Cobb's hand-typed recipes have lots of -free-form quantity strings ("about 2 cups cooked white rice", "1 small -handful kale", "a pinch of salt") that don't aggregate cleanly into a -shopping list. - -The sterilizer batches all ingredients of one recipe into a single Sonnet -call (via clawdforge), gets back parallel structured parses, then on apply -links each parse to existing Mealie food/unit records (creating any missing -by name) and PUTs the recipe back. - -Preview is non-destructive — review proposals before apply. 
- -```bash -# Dry-run preview -curl -sS -X POST -H "Authorization: Bearer $ADMIN_BEARER" \ - http://192.168.0.5:7790/api/sterilize/preview/spaghetti-bolognese | jq . - -# Apply (creates missing foods/units by default) -curl -sS -X POST -H "Authorization: Bearer $ADMIN_BEARER" \ - http://192.168.0.5:7790/api/sterilize/apply/spaghetti-bolognese | jq . -``` - -## Deploy - -1. `ssh lucy` -2. `cd /mnt/user/appdata && git clone cauldron && cd cauldron/build` - (or wherever the deploy convention lands) -3. Drop `.env` at `/mnt/cache/appdata/secrets/cauldron.env` (chmod 600 root:root) - - `CLAWDFORGE_TOKEN` is already populated by the bootstrap (see `memory/2026-04-28.md`) - - `MEALIE_API_TOKEN` — mint at `recipes.sulkta.com` → user → API tokens - - `ADMIN_BEARER` — pick 32 bytes of entropy - - `SECRET_KEY` — 32 bytes for Flask sessions -4. `docker compose up -d --build` -5. Smoke: `curl http://192.168.0.5:7790/healthz` - -## Roadmap - -- v0.1 ✓ — sterilizer backend + Flask shell -- v0.2 — Authentik OIDC, Abby-branded web UI, palette CSS, postgres for prefs -- v0.3 — meal plan generator (week → Mealie meal plan write) -- v0.4 — shopping list aggregator (read meal plan → consolidated grocery list) -- v0.5 — native Kotlin + Compose Android app (read-only shopping list + plan view) - -## Repo layout +## Layout ``` cauldron/ -├─ cauldron/ -│ ├─ config.py env-driven config -│ ├─ forge.py clawdforge HTTP client -│ ├─ mealie.py Mealie API client -│ ├─ sterilizer.py ingredient parse + apply pipeline -│ └─ server.py Flask app -├─ Dockerfile -├─ compose.yml -├─ requirements.txt -└─ .env.example + config.py env-driven config + forge.py clawdforge HTTP client + mealie.py Mealie API client + sterilizer.py ingredient parse + apply pipeline + aggregator.py cross-recipe shopping aggregator + server.py Flask app +scripts/ + build_foods_seed.py USDA → foods seed + clean_foods_seed.py clawdforge-curated cleanup pass ``` + +## License + +MIT. 
diff --git a/cauldron/aggregator.py b/cauldron/aggregator.py index 6908b9e..e4aec8b 100644 --- a/cauldron/aggregator.py +++ b/cauldron/aggregator.py @@ -189,8 +189,8 @@ def _aggregate_one_food( """All ingredients for ONE food → 1+ ShoppingLines.""" # Bucket by unit class. Ingredients with qty=None go to a separate # `no_qty` bucket so they DON'T silently disappear from the shopping - # list when Mealie's parser couldn't extract a number (audit F-15 - # domain, 2026-05-02). The killer feature should surface "buy onion" + # list when Mealie's parser couldn't extract a number. The killer + # feature should surface "buy onion" # even if the source recipe just said "1 onion, chopped" without a # parseable quantity. buckets: dict[str, list[tuple[Ingredient, float]]] = { diff --git a/cauldron/config.py b/cauldron/config.py index 047560a..96a6972 100644 --- a/cauldron/config.py +++ b/cauldron/config.py @@ -26,7 +26,7 @@ class Config: oidc_client_secret: str oidc_redirect_uri: str - # DB (sulkta-mariadb) + # DB db_host: str db_port: int db_name: str @@ -50,20 +50,18 @@ class Config: # - werkzeug.middleware.proxy_fix.ProxyFix is wrapped (1 hop trusted) base_url: str - # Whether the deploy is fronted by TLS (rackham Apache → cauldron over OpenVPN). - # Independent toggle from base_url so dev/staging can override. + # Whether the deploy is fronted by TLS. Independent toggle from + # base_url so dev/staging can override. behind_tls: bool # Comma-separated list of CIDRs (or single IPs) whose X-Forwarded-* # headers we trust. Empty = trust nothing → ProxyFix is NOT enabled - # and incoming X-Forwarded-* headers from any peer are stripped before - # they reach the app. When non-empty, only the listed peers can set - # the perceived scheme/host/port. 
Audit CVE-NEW-6 (2026-05-02 PM): - # the prior `if cfg.behind_tls: ProxyFix(...)` trusted X-Forwarded-* - # from ANY peer that could reach :7790 — including other containers - # on the sulkta docker network, since gunicorn binds 0.0.0.0:7790. - # An attacker on a sibling container could spoof X-Forwarded-Proto - # and have request.is_secure return True even on plain HTTP. + # and incoming X-Forwarded-* headers from any peer are stripped + # before they reach the app. When non-empty, only the listed peers + # can set the perceived scheme/host/port. Required because gunicorn + # binds 0.0.0.0:7790; without it any sibling container on the same + # docker network could spoof X-Forwarded-Proto and have + # request.is_secure return True on plain HTTP. trusted_proxies: tuple[str, ...] # bugs.sulkta.com integration (vendored SDK at cauldron/vendor/bugs_sulkta). diff --git a/cauldron/consolidate_foods.py b/cauldron/consolidate_foods.py index dcbf323..0287e3a 100644 --- a/cauldron/consolidate_foods.py +++ b/cauldron/consolidate_foods.py @@ -108,7 +108,6 @@ def _cluster( a few seconds. For larger catalogs (10K+) the inner loop polls cancel_check every 5K comparisons so a user-initiated cancel can abort - cleanly mid-scan rather than waiting tens of seconds. 2nd-pass - audit fix CODE-1 (2026-05-02 PM).""" + cleanly mid-scan rather than waiting tens of seconds.""" n = len(foods) names = [(f.get("name") or "").strip().lower() for f in foods] pairs: list[list[dict]] = [] diff --git a/cauldron/db.py b/cauldron/db.py index 8621eeb..149ee6b 100644 --- a/cauldron/db.py +++ b/cauldron/db.py @@ -1,8 +1,8 @@ -"""DB access + migrations against sulkta-mariadb. +"""DB access + migrations against MariaDB. -Uses PyMySQL with a tiny per-request connection (no pool) — Cauldron is -LAN-only family-internal, traffic is single-digit qps. If load ever grows -swap in DBUtils.PooledDB or SQLAlchemy. +Uses PyMySQL with a tiny per-request connection (no pool) — traffic is +single-digit qps. 
If load ever grows swap in DBUtils.PooledDB or +SQLAlchemy. """ from contextlib import contextmanager from pathlib import Path @@ -607,12 +607,12 @@ MIGRATIONS = [ ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 """, # 039 — Per-household discover skips. Replaces the global - # `cauldron_discovered_recipes.status='rejected'` write — that flipped - # the row for EVERY household in EVERY group (audit finding F-2 routes, - # 2026-05-02). Different households have different tastes; skip is - # per-household. The global status column stays for spam URLs an admin - # wants to nuke for everyone (set via a future bearer-only endpoint); - # routine "not interested" goes here. + # `cauldron_discovered_recipes.status='rejected'` write — that + # flipped the row for EVERY household in EVERY group. Different + # households have different tastes; skip is per-household. The + # global status column stays for spam URLs an admin wants to nuke + # for everyone (set via a future bearer-only endpoint); routine + # "not interested" goes here. """ CREATE TABLE IF NOT EXISTS cauldron_discover_skips ( discover_id BIGINT NOT NULL, @@ -1900,8 +1900,8 @@ class DB: with cauldron_recipe_meta to derive their picking pattern: { - "cobb@sulkta.com": { - "display_name": "cobb", + "user@example.com": { + "display_name": "user", "total_picks": 24, "cuisines": {"asian": 6, "mexican": 4, "italian": 3, ...}, "proteins": {"chicken": 8, "beef": 5, "fish": 2, ...}, diff --git a/cauldron/dedupe_recipes.py b/cauldron/dedupe_recipes.py index e1cdaa9..3d2dd88 100644 --- a/cauldron/dedupe_recipes.py +++ b/cauldron/dedupe_recipes.py @@ -87,15 +87,15 @@ def _cluster_by_name( cancel_check: Callable[[], bool] | None = None, ) -> list[list[dict]]: """Pair-based: emit one 2-recipe candidate per (i, j) where - token_set_ratio >= threshold. 
2nd-pass audit fix (CODE-2, 2026-05-02 PM): - the previous single-link agglomerative chained weak similarities through - the recipe corpus the same way it did with foods — `chicken alfredo` - → `chicken parm` → `parm chicken cutlets` → ... — collapsing dozens of - unrelated recipes into one megacluster that Sonnet then had to refuse. + token_set_ratio >= threshold. The previous single-link agglomerative + approach chained weak similarities through the recipe corpus the + same way it did with foods — `chicken alfredo` → `chicken parm` → + `parm chicken cutlets` → ... — collapsing dozens of unrelated + recipes into one megacluster that Sonnet then had to refuse. Mirrors the pattern used in `consolidate_foods._cluster`. cancel_check, when provided, is polled every 5K pair-comparisons so a - user-initiated cancel can abort a long scan early (CODE-1 fix). On + user-initiated cancel can abort a long scan early. On cancel we return the pairs accumulated so far rather than raising — the caller's _cancelled() in run_walk will catch and exit cleanly.""" n = len(recipes) @@ -265,7 +265,7 @@ def run_apply(*, db: DB, job_id: int, mealie: Mealie) -> None: # (A,B) approved+deleted; later (A,C) tries to delete A # again. Mealie returns 404 — treat that as already- # handled, not an error. Mirrors the consolidate - # apply path. 3rd-pass audit fix CODE3-1 (2026-05-02 PM). + # apply path. if "404" in msg or "not found" in msg.lower(): log.info("[dedupe-recipes:%s] delete %s: stale (already removed)", job_id, slug) continue diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py index 31edd8d..dbd9f51 100644 --- a/cauldron/discover_recipes.py +++ b/cauldron/discover_recipes.py @@ -88,10 +88,9 @@ def is_public_url(url: str) -> tuple[bool, str]: non-loopback, non-link-local IP. Returns (ok, reason). Used by both `/api/discover/scrape-start` (pre-queue rejection) and - `_scrape_one` (defense-in-depth before fetch). 
Audit finding F-1 - (CRIT, 2026-05-02): without this, any session user could queue URLs - pointing at Lucy's LAN, the docker bridge, or cloud metadata - endpoints (169.254.169.254, etc). + `_scrape_one` (defense-in-depth before fetch). Without this, any + session user could queue URLs pointing at the LAN, the docker + bridge, or cloud metadata endpoints (169.254.169.254, etc). Strategy: 1. Parse the URL. Reject non-http(s) schemes. @@ -232,9 +231,8 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None: # allow_redirects=False: is_public_url validated the # original host as public; a 30x to 127.0.0.1 / 169.254.x # would otherwise route this scrape worker at internal - # services (LAN scanner, cloud metadata IMDS). 3rd-pass - # audit fix CVE-NEW3-1 (2026-05-02 PM): treat 30x as - # scrape failure rather than chase the redirect chain. + # services (LAN scanner, cloud metadata IMDS). Treat 30x + # as scrape failure rather than chase the redirect chain. # The recipe_scrapers primary path has its own internal # request chain that's a known residual — the docstring # on is_public_url notes the long-term answer is a diff --git a/cauldron/enrich_recipes.py b/cauldron/enrich_recipes.py index f1d321d..3556ec7 100644 --- a/cauldron/enrich_recipes.py +++ b/cauldron/enrich_recipes.py @@ -134,10 +134,10 @@ def run_enrich( # Heartbeat between Sonnet sub-calls so a slow verify_allergens # doesn't push last_progress_at past db.fail_stuck_enrich_jobs's - # stale_minutes (15) window. Audit CODE-5 (2026-05-02 PM): - # without this, two ~3-4-min Sonnet calls back-to-back could - # straddle the 15-min staleness gate and a still-alive job - # would be incorrectly reaped at next worker restart. + # stale_minutes (15) window. Without this, two ~3-4-min + # Sonnet calls back-to-back could straddle the 15-min + # staleness gate and a still-alive job would be incorrectly + # reaped at next worker restart. 
db.update_enrich_job_progress(job_id, current_slug=slug) # Verification pass: re-check contains.* booleans with a strict diff --git a/cauldron/server.py b/cauldron/server.py index 28b1d54..57033a8 100644 --- a/cauldron/server.py +++ b/cauldron/server.py @@ -1,8 +1,8 @@ """Flask app — v0.2 foundation. -Adds Authentik OIDC + sulkta-mariadb DB + Fernet crypto for per-user Mealie -tokens. v0.1 admin endpoints stay (still bearer-gated for now); user-facing -routes start using OIDC sessions. +Adds Authentik OIDC + MariaDB + Fernet crypto for per-user Mealie tokens. +v0.1 admin endpoints stay (still bearer-gated for now); user-facing routes +start using OIDC sessions. Routes (current): GET /healthz liveness, no auth @@ -64,29 +64,24 @@ def create_app() -> Flask: app.config.update( SESSION_COOKIE_HTTPONLY=True, SESSION_COOKIE_SAMESITE="Lax", - # SESSION_COOKIE_SECURE: env-gated. ON when behind TLS so the - # session cookie won't ride over plain HTTP (sslstrip / mixed- - # content downgrade). Audit CVE-D1 (2026-05-02). Off in dev - # so LAN HTTP development still works. + # ON when behind TLS so the session cookie won't ride plain HTTP + # (sslstrip / mixed-content downgrade). Off in dev so plain-HTTP + # local development still works. SESSION_COOKIE_SECURE=cfg.behind_tls, # 14-day session lifetime + refresh-each-request idle slide. - # Audit CVE-D2 (2026-05-02): Flask's default has no expiry. + # Flask's default has no expiry. PERMANENT_SESSION_LIFETIME=timedelta(days=14), SESSION_REFRESH_EACH_REQUEST=True, - # 1 MiB body cap. Cauldron POSTs are tiny JSON; no legitimate - # reason for a request body to exceed this. Audit CVE-G1. + # 1 MiB body cap. All POSTs are tiny JSON. MAX_CONTENT_LENGTH=1 * 1024 * 1024, ) # X-Forwarded-* trust chain: parse the trusted-proxy CIDR list once # at boot. Empty = trust nothing → ProxyFix is NOT enabled and any # X-Forwarded-* headers from any peer get stripped before they reach - # the app. 
Audit CVE-NEW-6 (2026-05-02 PM 2nd-pass): the prior - # `if cfg.behind_tls: ProxyFix(...)` trusted X-Forwarded-* from any - # peer that could reach :7790 — including sibling containers on the - # sulkta docker network, since gunicorn binds 0.0.0.0:7790. An - # attacker on a co-located container could spoof X-Forwarded-Proto - # and have request.is_secure return True even on plain HTTP. + # the app. Required because gunicorn binds 0.0.0.0:7790 — without it + # any sibling on the same docker network could spoof X-Forwarded-Proto + # and have request.is_secure return True on plain HTTP. _trusted_networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = [] for entry in cfg.trusted_proxies: try: @@ -229,28 +224,24 @@ def create_app() -> Flask: u = session.get("user") or {} return {"is_admin": u.get("sub") in cfg.admin_subs} - # CSRF Origin/Referer check (audit CVE-A1, 2026-05-02). + # CSRF Origin/Referer check. # SAMESITE=Lax alone doesn't cover same-site subdomain CSRF (a - # compromised *.sulkta.com page POSTing to cauldron.sulkta.com - # carries cookies). When CAULDRON_BASE_URL is set, every state- - # mutating request must EXACTLY match origin (scheme+host+port). - # Bearer-token API calls are exempt — no cookie means no CSRF - # surface. Pure-GET/HEAD/OPTIONS are exempt. + # compromised sibling subdomain POSTing carries cookies). When + # CAULDRON_BASE_URL is set, every state-mutating request must + # EXACTLY match origin (scheme+host+port). Bearer-token API calls + # are exempt — no cookie means no CSRF surface. GET/HEAD/OPTIONS + # are exempt. # - # 2nd-pass audit fix (2026-05-02 PM, CVE-NEW-1): the original guard - # used `startswith(cfg.base_url)` which is bypassable by an attacker - # registering `cauldron.sulkta.com.evil.com` — its Origin string - # starts-with `https://cauldron.sulkta.com`. Switched to parsed- - # origin equality so the host comparison is byte-exact at the - # netloc boundary. 
+ # Note: do NOT compare with startswith() on the raw base_url. An + # attacker registering `cauldron.example.com.evil.com` would pass + # such a check. Compare parsed origins so the host match is byte- + # exact at the netloc boundary. def _origin_of(url: str) -> str: """RFC-normalized origin: lowercase scheme + lowercase host, - plus port unless it's the scheme's default. 3rd-pass audit fix - CVE-NEW3-3 (2026-05-02 PM): the prior byte-equality compare - could false-reject browsers that send `Origin: https://Cauldron.SULKTA.com` - (some preserve case in netloc) or `https://x.com:443` against a - bare `https://x.com` base. urlparse already lowercases scheme but - NOT host, and doesn't drop default ports.""" + plus port unless it's the scheme's default. urlparse lowercases + scheme but NOT host, and doesn't drop default ports, so a naive + byte compare can false-reject `Origin: https://Cauldron.example.com` + or `https://x.com:443` vs a bare `https://x.com` base.""" if not url: return "" try: @@ -294,9 +285,9 @@ def create_app() -> Flask: return jsonify({"error": "csrf_origin_mismatch"}), 403 return ("Cross-origin request rejected.", 403) - # Security response headers (audit CVE-E1, 2026-05-02). HSTS only - # when behind TLS (sending HSTS over HTTP is invalid). CSP is - # permissive on inline because templates use them; tighten later. + # Security response headers. HSTS only when behind TLS (sending HSTS + # over HTTP is invalid). CSP is permissive on inline because templates + # use them; tighten later. @app.after_request def _security_headers(resp): resp.headers.setdefault("X-Frame-Options", "DENY") @@ -349,7 +340,7 @@ def create_app() -> Flask: """Layered on top of require_session — only members of cfg.admin_subs proceed. Non-admins get a 404 (not a 403) so the route's existence isn't advertised. 
Used for `/discover` and `/consolidate` whose - admin-only nature was scoped per Cobb 2026-05-02.""" + admin-only nature was scoped intentionally.""" @wraps(fn) def w(*a, **kw): u = session.get("user") @@ -434,11 +425,10 @@ def create_app() -> Flask: """Canonical household_id source for session-authenticated routes. EVERY session-auth handler that scopes data by household MUST get its `hid` from this helper, never from request body / query / form - — otherwise we open a cross-household read/write surface (audit - CODE-4 convention, 2026-05-02 PM 2nd-pass). Admin-bearer endpoints - legitimately derive hid from `started_by_sub` because the bearer - IS the trust anchor for those calls; that path is documented - separately at the admin endpoints.""" + — otherwise we open a cross-household read/write surface. + Admin-bearer endpoints legitimately derive hid from + `started_by_sub` because the bearer IS the trust anchor for + those calls; that path is documented at the admin endpoints.""" u = session.get("user") if not u: return None @@ -463,11 +453,10 @@ def create_app() -> Flask: @app.get("/healthz") def healthz(): """Liveness probe. Public, intentionally minimal — returns - {"ok": true} or {"ok": false} ONLY. Audit CVE-E3 (2026-05-02): - the previous version echoed upstream error strings (clawdforge URL, - DB error message including hostname/user) which leak internal LAN - topology to anyone who can reach cauldron.sulkta.com/healthz. - Detailed upstream check moved to /api/admin/healthz (bearer-only).""" + {"ok": true} or {"ok": false} ONLY. Never echo upstream error + strings here: clawdforge URLs, DB hostnames/users, etc. would + leak internal topology to anyone who can reach /healthz. 
+ Detailed upstream check lives at /api/admin/healthz (bearer-only).""" try: with db.conn() as c, c.cursor() as cur: cur.execute("SELECT 1") @@ -505,11 +494,11 @@ def create_app() -> Flask: def _safe_next(nxt: str | None) -> str: """Validate a post-login redirect target is a same-origin local - path. Defense-in-depth open-redirect guard — we apply this BOTH - at the /login stash AND at /auth/callback consumption (CVE-NEW-3 - audit fix 2026-05-02 PM). The double-check protects against any - future code path that writes session['post_login_next'] outside - of /login, and against percent-encoded path tricks.""" + path. Defense-in-depth open-redirect guard — applied BOTH at the + /login stash AND at /auth/callback consumption. The double-check + protects against any future code path that writes + session['post_login_next'] outside of /login, and against + percent-encoded path tricks.""" if not nxt: return "/me" # Must start with `/` and only `/`. Reject `//foo`, `/\\foo`, @@ -526,11 +515,10 @@ def create_app() -> Flask: return "/me" # Allow only a strict path charset. Anything weirder lands at /me. # Path component is everything before the optional `?` / `#`. - # `%` is allowed for percent-encoded chars (3rd-pass audit fix - # CODE3-3, 2026-05-02 PM) so paths like /recipes/spaghetti%20bol - # don't silently land at /me. Defense-in-depth: percent-decode - # the path and re-validate so encoded path-traversal `%2e%2e/` - # is still caught. + # `%` is allowed for percent-encoded chars so paths like + # /recipes/spaghetti%20bol don't silently land at /me. + # Defense-in-depth: percent-decode and re-validate so encoded + # path-traversal `%2e%2e/` is still caught. path = p.path or "/" for ch in path: if not (ch.isalnum() or ch in "-_./%"): @@ -551,11 +539,10 @@ def create_app() -> Flask: # otherwise route an authenticated user to an attacker page # right after OIDC handshake. 
nxt = _safe_next(request.args.get("next")) - # Already-authenticated users skip OIDC entirely (CVE-NEW-5 fix, - # 2026-05-02 PM): a malicious cross-origin link - # `` would otherwise - # silently re-trigger the OIDC handshake on a logged-in user - # and hand them off to the attacker-supplied next= path. + # Already-authenticated users skip OIDC entirely. Otherwise a + # malicious cross-origin link `` + # could silently re-trigger the OIDC handshake on a logged-in + # user and hand them off to the attacker-supplied next= path. if session.get("user"): return redirect(nxt) session["post_login_next"] = nxt @@ -564,7 +551,7 @@ def create_app() -> Flask: @app.get("/auth/callback") def auth_callback(): # Wrap the OIDC exchange so transient DNS/JWKS hiccups (resolver - # blip on auth.sulkta.com → ConnectionError → 500) render a + # blip on the auth host → ConnectionError → 500) render a # friendly retry page instead of dumping a stack trace, AND # clear the stashed state so the user's retry doesn't trip the # MismatchingState CSRF guard from a stale state cookie. @@ -589,10 +576,9 @@ def create_app() -> Flask: ), 400 except OAuthError as e: # Log the full Authentik error server-side; render only a - # generic detail to the user. Audit CVE-NEW-8 (2026-05-02 PM): - # the prior `f"auth handshake failed: {e}"` echoed Authentik - # error codes (e.g. invalid_client_id) into the auth_retry - # page — anyone who can hit /auth/callback?state=evil could + # generic detail to the user. Don't echo provider error + # codes (e.g. invalid_client_id) into the auth_retry page — + # anyone who can hit /auth/callback?state=evil could otherwise # probe Authentik internals via the rendered detail. 
app.logger.warning("OIDC callback: oauth error: %s", e) session.pop("_state_cauldron_authlib", None) @@ -616,13 +602,12 @@ def create_app() -> Flask: # session is just a serialized dict in the cookie body, but # carrying any pre-auth key into the authenticated state is the # session-fixation/contamination shape best-practice asks us to - # avoid. 3rd-pass audit fix INFO3-2 (2026-05-02 PM). + # avoid. session.clear() session["user"] = {"sub": sub, "email": email, "name": name} # Mark session permanent so PERMANENT_SESSION_LIFETIME (14d) is # honored. Without this, Flask treats the session as a browser- - # session cookie (no Expires) and tab-close kills it. Audit - # CVE-D2 (2026-05-02). + # session cookie (no Expires) and tab-close kills it. session.permanent = True return redirect(nxt) @@ -863,9 +848,9 @@ def create_app() -> Flask: # — and `picks.html` interpolated the slug straight into a JS # `onclick='removePick('{{ slug }}', ...)'` literal, opening a # stored-XSS surface where any household member viewing /picks - # ran the attacker's JS. Audit CVE-NEW-2 (2026-05-02 PM 2nd-pass). - # Also closes the prompt-injection-via-poison-slug vector since - # the planner would otherwise pass garbage slugs to Sonnet. + # ran the attacker's JS. Also closes the prompt-injection-via- + # poison-slug vector — the planner would otherwise pass garbage + # slugs straight to Sonnet. 
hid = current_household_id() if hid is None: return jsonify({"ok": False, "error": "no_household"}), 400 @@ -2311,7 +2296,7 @@ def create_app() -> Flask: db.finalize_consolidate_job(job_id, state="cancelled") return jsonify({"ok": True}) - # admin variants for kayos kick-off + # admin variants for operator-driven kickoff (no user session) @app.post("/api/admin/foods/consolidate-start") @require_bearer def admin_consolidate_start(): @@ -2431,9 +2416,10 @@ def create_app() -> Flask: db.get_discover_imports_for_group(mealie_group_id=my_group_id) if my_group_id else {} ) - # Per-household skips (audit F-2 routes — was a global flip). - # Default view filters out rows the caller's household has skipped; - # `?status=skipped` surfaces them so the user can unskip if needed. + # Per-household skips (was a global flip — now scoped per + # household). Default view filters out rows the caller's + # household has skipped; `?status=skipped` surfaces them so the + # user can unskip if needed. my_skipped = ( db.get_skipped_discover_ids_for_household(household_id=my_hid) if my_hid else set() @@ -2519,12 +2505,12 @@ def create_app() -> Flask: @app.post("/api/discover/reject/") @require_admin def discover_reject(discover_id: int): - """Per-household 'skip from discover'. Audit F-2 routes 2026-05-02: - previously this flipped the GLOBAL `cauldron_discovered_recipes.status - = 'rejected'` field, hiding the recipe from every household in every - group. Now writes to `cauldron_discover_skips(discover_id, household_id)` - — only the caller's household stops seeing it; other households are - unaffected. Different households have different tastes.""" + """Per-household 'skip from discover'. Previously this flipped + the GLOBAL `cauldron_discovered_recipes.status = 'rejected'` + field, hiding the recipe from every household in every group. 
+ Now writes to `cauldron_discover_skips(discover_id, + household_id)` — only the caller's household stops seeing it; + other households are unaffected.""" u = session["user"] row = db.get_discovered_recipe(discover_id) if not row: @@ -2566,11 +2552,11 @@ def create_app() -> Flask: urls = [x for x in urls if x.startswith(("http://", "https://"))][:50] if not urls: return jsonify({"error": "no valid http(s) urls"}), 400 - # SSRF guard (audit F-1 routes, CRIT, 2026-05-02): every URL must - # resolve to a public IP. Reject any that hit private / loopback / - # link-local / multicast / reserved space — those are LAN, docker - # bridge, or cloud metadata endpoints. Apply BEFORE queueing so - # the caller gets a clean error per bad URL. + # SSRF guard: every URL must resolve to a public IP. Reject any + # that hit private / loopback / link-local / multicast / reserved + # space — those are LAN, docker bridge, or cloud metadata + # endpoints. Apply BEFORE queueing so the caller gets a clean + # error per bad URL. accepted: list[str] = [] rejected: list[dict] = [] for u_url in urls: @@ -2622,7 +2608,7 @@ def create_app() -> Flask: @require_bearer def admin_sterilize_bulk_start(): """Bearer-authed alternate to /api/sterilize/bulk-start. Body: - {"started_by_sub": "cobb@sulkta.com"} + {"started_by_sub": "user@example.com"} Resolves that user's household + decrypts their stored Mealie token + spawns a preview thread. Lets cauldron operators kick off bulk runs without needing a Flask session — same job state @@ -2690,7 +2676,7 @@ def create_app() -> Flask: @require_bearer def list_recipes_api(): # Defensive int parse — `?page=foo` previously raised ValueError - # and surfaced a 500 (audit CODE-9, 2026-05-02 PM). + # and surfaced a 500. 
try: page = max(1, int(request.args.get("page", "1") or "1")) except ValueError: @@ -2899,11 +2885,11 @@ def _index_row_to_card(row: dict, pick_slugs: set[str], mealie_public_url: str = def _const_eq(a: str, b: str) -> bool: - """Constant-time string compare for bearer-token validation. Audit - CVE-A3 (2026-05-02): the prior hand-rolled XOR loop early-returned - on length mismatch, which is itself a side-channel — an attacker - can probe the admin-bearer length. hmac.compare_digest handles - both length and content in constant time.""" + """Constant-time string compare for bearer-token validation. A + hand-rolled XOR loop that early-returns on length mismatch is + itself a side-channel — an attacker can probe the admin-bearer + length. hmac.compare_digest handles both length and content in + constant time.""" return hmac.compare_digest(a.encode(), b.encode()) diff --git a/cauldron/templates/discover.html b/cauldron/templates/discover.html index bf77196..da010ab 100644 --- a/cauldron/templates/discover.html +++ b/cauldron/templates/discover.html @@ -226,7 +226,7 @@ // crafted scraped image_url could close the url(...) string and inject // arbitrary CSS rules into the discover grid. With the // URL stays in HTML-attribute context end-to-end and `_esc` is - // sufficient. Audit CVE-NEW3-2 fix (2026-05-02 PM 3rd-pass). + // sufficient. // Defense-in-depth: only render the image element if the URL parses // as a well-formed http(s) URL — anything else falls back to the // placeholder. diff --git a/cauldron/templates/picks.html b/cauldron/templates/picks.html index b3ad6ac..2d99d0f 100644 --- a/cauldron/templates/picks.html +++ b/cauldron/templates/picks.html @@ -55,10 +55,10 @@