Public-flip audit: env-driven paths, scrub audit-ticket prefixes, terser README
Lucy bind paths + LAN host pins replaced with env defaults. Repository URLs → git.sulkta.com. Audit-changelog scaffolding stripped from inline comments (technical reasoning preserved). README sheds marketing scaffolding. AI-speak in load-bearing prompts/SOULs left alone — that IS the product.
This commit is contained in:
parent
84b16bc0ad
commit
592b4f1161
14 changed files with 201 additions and 270 deletions
50
.env.example
50
.env.example
|
|
@ -1,6 +1,5 @@
|
|||
# Cauldron — copy to /mnt/cache/appdata/secrets/cauldron.env on Lucy
|
||||
# (chmod 600, root:root). Some values are already populated by the deploy
|
||||
# bootstrap (CLAWDFORGE_*); fill in the rest before first start.
|
||||
# Cauldron — copy to .env (chmod 600). Point compose at it via
|
||||
# CAULDRON_ENV_FILE if you keep it elsewhere.
|
||||
|
||||
# Flask
|
||||
SECRET_KEY=change-me-32-bytes-of-entropy
|
||||
|
|
@ -9,12 +8,12 @@ SECRET_KEY=change-me-32-bytes-of-entropy
|
|||
BIND_HOST=0.0.0.0
|
||||
BIND_PORT=7790
|
||||
|
||||
# Mealie (recipes.sulkta.com is already wired with Authentik OIDC)
|
||||
MEALIE_BASE_URL=https://recipes.sulkta.com
|
||||
# Mealie
|
||||
MEALIE_BASE_URL=https://mealie.example.com
|
||||
MEALIE_API_TOKEN=
|
||||
|
||||
# clawdforge (centralized claude-runner on Lucy)
|
||||
CLAWDFORGE_URL=http://192.168.0.5:8800
|
||||
# clawdforge (claude-runner HTTP service)
|
||||
CLAWDFORGE_URL=http://clawdforge:8800
|
||||
CLAWDFORGE_TOKEN=
|
||||
DEFAULT_MODEL=sonnet
|
||||
DEFAULT_TIMEOUT_SECS=120
|
||||
|
|
@ -22,14 +21,15 @@ DEFAULT_TIMEOUT_SECS=120
|
|||
# Admin bearer for batch ops (sterilize-all, etc.) — separate from user OIDC
|
||||
ADMIN_BEARER=change-me-this-is-the-cauldron-admin-batch-token
|
||||
|
||||
# Authentik OIDC (provisioned 2026-04-28; client_id + secret minted by Authentik)
|
||||
OIDC_ISSUER=https://auth.sulkta.com/application/o/cauldron/
|
||||
# Authentik OIDC (or any OIDC provider that exposes
|
||||
# /.well-known/openid-configuration)
|
||||
OIDC_ISSUER=https://auth.example.com/application/o/cauldron/
|
||||
OIDC_CLIENT_ID=
|
||||
OIDC_CLIENT_SECRET=
|
||||
OIDC_REDIRECT_URI=http://192.168.0.5:7790/auth/callback
|
||||
OIDC_REDIRECT_URI=http://localhost:7790/auth/callback
|
||||
|
||||
# DB (sulkta-mariadb on the sulkta bridge)
|
||||
DB_HOST=sulkta-mariadb
|
||||
# DB
|
||||
DB_HOST=mariadb
|
||||
DB_PORT=3306
|
||||
DB_NAME=cauldron
|
||||
DB_USER=cauldron_app
|
||||
|
|
@ -39,28 +39,26 @@ DB_PASSWORD=
|
|||
# Generate with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"
|
||||
CAULDRON_FERNET_KEY=
|
||||
|
||||
# --- Public-deploy hardening (added 2026-05-02 CVE audit) ---
|
||||
# Comma-separated list of authentik subjects who get the operator-tier
|
||||
# /me admin tools panel (consolidate, discover scrape). Empty = nobody.
|
||||
# Cobb's authentik sub goes here for production.
|
||||
# --- Public-deploy hardening ---
|
||||
# Comma-separated list of OIDC subjects who get the operator-tier /me
|
||||
# admin tools panel (consolidate, discover scrape). Empty = nobody.
|
||||
CAULDRON_ADMIN_SUBS=
|
||||
|
||||
# External base URL where cauldron is reachable. Set to your public host
|
||||
# (e.g. https://cauldron.sulkta.com) when going public; leave empty for
|
||||
# LAN-only HTTP. When set: enables CSRF Origin guard, HSTS, secure cookie.
|
||||
# External base URL where cauldron is reachable (e.g. https://cauldron.example.com).
|
||||
# Leave empty for LAN-only HTTP. When set: enables CSRF Origin guard,
|
||||
# HSTS, secure cookie.
|
||||
CAULDRON_BASE_URL=
|
||||
|
||||
# Whether the deploy is fronted by TLS (rackham apache → cauldron over
|
||||
# OpenVPN). Independent toggle from base_url so dev/staging can override.
|
||||
# When true: SESSION_COOKIE_SECURE=True, HSTS header emitted.
|
||||
# Whether the deploy is fronted by TLS. Independent toggle from base_url
|
||||
# so dev/staging can override. When true: SESSION_COOKIE_SECURE=True,
|
||||
# HSTS header emitted.
|
||||
CAULDRON_BEHIND_TLS=false
|
||||
|
||||
# Comma-separated CIDR list of trusted proxies whose X-Forwarded-* we
|
||||
# honor. Empty = trust nothing → ProxyFix is OFF and X-Forwarded-* are
|
||||
# stripped from every request. For the rackham→OpenVPN→lucy:7790 deploy,
|
||||
# set this to rackham's WireGuard-internal IP (e.g. 10.20.30.1/32). Any
|
||||
# X-Forwarded-* from a peer outside this list gets dropped before
|
||||
# ProxyFix sees it.
|
||||
# stripped from every request. Set this to the reverse-proxy peer's
|
||||
# address (e.g. 10.20.30.1/32). Any X-Forwarded-* from a peer outside
|
||||
# this list gets dropped before ProxyFix sees it.
|
||||
CAULDRON_TRUSTED_PROXIES=
|
||||
|
||||
# bugs.sulkta.com integration. Per-service key minted via:
|
||||
|
|
|
|||
130
README.md
130
README.md
|
|
@ -1,100 +1,60 @@
|
|||
# cauldron
|
||||
|
||||
Mealie-backed AI meal planner + shopping list for the family. LAN-only,
|
||||
internal tool. Mealie at `recipes.sulkta.com` is the source of truth for
|
||||
recipes / meal plans / shopping lists; cauldron is the AI layer + Abby's
|
||||
branded UI on top.
|
||||
Mealie-backed meal planner + shopping-list aggregator. Wraps a Mealie
|
||||
instance with: an ingredient sterilizer (free-form quantities → structured
|
||||
parses), a weekly meal-plan generator, and a household shopping list that
|
||||
collapses cross-recipe duplicates.
|
||||
|
||||
## Status
|
||||
## Stack
|
||||
|
||||
**v0.1 — backend bones (current).** Ingredient sterilizer endpoint working.
|
||||
No UI yet; bearer-auth API only. Frontend + Authentik OIDC arrives in v0.2.
|
||||
Native Kotlin Android in v0.5.
|
||||
- Flask + gunicorn, Python 3.12
|
||||
- Authentik (or any OIDC provider) for sessions
|
||||
- MariaDB for per-user prefs + Fernet-encrypted Mealie tokens
|
||||
- [clawdforge](https://github.com/Sulkta-Coop/clawdforge) for the AI layer
|
||||
|
||||
## Surface (v0.1)
|
||||
Mealie remains the source of truth for recipes / plans / shopping lists.
|
||||
Cauldron stores per-user prefs + cached metadata only.
|
||||
|
||||
## Run
|
||||
|
||||
```bash
|
||||
cp .env.example .env # fill in secrets
|
||||
docker compose up -d --build
|
||||
curl http://localhost:7790/healthz
|
||||
```
|
||||
|
||||
Use `CAULDRON_ENV_FILE=/path/to/secrets.env docker compose up -d` if your env
|
||||
file lives outside the repo.
|
||||
|
||||
## Endpoints
|
||||
|
||||
```
|
||||
GET /healthz liveness + clawdforge upstream
|
||||
GET /api/recipes list Mealie recipes (paginated)
|
||||
POST /api/sterilize/preview/<slug> dry-run AI parse, return proposals
|
||||
POST /api/sterilize/apply/<slug> write parses back to Mealie
|
||||
GET /login /auth/callback OIDC flow
|
||||
GET /me account + integration status
|
||||
GET /plan /list household plan + shopping list
|
||||
GET /api/recipes (admin bearer) proxy Mealie list
|
||||
POST /api/sterilize/preview/<slug> (admin bearer) dry-run parser
|
||||
POST /api/sterilize/apply/<slug> (admin bearer) write parses back
|
||||
```
|
||||
|
||||
All routes except `/healthz` require `Authorization: Bearer <ADMIN_BEARER>`.
|
||||
Admin-bearer endpoints expect `Authorization: Bearer $ADMIN_BEARER`.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
Abby's phone (later: Kotlin app)
|
||||
│
|
||||
▼
|
||||
cauldron (Flask, port 7790, LAN-only)
|
||||
├─ Mealie API client ─── recipes.sulkta.com (source of truth)
|
||||
├─ clawdforge client ─── 192.168.0.5:8800 (claude -p runner)
|
||||
└─ Authentik OIDC (v0.2)
|
||||
```
|
||||
|
||||
cauldron does NOT hold its own database in v0.1 — all state lives in Mealie.
|
||||
A small Postgres/MariaDB schema lands in v0.2 for Abby-specific prefs +
|
||||
chat history.
|
||||
|
||||
## Ingredient sterilizer
|
||||
|
||||
Mealie's CRF parser is mediocre. Cobb's hand-typed recipes have lots of
|
||||
free-form quantity strings ("about 2 cups cooked white rice", "1 small
|
||||
handful kale", "a pinch of salt") that don't aggregate cleanly into a
|
||||
shopping list.
|
||||
|
||||
The sterilizer batches all ingredients of one recipe into a single Sonnet
|
||||
call (via clawdforge), gets back parallel structured parses, then on apply
|
||||
links each parse to existing Mealie food/unit records (creating any missing
|
||||
by name) and PUTs the recipe back.
|
||||
|
||||
Preview is non-destructive — review proposals before apply.
|
||||
|
||||
```bash
|
||||
# Dry-run preview
|
||||
curl -sS -X POST -H "Authorization: Bearer $ADMIN_BEARER" \
|
||||
http://192.168.0.5:7790/api/sterilize/preview/spaghetti-bolognese | jq .
|
||||
|
||||
# Apply (creates missing foods/units by default)
|
||||
curl -sS -X POST -H "Authorization: Bearer $ADMIN_BEARER" \
|
||||
http://192.168.0.5:7790/api/sterilize/apply/spaghetti-bolognese | jq .
|
||||
```
|
||||
|
||||
## Deploy
|
||||
|
||||
1. `ssh lucy`
|
||||
2. `cd /mnt/user/appdata && git clone <gitea-url> cauldron && cd cauldron/build`
|
||||
(or wherever the deploy convention lands)
|
||||
3. Drop `.env` at `/mnt/cache/appdata/secrets/cauldron.env` (chmod 600 root:root)
|
||||
- `CLAWDFORGE_TOKEN` is already populated by the bootstrap (see `memory/2026-04-28.md`)
|
||||
- `MEALIE_API_TOKEN` — mint at `recipes.sulkta.com` → user → API tokens
|
||||
- `ADMIN_BEARER` — pick 32 bytes of entropy
|
||||
- `SECRET_KEY` — 32 bytes for Flask sessions
|
||||
4. `docker compose up -d --build`
|
||||
5. Smoke: `curl http://192.168.0.5:7790/healthz`
|
||||
|
||||
## Roadmap
|
||||
|
||||
- v0.1 ✓ — sterilizer backend + Flask shell
|
||||
- v0.2 — Authentik OIDC, Abby-branded web UI, palette CSS, postgres for prefs
|
||||
- v0.3 — meal plan generator (week → Mealie meal plan write)
|
||||
- v0.4 — shopping list aggregator (read meal plan → consolidated grocery list)
|
||||
- v0.5 — native Kotlin + Compose Android app (read-only shopping list + plan view)
|
||||
|
||||
## Repo layout
|
||||
## Layout
|
||||
|
||||
```
|
||||
cauldron/
|
||||
├─ cauldron/
|
||||
│ ├─ config.py env-driven config
|
||||
│ ├─ forge.py clawdforge HTTP client
|
||||
│ ├─ mealie.py Mealie API client
|
||||
│ ├─ sterilizer.py ingredient parse + apply pipeline
|
||||
│ └─ server.py Flask app
|
||||
├─ Dockerfile
|
||||
├─ compose.yml
|
||||
├─ requirements.txt
|
||||
└─ .env.example
|
||||
config.py env-driven config
|
||||
forge.py clawdforge HTTP client
|
||||
mealie.py Mealie API client
|
||||
sterilizer.py ingredient parse + apply pipeline
|
||||
aggregator.py cross-recipe shopping aggregator
|
||||
server.py Flask app
|
||||
scripts/
|
||||
build_foods_seed.py USDA → foods seed
|
||||
clean_foods_seed.py clawdforge-curated cleanup pass
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
MIT.
|
||||
|
|
|
|||
|
|
@ -189,8 +189,8 @@ def _aggregate_one_food(
|
|||
"""All ingredients for ONE food → 1+ ShoppingLines."""
|
||||
# Bucket by unit class. Ingredients with qty=None go to a separate
|
||||
# `no_qty` bucket so they DON'T silently disappear from the shopping
|
||||
# list when Mealie's parser couldn't extract a number (audit F-15
|
||||
# domain, 2026-05-02). The killer feature should surface "buy onion"
|
||||
# list when Mealie's parser couldn't extract a number. The killer
|
||||
# feature should surface "buy onion"
|
||||
# even if the source recipe just said "onion, chopped" without a
|
||||
# parseable quantity.
|
||||
buckets: dict[str, list[tuple[Ingredient, float]]] = {
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class Config:
|
|||
oidc_client_secret: str
|
||||
oidc_redirect_uri: str
|
||||
|
||||
# DB (sulkta-mariadb)
|
||||
# DB
|
||||
db_host: str
|
||||
db_port: int
|
||||
db_name: str
|
||||
|
|
@ -50,20 +50,18 @@ class Config:
|
|||
# - werkzeug.middleware.proxy_fix.ProxyFix is wrapped (1 hop trusted)
|
||||
base_url: str
|
||||
|
||||
# Whether the deploy is fronted by TLS (rackham Apache → cauldron over OpenVPN).
|
||||
# Independent toggle from base_url so dev/staging can override.
|
||||
# Whether the deploy is fronted by TLS. Independent toggle from
|
||||
# base_url so dev/staging can override.
|
||||
behind_tls: bool
|
||||
|
||||
# Comma-separated list of CIDRs (or single IPs) whose X-Forwarded-*
|
||||
# headers we trust. Empty = trust nothing → ProxyFix is NOT enabled
|
||||
# and incoming X-Forwarded-* headers from any peer are stripped before
|
||||
# they reach the app. When non-empty, only the listed peers can set
|
||||
# the perceived scheme/host/port. Audit CVE-NEW-6 (2026-05-02 PM):
|
||||
# the prior `if cfg.behind_tls: ProxyFix(...)` trusted X-Forwarded-*
|
||||
# from ANY peer that could reach :7790 — including other containers
|
||||
# on the sulkta docker network, since gunicorn binds 0.0.0.0:7790.
|
||||
# An attacker on a sibling container could spoof X-Forwarded-Proto
|
||||
# and have request.is_secure return True even on plain HTTP.
|
||||
# and incoming X-Forwarded-* headers from any peer are stripped
|
||||
# before they reach the app. When non-empty, only the listed peers
|
||||
# can set the perceived scheme/host/port. Required because gunicorn
|
||||
# binds 0.0.0.0:7790; without it any sibling container on the same
|
||||
# docker network could spoof X-Forwarded-Proto and have
|
||||
# request.is_secure return True on plain HTTP.
|
||||
trusted_proxies: tuple[str, ...]
|
||||
|
||||
# bugs.sulkta.com integration (vendored SDK at cauldron/vendor/bugs_sulkta).
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ def _cluster(
|
|||
a few seconds. For larger catalogs (10K+) the inner loop polls
|
||||
cancel_check every 5K comparisons so a user-initiated cancel can
|
||||
abort cleanly mid-scan rather than waiting tens of seconds.
|
||||
audit fix CODE-1 (2026-05-02 PM)."""
|
||||
"""
|
||||
n = len(foods)
|
||||
names = [(f.get("name") or "").strip().lower() for f in foods]
|
||||
pairs: list[list[dict]] = []
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""DB access + migrations against sulkta-mariadb.
|
||||
"""DB access + migrations against MariaDB.
|
||||
|
||||
Uses PyMySQL with a tiny per-request connection (no pool) — Cauldron is
|
||||
LAN-only family-internal, traffic is single-digit qps. If load ever grows
|
||||
swap in DBUtils.PooledDB or SQLAlchemy.
|
||||
Uses PyMySQL with a tiny per-request connection (no pool) — traffic is
|
||||
single-digit qps. If load ever grows, swap in DBUtils.PooledDB or
|
||||
SQLAlchemy.
|
||||
"""
|
||||
from contextlib import contextmanager
|
||||
from pathlib import Path
|
||||
|
|
@ -607,12 +607,12 @@ MIGRATIONS = [
|
|||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4
|
||||
""",
|
||||
# 039 — Per-household discover skips. Replaces the global
|
||||
# `cauldron_discovered_recipes.status='rejected'` write — that flipped
|
||||
# the row for EVERY household in EVERY group (audit finding F-2 routes,
|
||||
# 2026-05-02). Different households have different tastes; skip is
|
||||
# per-household. The global status column stays for spam URLs an admin
|
||||
# wants to nuke for everyone (set via a future bearer-only endpoint);
|
||||
# routine "not interested" goes here.
|
||||
# `cauldron_discovered_recipes.status='rejected'` write — that
|
||||
# flipped the row for EVERY household in EVERY group. Different
|
||||
# households have different tastes; skip is per-household. The
|
||||
# global status column stays for spam URLs an admin wants to nuke
|
||||
# for everyone (set via a future bearer-only endpoint); routine
|
||||
# "not interested" goes here.
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS cauldron_discover_skips (
|
||||
discover_id BIGINT NOT NULL,
|
||||
|
|
@ -1900,8 +1900,8 @@ class DB:
|
|||
with cauldron_recipe_meta to derive their picking pattern:
|
||||
|
||||
{
|
||||
"cobb@sulkta.com": {
|
||||
"display_name": "cobb",
|
||||
"user@example.com": {
|
||||
"display_name": "user",
|
||||
"total_picks": 24,
|
||||
"cuisines": {"asian": 6, "mexican": 4, "italian": 3, ...},
|
||||
"proteins": {"chicken": 8, "beef": 5, "fish": 2, ...},
|
||||
|
|
|
|||
|
|
@ -87,15 +87,15 @@ def _cluster_by_name(
|
|||
cancel_check: Callable[[], bool] | None = None,
|
||||
) -> list[list[dict]]:
|
||||
"""Pair-based: emit one 2-recipe candidate per (i, j) where
|
||||
token_set_ratio >= threshold. 2nd-pass audit fix (CODE-2, 2026-05-02 PM):
|
||||
the previous single-link agglomerative chained weak similarities through
|
||||
the recipe corpus the same way it did with foods — `chicken alfredo`
|
||||
→ `chicken parm` → `parm chicken cutlets` → ... — collapsing dozens of
|
||||
unrelated recipes into one megacluster that Sonnet then had to refuse.
|
||||
token_set_ratio >= threshold. The previous single-link agglomerative
|
||||
approach chained weak similarities through the recipe corpus the
|
||||
same way it did with foods — `chicken alfredo` → `chicken parm` →
|
||||
`parm chicken cutlets` → ... — collapsing dozens of unrelated
|
||||
recipes into one megacluster that Sonnet then had to refuse.
|
||||
Mirrors the pattern used in `consolidate_foods._cluster`.
|
||||
|
||||
cancel_check, when provided, is polled every 5K pair-comparisons so a
|
||||
user-initiated cancel can abort a long scan early (CODE-1 fix). On
|
||||
user-initiated cancel can abort a long scan early. On
|
||||
cancel we return the pairs accumulated so far rather than raising —
|
||||
the caller's _cancelled() in run_walk will catch and exit cleanly."""
|
||||
n = len(recipes)
|
||||
|
|
@ -265,7 +265,7 @@ def run_apply(*, db: DB, job_id: int, mealie: Mealie) -> None:
|
|||
# (A,B) approved+deleted; later (A,C) tries to delete A
|
||||
# again. Mealie returns 404 — treat that as already-
|
||||
# handled, not an error. Mirrors the consolidate
|
||||
# apply path. 3rd-pass audit fix CODE3-1 (2026-05-02 PM).
|
||||
# apply path.
|
||||
if "404" in msg or "not found" in msg.lower():
|
||||
log.info("[dedupe-recipes:%s] delete %s: stale (already removed)", job_id, slug)
|
||||
continue
|
||||
|
|
|
|||
|
|
@ -88,10 +88,9 @@ def is_public_url(url: str) -> tuple[bool, str]:
|
|||
non-loopback, non-link-local IP. Returns (ok, reason).
|
||||
|
||||
Used by both `/api/discover/scrape-start` (pre-queue rejection) and
|
||||
`_scrape_one` (defense-in-depth before fetch). Audit finding F-1
|
||||
(CRIT, 2026-05-02): without this, any session user could queue URLs
|
||||
pointing at Lucy's LAN, the docker bridge, or cloud metadata
|
||||
endpoints (169.254.169.254, etc).
|
||||
`_scrape_one` (defense-in-depth before fetch). Without this, any
|
||||
session user could queue URLs pointing at the LAN, the docker
|
||||
bridge, or cloud metadata endpoints (169.254.169.254, etc).
|
||||
|
||||
Strategy:
|
||||
1. Parse the URL. Reject non-http(s) schemes.
|
||||
|
|
@ -232,9 +231,8 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None:
|
|||
# allow_redirects=False: is_public_url validated the
|
||||
# original host as public; a 30x to 127.0.0.1 / 169.254.x
|
||||
# would otherwise route this scrape worker at internal
|
||||
# services (LAN scanner, cloud metadata IMDS). 3rd-pass
|
||||
# audit fix CVE-NEW3-1 (2026-05-02 PM): treat 30x as
|
||||
# scrape failure rather than chase the redirect chain.
|
||||
# services (LAN scanner, cloud metadata IMDS). Treat 30x
|
||||
# as scrape failure rather than chase the redirect chain.
|
||||
# The recipe_scrapers primary path has its own internal
|
||||
# request chain that's a known residual — the docstring
|
||||
# on is_public_url notes the long-term answer is a
|
||||
|
|
|
|||
|
|
@ -134,10 +134,10 @@ def run_enrich(
|
|||
|
||||
# Heartbeat between Sonnet sub-calls so a slow verify_allergens
|
||||
# doesn't push last_progress_at past db.fail_stuck_enrich_jobs's
|
||||
# stale_minutes (15) window. Audit CODE-5 (2026-05-02 PM):
|
||||
# without this, two ~3-4-min Sonnet calls back-to-back could
|
||||
# straddle the 15-min staleness gate and a still-alive job
|
||||
# would be incorrectly reaped at next worker restart.
|
||||
# stale_minutes (15) window. Without this, two ~3-4-min
|
||||
# Sonnet calls back-to-back could straddle the 15-min
|
||||
# staleness gate and a still-alive job would be incorrectly
|
||||
# reaped at next worker restart.
|
||||
db.update_enrich_job_progress(job_id, current_slug=slug)
|
||||
|
||||
# Verification pass: re-check contains.* booleans with a strict
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""Flask app — v0.2 foundation.
|
||||
|
||||
Adds Authentik OIDC + sulkta-mariadb DB + Fernet crypto for per-user Mealie
|
||||
tokens. v0.1 admin endpoints stay (still bearer-gated for now); user-facing
|
||||
routes start using OIDC sessions.
|
||||
Adds Authentik OIDC + MariaDB + Fernet crypto for per-user Mealie tokens.
|
||||
v0.1 admin endpoints stay (still bearer-gated for now); user-facing routes
|
||||
start using OIDC sessions.
|
||||
|
||||
Routes (current):
|
||||
GET /healthz liveness, no auth
|
||||
|
|
@ -64,29 +64,24 @@ def create_app() -> Flask:
|
|||
app.config.update(
|
||||
SESSION_COOKIE_HTTPONLY=True,
|
||||
SESSION_COOKIE_SAMESITE="Lax",
|
||||
# SESSION_COOKIE_SECURE: env-gated. ON when behind TLS so the
|
||||
# session cookie won't ride over plain HTTP (sslstrip / mixed-
|
||||
# content downgrade). Audit CVE-D1 (2026-05-02). Off in dev
|
||||
# so LAN HTTP development still works.
|
||||
# ON when behind TLS so the session cookie won't ride plain HTTP
|
||||
# (sslstrip / mixed-content downgrade). Off in dev so plain-HTTP
|
||||
# local development still works.
|
||||
SESSION_COOKIE_SECURE=cfg.behind_tls,
|
||||
# 14-day session lifetime + refresh-each-request idle slide.
|
||||
# Audit CVE-D2 (2026-05-02): Flask's default has no expiry.
|
||||
# Flask's default has no expiry.
|
||||
PERMANENT_SESSION_LIFETIME=timedelta(days=14),
|
||||
SESSION_REFRESH_EACH_REQUEST=True,
|
||||
# 1 MiB body cap. Cauldron POSTs are tiny JSON; no legitimate
|
||||
# reason for a request body to exceed this. Audit CVE-G1.
|
||||
# 1 MiB body cap. All POSTs are tiny JSON.
|
||||
MAX_CONTENT_LENGTH=1 * 1024 * 1024,
|
||||
)
|
||||
|
||||
# X-Forwarded-* trust chain: parse the trusted-proxy CIDR list once
|
||||
# at boot. Empty = trust nothing → ProxyFix is NOT enabled and any
|
||||
# X-Forwarded-* headers from any peer get stripped before they reach
|
||||
# the app. Audit CVE-NEW-6 (2026-05-02 PM 2nd-pass): the prior
|
||||
# `if cfg.behind_tls: ProxyFix(...)` trusted X-Forwarded-* from any
|
||||
# peer that could reach :7790 — including sibling containers on the
|
||||
# sulkta docker network, since gunicorn binds 0.0.0.0:7790. An
|
||||
# attacker on a co-located container could spoof X-Forwarded-Proto
|
||||
# and have request.is_secure return True even on plain HTTP.
|
||||
# the app. Required because gunicorn binds 0.0.0.0:7790 — without it
|
||||
# any sibling on the same docker network could spoof X-Forwarded-Proto
|
||||
# and have request.is_secure return True on plain HTTP.
|
||||
_trusted_networks: list[ipaddress.IPv4Network | ipaddress.IPv6Network] = []
|
||||
for entry in cfg.trusted_proxies:
|
||||
try:
|
||||
|
|
@ -229,28 +224,24 @@ def create_app() -> Flask:
|
|||
u = session.get("user") or {}
|
||||
return {"is_admin": u.get("sub") in cfg.admin_subs}
|
||||
|
||||
# CSRF Origin/Referer check (audit CVE-A1, 2026-05-02).
|
||||
# CSRF Origin/Referer check.
|
||||
# SAMESITE=Lax alone doesn't cover same-site subdomain CSRF (a
|
||||
# compromised *.sulkta.com page POSTing to cauldron.sulkta.com
|
||||
# carries cookies). When CAULDRON_BASE_URL is set, every state-
|
||||
# mutating request must EXACTLY match origin (scheme+host+port).
|
||||
# Bearer-token API calls are exempt — no cookie means no CSRF
|
||||
# surface. Pure-GET/HEAD/OPTIONS are exempt.
|
||||
# compromised sibling subdomain POSTing carries cookies). When
|
||||
# CAULDRON_BASE_URL is set, every state-mutating request must
|
||||
# EXACTLY match origin (scheme+host+port). Bearer-token API calls
|
||||
# are exempt — no cookie means no CSRF surface. GET/HEAD/OPTIONS
|
||||
# are exempt.
|
||||
#
|
||||
# 2nd-pass audit fix (2026-05-02 PM, CVE-NEW-1): the original guard
|
||||
# used `startswith(cfg.base_url)` which is bypassable by an attacker
|
||||
# registering `cauldron.sulkta.com.evil.com` — its Origin string
|
||||
# starts-with `https://cauldron.sulkta.com`. Switched to parsed-
|
||||
# origin equality so the host comparison is byte-exact at the
|
||||
# netloc boundary.
|
||||
# Note: do NOT compare with startswith() on the raw base_url. An
|
||||
# attacker registering `cauldron.example.com.evil.com` would pass
|
||||
# such a check. Compare parsed origins so the host match is byte-
|
||||
# exact at the netloc boundary.
|
||||
def _origin_of(url: str) -> str:
|
||||
"""RFC-normalized origin: lowercase scheme + lowercase host,
|
||||
plus port unless it's the scheme's default. 3rd-pass audit fix
|
||||
CVE-NEW3-3 (2026-05-02 PM): the prior byte-equality compare
|
||||
could false-reject browsers that send `Origin: https://Cauldron.SULKTA.com`
|
||||
(some preserve case in netloc) or `https://x.com:443` against a
|
||||
bare `https://x.com` base. urlparse already lowercases scheme but
|
||||
NOT host, and doesn't drop default ports."""
|
||||
plus port unless it's the scheme's default. urlparse lowercases
|
||||
scheme but NOT host, and doesn't drop default ports, so a naive
|
||||
byte compare can false-reject `Origin: https://Cauldron.example.com`
|
||||
or `https://x.com:443` vs a bare `https://x.com` base."""
|
||||
if not url:
|
||||
return ""
|
||||
try:
|
||||
|
|
@ -294,9 +285,9 @@ def create_app() -> Flask:
|
|||
return jsonify({"error": "csrf_origin_mismatch"}), 403
|
||||
return ("Cross-origin request rejected.", 403)
|
||||
|
||||
# Security response headers (audit CVE-E1, 2026-05-02). HSTS only
|
||||
# when behind TLS (sending HSTS over HTTP is invalid). CSP is
|
||||
# permissive on inline because templates use them; tighten later.
|
||||
# Security response headers. HSTS only when behind TLS (sending HSTS
|
||||
# over HTTP is invalid). CSP is permissive on inline because templates
|
||||
# use them; tighten later.
|
||||
@app.after_request
|
||||
def _security_headers(resp):
|
||||
resp.headers.setdefault("X-Frame-Options", "DENY")
|
||||
|
|
@ -349,7 +340,7 @@ def create_app() -> Flask:
|
|||
"""Layered on top of require_session — only members of cfg.admin_subs
|
||||
proceed. Non-admins get a 404 (not a 403) so the route's existence
|
||||
isn't advertised. Used for `/discover` and `/consolidate` whose
|
||||
admin-only nature was scoped per Cobb 2026-05-02."""
|
||||
admin-only nature was scoped intentionally."""
|
||||
@wraps(fn)
|
||||
def w(*a, **kw):
|
||||
u = session.get("user")
|
||||
|
|
@ -434,11 +425,10 @@ def create_app() -> Flask:
|
|||
"""Canonical household_id source for session-authenticated routes.
|
||||
EVERY session-auth handler that scopes data by household MUST get
|
||||
its `hid` from this helper, never from request body / query / form
|
||||
— otherwise we open a cross-household read/write surface (audit
|
||||
CODE-4 convention, 2026-05-02 PM 2nd-pass). Admin-bearer endpoints
|
||||
legitimately derive hid from `started_by_sub` because the bearer
|
||||
IS the trust anchor for those calls; that path is documented
|
||||
separately at the admin endpoints."""
|
||||
— otherwise we open a cross-household read/write surface.
|
||||
Admin-bearer endpoints legitimately derive hid from
|
||||
`started_by_sub` because the bearer IS the trust anchor for
|
||||
those calls; that path is documented at the admin endpoints."""
|
||||
u = session.get("user")
|
||||
if not u:
|
||||
return None
|
||||
|
|
@ -463,11 +453,10 @@ def create_app() -> Flask:
|
|||
@app.get("/healthz")
|
||||
def healthz():
|
||||
"""Liveness probe. Public, intentionally minimal — returns
|
||||
{"ok": true} or {"ok": false} ONLY. Audit CVE-E3 (2026-05-02):
|
||||
the previous version echoed upstream error strings (clawdforge URL,
|
||||
DB error message including hostname/user) which leak internal LAN
|
||||
topology to anyone who can reach cauldron.sulkta.com/healthz.
|
||||
Detailed upstream check moved to /api/admin/healthz (bearer-only)."""
|
||||
{"ok": true} or {"ok": false} ONLY. Never echo upstream error
|
||||
strings here: clawdforge URLs, DB hostnames/users, etc. would
|
||||
leak internal topology to anyone who can reach /healthz.
|
||||
Detailed upstream check lives at /api/admin/healthz (bearer-only)."""
|
||||
try:
|
||||
with db.conn() as c, c.cursor() as cur:
|
||||
cur.execute("SELECT 1")
|
||||
|
|
@ -505,11 +494,11 @@ def create_app() -> Flask:
|
|||
|
||||
def _safe_next(nxt: str | None) -> str:
|
||||
"""Validate a post-login redirect target is a same-origin local
|
||||
path. Defense-in-depth open-redirect guard — we apply this BOTH
|
||||
at the /login stash AND at /auth/callback consumption (CVE-NEW-3
|
||||
audit fix 2026-05-02 PM). The double-check protects against any
|
||||
future code path that writes session['post_login_next'] outside
|
||||
of /login, and against percent-encoded path tricks."""
|
||||
path. Defense-in-depth open-redirect guard — applied BOTH at the
|
||||
/login stash AND at /auth/callback consumption. The double-check
|
||||
protects against any future code path that writes
|
||||
session['post_login_next'] outside of /login, and against
|
||||
percent-encoded path tricks."""
|
||||
if not nxt:
|
||||
return "/me"
|
||||
# Must start with `/` and only `/`. Reject `//foo`, `/\\foo`,
|
||||
|
|
@ -526,11 +515,10 @@ def create_app() -> Flask:
|
|||
return "/me"
|
||||
# Allow only a strict path charset. Anything weirder lands at /me.
|
||||
# Path component is everything before the optional `?` / `#`.
|
||||
# `%` is allowed for percent-encoded chars (3rd-pass audit fix
|
||||
# CODE3-3, 2026-05-02 PM) so paths like /recipes/spaghetti%20bol
|
||||
# don't silently land at /me. Defense-in-depth: percent-decode
|
||||
# the path and re-validate so encoded path-traversal `%2e%2e/`
|
||||
# is still caught.
|
||||
# `%` is allowed for percent-encoded chars so paths like
|
||||
# /recipes/spaghetti%20bol don't silently land at /me.
|
||||
# Defense-in-depth: percent-decode and re-validate so encoded
|
||||
# path-traversal `%2e%2e/` is still caught.
|
||||
path = p.path or "/"
|
||||
for ch in path:
|
||||
if not (ch.isalnum() or ch in "-_./%"):
|
||||
|
|
@ -551,11 +539,10 @@ def create_app() -> Flask:
|
|||
# otherwise route an authenticated user to an attacker page
|
||||
# right after OIDC handshake.
|
||||
nxt = _safe_next(request.args.get("next"))
|
||||
# Already-authenticated users skip OIDC entirely (CVE-NEW-5 fix,
|
||||
# 2026-05-02 PM): a malicious cross-origin link
|
||||
# `<a href="…/login?next=/some-poisoned-path">` would otherwise
|
||||
# silently re-trigger the OIDC handshake on a logged-in user
|
||||
# and hand them off to the attacker-supplied next= path.
|
||||
# Already-authenticated users skip OIDC entirely. Otherwise a
|
||||
# malicious cross-origin link `<a href=".../login?next=/poison">`
|
||||
# could silently re-trigger the OIDC handshake on a logged-in
|
||||
# user and hand them off to the attacker-supplied next= path.
|
||||
if session.get("user"):
|
||||
return redirect(nxt)
|
||||
session["post_login_next"] = nxt
|
||||
|
|
@ -564,7 +551,7 @@ def create_app() -> Flask:
|
|||
@app.get("/auth/callback")
|
||||
def auth_callback():
|
||||
# Wrap the OIDC exchange so transient DNS/JWKS hiccups (resolver
|
||||
# blip on auth.sulkta.com → ConnectionError → 500) render a
|
||||
# blip on the auth host → ConnectionError → 500) render a
|
||||
# friendly retry page instead of dumping a stack trace, AND
|
||||
# clear the stashed state so the user's retry doesn't trip the
|
||||
# MismatchingState CSRF guard from a stale state cookie.
|
||||
|
|
@ -589,10 +576,9 @@ def create_app() -> Flask:
|
|||
), 400
|
||||
except OAuthError as e:
|
||||
# Log the full Authentik error server-side; render only a
|
||||
# generic detail to the user. Audit CVE-NEW-8 (2026-05-02 PM):
|
||||
# the prior `f"auth handshake failed: {e}"` echoed Authentik
|
||||
# error codes (e.g. invalid_client_id) into the auth_retry
|
||||
# page — anyone who can hit /auth/callback?state=evil could
|
||||
# generic detail to the user. Don't echo provider error
|
||||
# codes (e.g. invalid_client_id) into the auth_retry page —
|
||||
# anyone who can hit /auth/callback?state=evil could otherwise
|
||||
# probe Authentik internals via the rendered detail.
|
||||
app.logger.warning("OIDC callback: oauth error: %s", e)
|
||||
session.pop("_state_cauldron_authlib", None)
|
||||
|
|
@ -616,13 +602,12 @@ def create_app() -> Flask:
|
|||
# session is just a serialized dict in the cookie body, but
|
||||
# carrying any pre-auth key into the authenticated state is the
|
||||
# session-fixation/contamination shape best-practice asks us to
|
||||
# avoid. 3rd-pass audit fix INFO3-2 (2026-05-02 PM).
|
||||
# avoid.
|
||||
session.clear()
|
||||
session["user"] = {"sub": sub, "email": email, "name": name}
|
||||
# Mark session permanent so PERMANENT_SESSION_LIFETIME (14d) is
|
||||
# honored. Without this, Flask treats the session as a browser-
|
||||
# session cookie (no Expires) and tab-close kills it. Audit
|
||||
# CVE-D2 (2026-05-02).
|
||||
# session cookie (no Expires) and tab-close kills it.
|
||||
session.permanent = True
|
||||
return redirect(nxt)
|
||||
|
||||
|
|
@ -863,9 +848,9 @@ def create_app() -> Flask:
|
|||
# — and `picks.html` interpolated the slug straight into a JS
|
||||
# `onclick='removePick('{{ slug }}', ...)'` literal, opening a
|
||||
# stored-XSS surface where any household member viewing /picks
|
||||
# ran the attacker's JS. Audit CVE-NEW-2 (2026-05-02 PM 2nd-pass).
|
||||
# Also closes the prompt-injection-via-poison-slug vector since
|
||||
# the planner would otherwise pass garbage slugs to Sonnet.
|
||||
# ran the attacker's JS. Also closes the prompt-injection-via-
|
||||
# poison-slug vector — the planner would otherwise pass garbage
|
||||
# slugs straight to Sonnet.
|
||||
hid = current_household_id()
|
||||
if hid is None:
|
||||
return jsonify({"ok": False, "error": "no_household"}), 400
|
||||
|
|
@ -2311,7 +2296,7 @@ def create_app() -> Flask:
|
|||
db.finalize_consolidate_job(job_id, state="cancelled")
|
||||
return jsonify({"ok": True})
|
||||
|
||||
# admin variants for kayos kick-off
|
||||
# admin variants for operator-driven kickoff (no user session)
|
||||
@app.post("/api/admin/foods/consolidate-start")
|
||||
@require_bearer
|
||||
def admin_consolidate_start():
|
||||
|
|
@ -2431,9 +2416,10 @@ def create_app() -> Flask:
|
|||
db.get_discover_imports_for_group(mealie_group_id=my_group_id)
|
||||
if my_group_id else {}
|
||||
)
|
||||
# Per-household skips (audit F-2 routes — was a global flip).
|
||||
# Default view filters out rows the caller's household has skipped;
|
||||
# `?status=skipped` surfaces them so the user can unskip if needed.
|
||||
# Per-household skips (was a global flip — now scoped per
|
||||
# household). Default view filters out rows the caller's
|
||||
# household has skipped; `?status=skipped` surfaces them so the
|
||||
# user can unskip if needed.
|
||||
my_skipped = (
|
||||
db.get_skipped_discover_ids_for_household(household_id=my_hid)
|
||||
if my_hid else set()
|
||||
|
|
@ -2519,12 +2505,12 @@ def create_app() -> Flask:
|
|||
@app.post("/api/discover/reject/<int:discover_id>")
|
||||
@require_admin
|
||||
def discover_reject(discover_id: int):
|
||||
"""Per-household 'skip from discover'. Audit F-2 routes 2026-05-02:
|
||||
previously this flipped the GLOBAL `cauldron_discovered_recipes.status
|
||||
= 'rejected'` field, hiding the recipe from every household in every
|
||||
group. Now writes to `cauldron_discover_skips(discover_id, household_id)`
|
||||
— only the caller's household stops seeing it; other households are
|
||||
unaffected. Different households have different tastes."""
|
||||
"""Per-household 'skip from discover'. Previously this flipped
|
||||
the GLOBAL `cauldron_discovered_recipes.status = 'rejected'`
|
||||
field, hiding the recipe from every household in every group.
|
||||
Now writes to `cauldron_discover_skips(discover_id,
|
||||
household_id)` — only the caller's household stops seeing it;
|
||||
other households are unaffected."""
|
||||
u = session["user"]
|
||||
row = db.get_discovered_recipe(discover_id)
|
||||
if not row:
|
||||
|
|
@ -2566,11 +2552,11 @@ def create_app() -> Flask:
|
|||
urls = [x for x in urls if x.startswith(("http://", "https://"))][:50]
|
||||
if not urls:
|
||||
return jsonify({"error": "no valid http(s) urls"}), 400
|
||||
# SSRF guard (audit F-1 routes, CRIT, 2026-05-02): every URL must
|
||||
# resolve to a public IP. Reject any that hit private / loopback /
|
||||
# link-local / multicast / reserved space — those are LAN, docker
|
||||
# bridge, or cloud metadata endpoints. Apply BEFORE queueing so
|
||||
# the caller gets a clean error per bad URL.
|
||||
# SSRF guard: every URL must resolve to a public IP. Reject any
|
||||
# that hit private / loopback / link-local / multicast / reserved
|
||||
# space — those are LAN, docker bridge, or cloud metadata
|
||||
# endpoints. Apply BEFORE queueing so the caller gets a clean
|
||||
# error per bad URL.
|
||||
accepted: list[str] = []
|
||||
rejected: list[dict] = []
|
||||
for u_url in urls:
|
||||
|
|
@ -2622,7 +2608,7 @@ def create_app() -> Flask:
|
|||
@require_bearer
|
||||
def admin_sterilize_bulk_start():
|
||||
"""Bearer-authed alternate to /api/sterilize/bulk-start. Body:
|
||||
{"started_by_sub": "cobb@sulkta.com"}
|
||||
{"started_by_sub": "user@example.com"}
|
||||
Resolves that user's household + decrypts their stored Mealie
|
||||
token + spawns a preview thread. Lets cauldron operators kick
|
||||
off bulk runs without needing a Flask session — same job state
|
||||
|
|
@ -2690,7 +2676,7 @@ def create_app() -> Flask:
|
|||
@require_bearer
|
||||
def list_recipes_api():
|
||||
# Defensive int parse — `?page=foo` previously raised ValueError
|
||||
# and surfaced a 500 (audit CODE-9, 2026-05-02 PM).
|
||||
# and surfaced a 500.
|
||||
try:
|
||||
page = max(1, int(request.args.get("page", "1") or "1"))
|
||||
except ValueError:
|
||||
|
|
@ -2899,11 +2885,11 @@ def _index_row_to_card(row: dict, pick_slugs: set[str], mealie_public_url: str =
|
|||
|
||||
|
||||
def _const_eq(a: str, b: str) -> bool:
|
||||
"""Constant-time string compare for bearer-token validation. Audit
|
||||
CVE-A3 (2026-05-02): the prior hand-rolled XOR loop early-returned
|
||||
on length mismatch, which is itself a side-channel — an attacker
|
||||
can probe the admin-bearer length. hmac.compare_digest handles
|
||||
both length and content in constant time."""
|
||||
"""Constant-time string compare for bearer-token validation. A
|
||||
hand-rolled XOR loop that early-returns on length mismatch is
|
||||
itself a side-channel — an attacker can probe the admin-bearer
|
||||
length. hmac.compare_digest avoids content-based short-circuiting;
|
||||
per the Python docs only lengths could theoretically still leak."""
|
||||
return hmac.compare_digest(a.encode(), b.encode())
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -226,7 +226,7 @@
|
|||
// crafted scraped image_url could close the url(...) string and inject
|
||||
// arbitrary CSS rules into the discover grid. With <img src=...> the
|
||||
// URL stays in HTML-attribute context end-to-end and `_esc` is
|
||||
// sufficient. Audit CVE-NEW3-2 fix (2026-05-02 PM 3rd-pass).
|
||||
// sufficient.
|
||||
// Defense-in-depth: only render the image element if the URL parses
|
||||
// as a well-formed http(s) URL — anything else falls back to the
|
||||
// placeholder.
|
||||
|
|
|
|||
|
|
@ -55,10 +55,10 @@
|
|||
<script>
|
||||
// Delegated unpin listener — slug is read from the parent <li>'s
|
||||
// data-slug attribute (HTML-attribute context, autoescaped by Jinja),
|
||||
// never interpolated into a JS string literal inside HTML. Audit
|
||||
// CVE-NEW-2 fix 2026-05-02 PM: the prior `onclick="removePick('{{ slug }}',...)"`
|
||||
// pattern was a stored-XSS surface because HTML attribute decoding
|
||||
// returns the bare `'` to the JS engine.
|
||||
// never interpolated into a JS string literal inside HTML. The prior
|
||||
// `onclick="removePick('{{ slug }}',...)"` pattern was a stored-XSS
|
||||
// surface because HTML attribute decoding returns the bare `'` to the
|
||||
// JS engine.
|
||||
document.getElementById('picks-list')?.addEventListener('click', async (ev) => {
|
||||
const btn = ev.target.closest('.js-unpin');
|
||||
if (!btn) return;
|
||||
|
|
|
|||
17
compose.yml
17
compose.yml
|
|
@ -9,17 +9,8 @@ services:
|
|||
container_name: cauldron
|
||||
restart: unless-stopped
|
||||
env_file:
|
||||
- /mnt/cache/appdata/secrets/cauldron.env
|
||||
# Point at wherever your secrets file lives. Override via .env at the
|
||||
# compose root: `CAULDRON_ENV_FILE=/srv/secrets/cauldron.env`.
|
||||
- ${CAULDRON_ENV_FILE:-./.env}
|
||||
ports:
|
||||
# LAN-only. Same pattern as cwho on 7777.
|
||||
- "192.168.0.5:7790:7790"
|
||||
- "127.0.0.1:7790:7790"
|
||||
networks:
|
||||
- sulkta
|
||||
- sulkta-db-net
|
||||
|
||||
networks:
|
||||
sulkta:
|
||||
external: true
|
||||
sulkta-db-net:
|
||||
external: true
|
||||
- "${CAULDRON_BIND:-0.0.0.0}:${CAULDRON_PORT:-7790}:7790"
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ HERE = Path(__file__).parent.parent
|
|||
RAW_PATH = HERE / "cauldron/data/foods_seed_usda.json"
|
||||
OUT_PATH = HERE / "cauldron/data/foods_seed.json"
|
||||
|
||||
CLAWDFORGE_URL = os.environ.get("CLAWDFORGE_URL", "http://192.168.0.5:8800")
|
||||
CLAWDFORGE_URL = os.environ.get("CLAWDFORGE_URL", "http://clawdforge:8800")
|
||||
CLAWDFORGE_TOKEN = os.environ["CLAWDFORGE_TOKEN"]
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue