audit-fixes: dedupe megacluster, consolidate cancel-poll, login-CSRF, misc

Continuing through the 2nd-pass audit findings.

dedupe_recipes.py CODE-2 (HIGH): _cluster_by_name replaced its union-find
single-link agglomerative clustering with the same pair-accumulator
pattern that consolidate_foods._cluster already uses. Single-link
chained weak similarities through the recipe corpus the same way it did
with foods, producing one giant cluster on a 250+ recipe corpus that
Sonnet would refuse. It now emits one 2-recipe pair per (i, j) scoring
at or above NAME_THRESHOLD.

consolidate_foods.py + dedupe_recipes.py CODE-1 (HIGH): added an
in-loop cancel-poll to both clustering passes (_cluster and
_cluster_by_name). Each polls cancel_check every 5K pair-comparisons so
a user-initiated cancel can abort cleanly mid-scan instead of waiting
tens of seconds. The run_walk callers also re-check _cancelled() right
after clustering returns and bail.

server.py CVE-NEW-5 (MED): /login now skips the OIDC re-init when
session.get('user') is already set and redirects straight to the
sanitized next= target. Closes the login-CSRF surface where a
malicious cross-origin link `<a href=…/login?next=/poison>` would
re-trigger the OIDC handshake on a logged-in user and silently change
their post-login landing page.

server.py CVE-NEW-7 (LOW): Permissions-Policy now sends both
`interest-cohort=()` (FLoC, Chrome ≤94) and `browsing-topics=()`
(Topics API, Chrome ≥115) for opt-out across the lineage.

server.py CVE-NEW-8 (LOW): /auth/callback OAuthError branch no longer
echoes Authentik's raw error string to auth_retry.html. Detail is
logged server-side; users see a generic retry message. Closes the
information disclosure of Authentik error codes.

server.py CODE-9/CODE-10 (LOW): wrapped int() of ?page= and
?per_page= in try/except so garbage args land on safe defaults
instead of surfacing ValueError as a 500.

Deferred to the rackham-vhost commit:
- CVE-NEW-6 (cauldron bind / ProxyFix trusted-peer filter) needs
  paired Apache vhost config (RequestHeader unset X-Forwarded-*)
  before the docker-side change is safe; landing it solo would
  either break the LAN deploy or leave a half-broken trust chain.
This commit is contained in:
Kayos 2026-05-02 17:36:25 -07:00
parent fdd1102a6f
commit 946abd0322
3 changed files with 87 additions and 38 deletions

View file

@ -27,7 +27,7 @@ from __future__ import annotations
import json import json
import logging import logging
import threading import threading
from typing import Optional from typing import Callable, Optional
from rapidfuzz import fuzz, process from rapidfuzz import fuzz, process
@ -87,7 +87,11 @@ def _foods_in_household(mealie: Mealie, household_id: str) -> list[dict]:
return out return out
def _cluster(foods: list[dict], threshold: int = CLUSTER_THRESHOLD) -> list[list[dict]]: def _cluster(
foods: list[dict],
threshold: int = CLUSTER_THRESHOLD,
cancel_check: Callable[[], bool] | None = None,
) -> list[list[dict]]:
"""Pair-based: emit one 2-food candidate per (i, j) where token_set_ratio """Pair-based: emit one 2-food candidate per (i, j) where token_set_ratio
>= threshold. Replaces the original single-link agglomerative which >= threshold. Replaces the original single-link agglomerative which
produced a 50+ food megacluster on Cobb's catalog by chaining weak produced a 50+ food megacluster on Cobb's catalog by chaining weak
@ -101,16 +105,25 @@ def _cluster(foods: list[dict], threshold: int = CLUSTER_THRESHOLD) -> list[list
away by an earlier pair. away by an earlier pair.
For ~3000 foods this is ~4M comparisons in pure Python runs in For ~3000 foods this is ~4M comparisons in pure Python runs in
a few seconds.""" a few seconds. For larger catalogs (10K+) the inner loop polls
cancel_check every 5K comparisons so a user-initiated cancel can
abort cleanly mid-scan rather than waiting tens of seconds. 2nd-pass
audit fix CODE-1 (2026-05-02 PM)."""
n = len(foods) n = len(foods)
names = [(f.get("name") or "").strip().lower() for f in foods] names = [(f.get("name") or "").strip().lower() for f in foods]
pairs: list[list[dict]] = [] pairs: list[list[dict]] = []
poll_every = 5000
cmp_count = 0
for i in range(n): for i in range(n):
if not names[i]: if not names[i]:
continue continue
for j in range(i + 1, n): for j in range(i + 1, n):
if not names[j]: if not names[j]:
continue continue
cmp_count += 1
if cancel_check is not None and cmp_count % poll_every == 0:
if cancel_check():
return pairs
if fuzz.token_set_ratio(names[i], names[j]) >= threshold: if fuzz.token_set_ratio(names[i], names[j]) >= threshold:
pairs.append([foods[i], foods[j]]) pairs.append([foods[i], foods[j]])
return pairs return pairs
@ -134,8 +147,11 @@ def run_walk(*, db: DB, job_id: int, mealie: Mealie, forge: Forge) -> None:
foods = _foods_in_household(mealie, hh) foods = _foods_in_household(mealie, hh)
log.info("[consolidate:%s] household=%s foods=%d", job_id, hh, len(foods)) log.info("[consolidate:%s] household=%s foods=%d", job_id, hh, len(foods))
clusters = _cluster(foods) clusters = _cluster(foods, cancel_check=_cancelled)
log.info("[consolidate:%s] clusters≥2: %d", job_id, len(clusters)) log.info("[consolidate:%s] clusters≥2: %d", job_id, len(clusters))
if _cancelled():
log.info("[consolidate:%s] walk aborted during clustering", job_id)
return
with db.conn() as c, c.cursor() as cur: with db.conn() as c, c.cursor() as cur:
cur.execute( cur.execute(

View file

@ -22,7 +22,7 @@ from __future__ import annotations
import json import json
import logging import logging
import threading import threading
from typing import Optional from typing import Callable, Optional
from rapidfuzz import fuzz from rapidfuzz import fuzz
@ -81,39 +81,41 @@ def _filter_to_household(recipes: list[dict], household_id: str) -> list[dict]:
return out return out
def _cluster_by_name(recipes: list[dict], threshold: int = NAME_THRESHOLD) -> list[list[dict]]: def _cluster_by_name(
"""Single-link agglomerative on rapidfuzz token_set_ratio. Returns recipes: list[dict],
clusters of size >= 2. ~250 recipes = ~30K comparisons, runs instantly.""" threshold: int = NAME_THRESHOLD,
cancel_check: Callable[[], bool] | None = None,
) -> list[list[dict]]:
"""Pair-based: emit one 2-recipe candidate per (i, j) where
token_set_ratio >= threshold. 2nd-pass audit fix (CODE-2, 2026-05-02 PM):
the previous single-link agglomerative chained weak similarities through
the recipe corpus the same way it did with foods `chicken alfredo`
`chicken parm` `parm chicken cutlets` ... collapsing dozens of
unrelated recipes into one megacluster that Sonnet then had to refuse.
Mirrors the pattern used in `consolidate_foods._cluster`.
cancel_check, when provided, is polled every 5K pair-comparisons so a
user-initiated cancel can abort a long scan early (CODE-1 fix). On
cancel we return the pairs accumulated so far rather than raising
the caller's _cancelled() in run_walk will catch and exit cleanly."""
n = len(recipes) n = len(recipes)
parent = list(range(n))
def find(x):
while parent[x] != x:
parent[x] = parent[parent[x]]
x = parent[x]
return x
def union(a, b):
ra, rb = find(a), find(b)
if ra != rb:
parent[ra] = rb
names = [(r.get("name") or "").strip().lower() for r in recipes] names = [(r.get("name") or "").strip().lower() for r in recipes]
pairs: list[list[dict]] = []
poll_every = 5000
cmp_count = 0
for i in range(n): for i in range(n):
if not names[i]: if not names[i]:
continue continue
for j in range(i + 1, n): for j in range(i + 1, n):
if not names[j]: if not names[j]:
continue continue
score = fuzz.token_set_ratio(names[i], names[j]) cmp_count += 1
if score >= threshold: if cancel_check is not None and cmp_count % poll_every == 0:
union(i, j) if cancel_check():
return pairs
groups: dict[int, list[dict]] = {} if fuzz.token_set_ratio(names[i], names[j]) >= threshold:
for i in range(n): pairs.append([recipes[i], recipes[j]])
r = find(i) return pairs
groups.setdefault(r, []).append(recipes[i])
return [g for g in groups.values() if len(g) >= 2]
def _summarize_recipe(full: dict) -> dict: def _summarize_recipe(full: dict) -> dict:
@ -155,8 +157,11 @@ def run_walk(*, db: DB, job_id: int, mealie: Mealie, forge: Forge) -> None:
slim = _filter_to_household(_all_recipes(mealie), hh) slim = _filter_to_household(_all_recipes(mealie), hh)
log.info("[dedupe-recipes:%s] household=%s recipes=%d", job_id, hh, len(slim)) log.info("[dedupe-recipes:%s] household=%s recipes=%d", job_id, hh, len(slim))
clusters_slim = _cluster_by_name(slim) clusters_slim = _cluster_by_name(slim, cancel_check=_cancelled)
log.info("[dedupe-recipes:%s] name-clusters≥2: %d", job_id, len(clusters_slim)) log.info("[dedupe-recipes:%s] name-pairs: %d", job_id, len(clusters_slim))
if _cancelled():
log.info("[dedupe-recipes:%s] walk aborted during clustering", job_id)
return
with db.conn() as c, c.cursor() as cur: with db.conn() as c, c.cursor() as cur:
cur.execute( cur.execute(

View file

@ -235,7 +235,10 @@ def create_app() -> Flask:
resp.headers.setdefault("X-Frame-Options", "DENY") resp.headers.setdefault("X-Frame-Options", "DENY")
resp.headers.setdefault("X-Content-Type-Options", "nosniff") resp.headers.setdefault("X-Content-Type-Options", "nosniff")
resp.headers.setdefault("Referrer-Policy", "same-origin") resp.headers.setdefault("Referrer-Policy", "same-origin")
resp.headers.setdefault("Permissions-Policy", "interest-cohort=()") # Opt-out of FLoC (Chrome ≤94) and the Topics API replacement
# (Chrome ≥115). Both directive names are unknown to other
# browsers and silently ignored — no parse-error risk.
resp.headers.setdefault("Permissions-Policy", "interest-cohort=(), browsing-topics=()")
resp.headers.setdefault( resp.headers.setdefault(
"Content-Security-Policy", "Content-Security-Policy",
"default-src 'self'; " "default-src 'self'; "
@ -460,7 +463,15 @@ def create_app() -> Flask:
# redirect surface — `next=https://evil.example/...` would # redirect surface — `next=https://evil.example/...` would
# otherwise route an authenticated user to an attacker page # otherwise route an authenticated user to an attacker page
# right after OIDC handshake. # right after OIDC handshake.
session["post_login_next"] = _safe_next(request.args.get("next")) nxt = _safe_next(request.args.get("next"))
# Already-authenticated users skip OIDC entirely (CVE-NEW-5 fix,
# 2026-05-02 PM): a malicious cross-origin link
# `<a href="…/login?next=/some-poisoned-path">` would otherwise
# silently re-trigger the OIDC handshake on a logged-in user
# and hand them off to the attacker-supplied next= path.
if session.get("user"):
return redirect(nxt)
session["post_login_next"] = nxt
return oauth.cauldron.authorize_redirect(cfg.oidc_redirect_uri) return oauth.cauldron.authorize_redirect(cfg.oidc_redirect_uri)
@app.get("/auth/callback") @app.get("/auth/callback")
@ -490,12 +501,18 @@ def create_app() -> Flask:
detail="that login link expired (you probably retried after a blip). hit login again to start fresh.", detail="that login link expired (you probably retried after a blip). hit login again to start fresh.",
), 400 ), 400
except OAuthError as e: except OAuthError as e:
# Log the full Authentik error server-side; render only a
# generic detail to the user. Audit CVE-NEW-8 (2026-05-02 PM):
# the prior `f"auth handshake failed: {e}"` echoed Authentik
# error codes (e.g. invalid_client_id) into the auth_retry
# page — anyone who can hit /auth/callback?state=evil could
# probe Authentik internals via the rendered detail.
app.logger.warning("OIDC callback: oauth error: %s", e) app.logger.warning("OIDC callback: oauth error: %s", e)
session.pop("_state_cauldron_authlib", None) session.pop("_state_cauldron_authlib", None)
return render_template( return render_template(
"auth_retry.html", "auth_retry.html",
reason="oauth", reason="oauth",
detail=f"auth handshake failed: {e}", detail="the auth handshake didn't complete. hit login again to start fresh.",
), 400 ), 400
userinfo = token.get("userinfo") or oauth.cauldron.userinfo(token=token) userinfo = token.get("userinfo") or oauth.cauldron.userinfo(token=token)
sub = userinfo.get("sub") or userinfo.get("email") sub = userinfo.get("sub") or userinfo.get("email")
@ -663,7 +680,10 @@ def create_app() -> Flask:
if not client: if not client:
return jsonify({"error": "not connected"}), 409 return jsonify({"error": "not connected"}), 409
u = session["user"] u = session["user"]
page = max(1, int(request.args.get("page", "1"))) try:
page = max(1, int(request.args.get("page", "1") or "1"))
except ValueError:
page = 1
search = (request.args.get("q") or "").strip() search = (request.args.get("q") or "").strip()
sort = request.args.get("sort", "newest") sort = request.args.get("sort", "newest")
category = (request.args.get("cat") or "").strip() or None category = (request.args.get("cat") or "").strip() or None
@ -2574,8 +2594,16 @@ def create_app() -> Flask:
@app.get("/api/recipes") @app.get("/api/recipes")
@require_bearer @require_bearer
def list_recipes_api(): def list_recipes_api():
page = int(request.args.get("page", "1")) # Defensive int parse — `?page=foo` previously raised ValueError
per_page = min(int(request.args.get("per_page", "50")), 200) # and surfaced a 500 (audit CODE-9, 2026-05-02 PM).
try:
page = max(1, int(request.args.get("page", "1") or "1"))
except ValueError:
page = 1
try:
per_page = min(max(1, int(request.args.get("per_page", "50") or "50")), 200)
except ValueError:
per_page = 50
return jsonify(system_mealie.list_recipes(page=page, per_page=per_page)) return jsonify(system_mealie.list_recipes(page=page, per_page=per_page))
@app.post("/api/sterilize/preview/<slug>") @app.post("/api/sterilize/preview/<slug>")