audit-fixes: dedupe megacluster, consolidate cancel-poll, login-CSRF, misc
Continuing through the 2nd-pass audit findings.
dedupe_recipes.py CODE-2 (HIGH): _cluster_by_name replaced its union-find
single-link agglomerative clustering with the same pair-accumulator
pattern that consolidate_foods._cluster already uses. Single-link chained
weak similarities through the recipe corpus the same way it did with
foods, producing one giant cluster on a 250+ recipe corpus that Sonnet
would refuse. It now emits one 2-recipe pair per (i, j) whose
token_set_ratio is at or above NAME_THRESHOLD.
consolidate_foods.py + dedupe_recipes.py CODE-1 (HIGH): added an
in-loop cancel-poll to both clustering passes (_cluster and
_cluster_by_name). Each polls cancel_check every 5K pair-comparisons so
a user-initiated cancel can abort cleanly mid-scan instead of waiting
tens of seconds; on cancel the pairs found so far are returned. The
run_walk callers also re-check _cancelled() right after clustering
returns and bail out.
server.py CVE-NEW-5 (MED): /login now skips OIDC re-init when
session.get('user') is set and instead redirects straight to the
_safe_next-sanitized next= target. Closes the login-CSRF surface where a
malicious cross-origin link `<a href=…/login?next=/poison>` would
re-trigger the OIDC handshake on a logged-in user and silently change
their post-login landing.
server.py CVE-NEW-7 (LOW): Permissions-Policy now sends both
`interest-cohort=()` (FLoC, Chrome ≤94) and `browsing-topics=()`
(Topics API, Chrome ≥115) for opt-out across the lineage.
server.py CVE-NEW-8 (LOW): /auth/callback OAuthError branch no longer
echoes Authentik's raw error string to auth_retry.html. Detail is
logged server-side; users see a generic retry message. Closes the
information-disclosure on Authentik error codes.
server.py CODE-9/CODE-10 (LOW): wrapped int() of ?page= and
?per_page= in try/except so garbage args land on safe defaults
instead of surfacing ValueError as a 500.
Deferred to the rackham-vhost commit:
- CVE-NEW-6 (cauldron bind / ProxyFix trusted-peer filter) needs
paired Apache vhost config (RequestHeader unset X-Forwarded-*)
before the docker-side change is safe; landing it solo would
either break the LAN deploy or leave a half-broken trust chain.
This commit is contained in:
parent
fdd1102a6f
commit
946abd0322
3 changed files with 87 additions and 38 deletions
|
|
@ -27,7 +27,7 @@ from __future__ import annotations
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from typing import Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
from rapidfuzz import fuzz, process
|
from rapidfuzz import fuzz, process
|
||||||
|
|
||||||
|
|
@ -87,7 +87,11 @@ def _foods_in_household(mealie: Mealie, household_id: str) -> list[dict]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _cluster(foods: list[dict], threshold: int = CLUSTER_THRESHOLD) -> list[list[dict]]:
|
def _cluster(
|
||||||
|
foods: list[dict],
|
||||||
|
threshold: int = CLUSTER_THRESHOLD,
|
||||||
|
cancel_check: Callable[[], bool] | None = None,
|
||||||
|
) -> list[list[dict]]:
|
||||||
"""Pair-based: emit one 2-food candidate per (i, j) where token_set_ratio
|
"""Pair-based: emit one 2-food candidate per (i, j) where token_set_ratio
|
||||||
>= threshold. Replaces the original single-link agglomerative which
|
>= threshold. Replaces the original single-link agglomerative which
|
||||||
produced a 50+ food megacluster on Cobb's catalog by chaining weak
|
produced a 50+ food megacluster on Cobb's catalog by chaining weak
|
||||||
|
|
@ -101,16 +105,25 @@ def _cluster(foods: list[dict], threshold: int = CLUSTER_THRESHOLD) -> list[list
|
||||||
away by an earlier pair.
|
away by an earlier pair.
|
||||||
|
|
||||||
For ~3000 foods this is ~4M comparisons in pure Python — runs in
|
For ~3000 foods this is ~4M comparisons in pure Python — runs in
|
||||||
a few seconds."""
|
a few seconds. For larger catalogs (10K+) the inner loop polls
|
||||||
|
cancel_check every 5K comparisons so a user-initiated cancel can
|
||||||
|
abort cleanly mid-scan rather than waiting tens of seconds. 2nd-pass
|
||||||
|
audit fix CODE-1 (2026-05-02 PM)."""
|
||||||
n = len(foods)
|
n = len(foods)
|
||||||
names = [(f.get("name") or "").strip().lower() for f in foods]
|
names = [(f.get("name") or "").strip().lower() for f in foods]
|
||||||
pairs: list[list[dict]] = []
|
pairs: list[list[dict]] = []
|
||||||
|
poll_every = 5000
|
||||||
|
cmp_count = 0
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
if not names[i]:
|
if not names[i]:
|
||||||
continue
|
continue
|
||||||
for j in range(i + 1, n):
|
for j in range(i + 1, n):
|
||||||
if not names[j]:
|
if not names[j]:
|
||||||
continue
|
continue
|
||||||
|
cmp_count += 1
|
||||||
|
if cancel_check is not None and cmp_count % poll_every == 0:
|
||||||
|
if cancel_check():
|
||||||
|
return pairs
|
||||||
if fuzz.token_set_ratio(names[i], names[j]) >= threshold:
|
if fuzz.token_set_ratio(names[i], names[j]) >= threshold:
|
||||||
pairs.append([foods[i], foods[j]])
|
pairs.append([foods[i], foods[j]])
|
||||||
return pairs
|
return pairs
|
||||||
|
|
@ -134,8 +147,11 @@ def run_walk(*, db: DB, job_id: int, mealie: Mealie, forge: Forge) -> None:
|
||||||
foods = _foods_in_household(mealie, hh)
|
foods = _foods_in_household(mealie, hh)
|
||||||
log.info("[consolidate:%s] household=%s foods=%d", job_id, hh, len(foods))
|
log.info("[consolidate:%s] household=%s foods=%d", job_id, hh, len(foods))
|
||||||
|
|
||||||
clusters = _cluster(foods)
|
clusters = _cluster(foods, cancel_check=_cancelled)
|
||||||
log.info("[consolidate:%s] clusters≥2: %d", job_id, len(clusters))
|
log.info("[consolidate:%s] clusters≥2: %d", job_id, len(clusters))
|
||||||
|
if _cancelled():
|
||||||
|
log.info("[consolidate:%s] walk aborted during clustering", job_id)
|
||||||
|
return
|
||||||
|
|
||||||
with db.conn() as c, c.cursor() as cur:
|
with db.conn() as c, c.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
|
|
|
||||||
|
|
@ -22,7 +22,7 @@ from __future__ import annotations
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import threading
|
import threading
|
||||||
from typing import Optional
|
from typing import Callable, Optional
|
||||||
|
|
||||||
from rapidfuzz import fuzz
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
|
|
@ -81,39 +81,41 @@ def _filter_to_household(recipes: list[dict], household_id: str) -> list[dict]:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
def _cluster_by_name(recipes: list[dict], threshold: int = NAME_THRESHOLD) -> list[list[dict]]:
|
def _cluster_by_name(
|
||||||
"""Single-link agglomerative on rapidfuzz token_set_ratio. Returns
|
recipes: list[dict],
|
||||||
clusters of size >= 2. ~250 recipes = ~30K comparisons, runs instantly."""
|
threshold: int = NAME_THRESHOLD,
|
||||||
|
cancel_check: Callable[[], bool] | None = None,
|
||||||
|
) -> list[list[dict]]:
|
||||||
|
"""Pair-based: emit one 2-recipe candidate per (i, j) where
|
||||||
|
token_set_ratio >= threshold. 2nd-pass audit fix (CODE-2, 2026-05-02 PM):
|
||||||
|
the previous single-link agglomerative chained weak similarities through
|
||||||
|
the recipe corpus the same way it did with foods — `chicken alfredo`
|
||||||
|
→ `chicken parm` → `parm chicken cutlets` → ... — collapsing dozens of
|
||||||
|
unrelated recipes into one megacluster that Sonnet then had to refuse.
|
||||||
|
Mirrors the pattern used in `consolidate_foods._cluster`.
|
||||||
|
|
||||||
|
cancel_check, when provided, is polled every 5K pair-comparisons so a
|
||||||
|
user-initiated cancel can abort a long scan early (CODE-1 fix). On
|
||||||
|
cancel we return the pairs accumulated so far rather than raising —
|
||||||
|
the caller's _cancelled() in run_walk will catch and exit cleanly."""
|
||||||
n = len(recipes)
|
n = len(recipes)
|
||||||
parent = list(range(n))
|
|
||||||
|
|
||||||
def find(x):
|
|
||||||
while parent[x] != x:
|
|
||||||
parent[x] = parent[parent[x]]
|
|
||||||
x = parent[x]
|
|
||||||
return x
|
|
||||||
|
|
||||||
def union(a, b):
|
|
||||||
ra, rb = find(a), find(b)
|
|
||||||
if ra != rb:
|
|
||||||
parent[ra] = rb
|
|
||||||
|
|
||||||
names = [(r.get("name") or "").strip().lower() for r in recipes]
|
names = [(r.get("name") or "").strip().lower() for r in recipes]
|
||||||
|
pairs: list[list[dict]] = []
|
||||||
|
poll_every = 5000
|
||||||
|
cmp_count = 0
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
if not names[i]:
|
if not names[i]:
|
||||||
continue
|
continue
|
||||||
for j in range(i + 1, n):
|
for j in range(i + 1, n):
|
||||||
if not names[j]:
|
if not names[j]:
|
||||||
continue
|
continue
|
||||||
score = fuzz.token_set_ratio(names[i], names[j])
|
cmp_count += 1
|
||||||
if score >= threshold:
|
if cancel_check is not None and cmp_count % poll_every == 0:
|
||||||
union(i, j)
|
if cancel_check():
|
||||||
|
return pairs
|
||||||
groups: dict[int, list[dict]] = {}
|
if fuzz.token_set_ratio(names[i], names[j]) >= threshold:
|
||||||
for i in range(n):
|
pairs.append([recipes[i], recipes[j]])
|
||||||
r = find(i)
|
return pairs
|
||||||
groups.setdefault(r, []).append(recipes[i])
|
|
||||||
return [g for g in groups.values() if len(g) >= 2]
|
|
||||||
|
|
||||||
|
|
||||||
def _summarize_recipe(full: dict) -> dict:
|
def _summarize_recipe(full: dict) -> dict:
|
||||||
|
|
@ -155,8 +157,11 @@ def run_walk(*, db: DB, job_id: int, mealie: Mealie, forge: Forge) -> None:
|
||||||
slim = _filter_to_household(_all_recipes(mealie), hh)
|
slim = _filter_to_household(_all_recipes(mealie), hh)
|
||||||
log.info("[dedupe-recipes:%s] household=%s recipes=%d", job_id, hh, len(slim))
|
log.info("[dedupe-recipes:%s] household=%s recipes=%d", job_id, hh, len(slim))
|
||||||
|
|
||||||
clusters_slim = _cluster_by_name(slim)
|
clusters_slim = _cluster_by_name(slim, cancel_check=_cancelled)
|
||||||
log.info("[dedupe-recipes:%s] name-clusters≥2: %d", job_id, len(clusters_slim))
|
log.info("[dedupe-recipes:%s] name-pairs: %d", job_id, len(clusters_slim))
|
||||||
|
if _cancelled():
|
||||||
|
log.info("[dedupe-recipes:%s] walk aborted during clustering", job_id)
|
||||||
|
return
|
||||||
|
|
||||||
with db.conn() as c, c.cursor() as cur:
|
with db.conn() as c, c.cursor() as cur:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
|
|
|
||||||
|
|
@ -235,7 +235,10 @@ def create_app() -> Flask:
|
||||||
resp.headers.setdefault("X-Frame-Options", "DENY")
|
resp.headers.setdefault("X-Frame-Options", "DENY")
|
||||||
resp.headers.setdefault("X-Content-Type-Options", "nosniff")
|
resp.headers.setdefault("X-Content-Type-Options", "nosniff")
|
||||||
resp.headers.setdefault("Referrer-Policy", "same-origin")
|
resp.headers.setdefault("Referrer-Policy", "same-origin")
|
||||||
resp.headers.setdefault("Permissions-Policy", "interest-cohort=()")
|
# Opt-out of FLoC (Chrome ≤94) and the Topics API replacement
|
||||||
|
# (Chrome ≥115). Both directive names are unknown to other
|
||||||
|
# browsers and silently ignored — no parse-error risk.
|
||||||
|
resp.headers.setdefault("Permissions-Policy", "interest-cohort=(), browsing-topics=()")
|
||||||
resp.headers.setdefault(
|
resp.headers.setdefault(
|
||||||
"Content-Security-Policy",
|
"Content-Security-Policy",
|
||||||
"default-src 'self'; "
|
"default-src 'self'; "
|
||||||
|
|
@ -460,7 +463,15 @@ def create_app() -> Flask:
|
||||||
# redirect surface — `next=https://evil.example/...` would
|
# redirect surface — `next=https://evil.example/...` would
|
||||||
# otherwise route an authenticated user to an attacker page
|
# otherwise route an authenticated user to an attacker page
|
||||||
# right after OIDC handshake.
|
# right after OIDC handshake.
|
||||||
session["post_login_next"] = _safe_next(request.args.get("next"))
|
nxt = _safe_next(request.args.get("next"))
|
||||||
|
# Already-authenticated users skip OIDC entirely (CVE-NEW-5 fix,
|
||||||
|
# 2026-05-02 PM): a malicious cross-origin link
|
||||||
|
# `<a href="…/login?next=/some-poisoned-path">` would otherwise
|
||||||
|
# silently re-trigger the OIDC handshake on a logged-in user
|
||||||
|
# and hand them off to the attacker-supplied next= path.
|
||||||
|
if session.get("user"):
|
||||||
|
return redirect(nxt)
|
||||||
|
session["post_login_next"] = nxt
|
||||||
return oauth.cauldron.authorize_redirect(cfg.oidc_redirect_uri)
|
return oauth.cauldron.authorize_redirect(cfg.oidc_redirect_uri)
|
||||||
|
|
||||||
@app.get("/auth/callback")
|
@app.get("/auth/callback")
|
||||||
|
|
@ -490,12 +501,18 @@ def create_app() -> Flask:
|
||||||
detail="that login link expired (you probably retried after a blip). hit login again to start fresh.",
|
detail="that login link expired (you probably retried after a blip). hit login again to start fresh.",
|
||||||
), 400
|
), 400
|
||||||
except OAuthError as e:
|
except OAuthError as e:
|
||||||
|
# Log the full Authentik error server-side; render only a
|
||||||
|
# generic detail to the user. Audit CVE-NEW-8 (2026-05-02 PM):
|
||||||
|
# the prior `f"auth handshake failed: {e}"` echoed Authentik
|
||||||
|
# error codes (e.g. invalid_client_id) into the auth_retry
|
||||||
|
# page — anyone who can hit /auth/callback?state=evil could
|
||||||
|
# probe Authentik internals via the rendered detail.
|
||||||
app.logger.warning("OIDC callback: oauth error: %s", e)
|
app.logger.warning("OIDC callback: oauth error: %s", e)
|
||||||
session.pop("_state_cauldron_authlib", None)
|
session.pop("_state_cauldron_authlib", None)
|
||||||
return render_template(
|
return render_template(
|
||||||
"auth_retry.html",
|
"auth_retry.html",
|
||||||
reason="oauth",
|
reason="oauth",
|
||||||
detail=f"auth handshake failed: {e}",
|
detail="the auth handshake didn't complete. hit login again to start fresh.",
|
||||||
), 400
|
), 400
|
||||||
userinfo = token.get("userinfo") or oauth.cauldron.userinfo(token=token)
|
userinfo = token.get("userinfo") or oauth.cauldron.userinfo(token=token)
|
||||||
sub = userinfo.get("sub") or userinfo.get("email")
|
sub = userinfo.get("sub") or userinfo.get("email")
|
||||||
|
|
@ -663,7 +680,10 @@ def create_app() -> Flask:
|
||||||
if not client:
|
if not client:
|
||||||
return jsonify({"error": "not connected"}), 409
|
return jsonify({"error": "not connected"}), 409
|
||||||
u = session["user"]
|
u = session["user"]
|
||||||
page = max(1, int(request.args.get("page", "1")))
|
try:
|
||||||
|
page = max(1, int(request.args.get("page", "1") or "1"))
|
||||||
|
except ValueError:
|
||||||
|
page = 1
|
||||||
search = (request.args.get("q") or "").strip()
|
search = (request.args.get("q") or "").strip()
|
||||||
sort = request.args.get("sort", "newest")
|
sort = request.args.get("sort", "newest")
|
||||||
category = (request.args.get("cat") or "").strip() or None
|
category = (request.args.get("cat") or "").strip() or None
|
||||||
|
|
@ -2574,8 +2594,16 @@ def create_app() -> Flask:
|
||||||
@app.get("/api/recipes")
|
@app.get("/api/recipes")
|
||||||
@require_bearer
|
@require_bearer
|
||||||
def list_recipes_api():
|
def list_recipes_api():
|
||||||
page = int(request.args.get("page", "1"))
|
# Defensive int parse — `?page=foo` previously raised ValueError
|
||||||
per_page = min(int(request.args.get("per_page", "50")), 200)
|
# and surfaced a 500 (audit CODE-9, 2026-05-02 PM).
|
||||||
|
try:
|
||||||
|
page = max(1, int(request.args.get("page", "1") or "1"))
|
||||||
|
except ValueError:
|
||||||
|
page = 1
|
||||||
|
try:
|
||||||
|
per_page = min(max(1, int(request.args.get("per_page", "50") or "50")), 200)
|
||||||
|
except ValueError:
|
||||||
|
per_page = 50
|
||||||
return jsonify(system_mealie.list_recipes(page=page, per_page=per_page))
|
return jsonify(system_mealie.list_recipes(page=page, per_page=per_page))
|
||||||
|
|
||||||
@app.post("/api/sterilize/preview/<slug>")
|
@app.post("/api/sterilize/preview/<slug>")
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue