From 32a570b9d4952bc84bb3a333220b986859578478 Mon Sep 17 00:00:00 2001 From: Kayos Date: Sat, 2 May 2026 17:53:51 -0700 Subject: [PATCH] audit-fixes: 3rd-pass HIGH + 2 MEDs (dedupe stale-404, SSRF redirect, int parse) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dedupe_recipes.py CODE3-1 (HIGH): mirror the consolidate apply path's stale-404 tolerance into dedupe. Pair-based clustering can emit overlapping pairs — (A,B) approved+deleted; later (A,C) tries to delete A again. Mealie returns 404; the prior code logged this as an error and bumped error_count for a desired-end-state-achieved operation. Now treats 404 as already-handled and continues. discover_recipes.py CVE-NEW3-1 (MED): added allow_redirects=False to the fallback _rq.get(url, ...) call. is_public_url validates the original host as public-IP-space, but requests' default redirect-following would chase a 30x to 127.0.0.1 / 169.254.x — letting a malicious recipe-page server redirect a scrape worker at internal LAN services or cloud metadata. The recipe_scrapers primary path has its own internal request chain that's a documented residual (per is_public_url's docstring). Closes the easier of the two paths. server.py CODE3-2 (MED): /api/discover/search ?limit / ?offset swap raw int() for the existing _opt_int helper that's defined 20 lines up. Mirrors the wave-2 fix on /api/recipes and /me-recipes that the prior pass installed everywhere except this endpoint. 3rd-pass audit (against HEAD 291fea0) verdict: codebase is in a defensible production-deploy state. Remaining LOW/INFO items (Origin RFC normalization, CSS-injection in discover image_url, session.clear() on login, .env.example freshness) are robustness-class rather than security-class and don't gate deploy. 
--- cauldron/dedupe_recipes.py | 9 +++++++++ cauldron/discover_recipes.py | 11 +++++++++++ cauldron/server.py | 4 ++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cauldron/dedupe_recipes.py b/cauldron/dedupe_recipes.py index 0596e7e..e1cdaa9 100644 --- a/cauldron/dedupe_recipes.py +++ b/cauldron/dedupe_recipes.py @@ -260,6 +260,15 @@ def run_apply(*, db: DB, job_id: int, mealie: Mealie) -> None: mealie.delete_recipe(slug) db.update_recipe_dedupe_job_progress(job_id, deleted_delta=1) except MealieError as e: + msg = str(e) + # Pair-based clustering can emit overlapping pairs: + # (A,B) approved+deleted; later (A,C) tries to delete A + # again. Mealie returns 404 — treat that as already- + # handled, not an error. Mirrors the consolidate + # apply path. 3rd-pass audit fix CODE3-1 (2026-05-02 PM). + if "404" in msg or "not found" in msg.lower(): + log.info("[dedupe-recipes:%s] delete %s: stale (already removed)", job_id, slug) + continue err = f"delete {slug}: {e}" log.warning("[dedupe-recipes:%s] %s", job_id, err) break diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py index e973035..31edd8d 100644 --- a/cauldron/discover_recipes.py +++ b/cauldron/discover_recipes.py @@ -229,6 +229,17 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None: resp = _rq.get( url, timeout=15, + # allow_redirects=False: is_public_url validated the + # original host as public; a 30x to 127.0.0.1 / 169.254.x + # would otherwise route this scrape worker at internal + # services (LAN scanner, cloud metadata IMDS). 3rd-pass + # audit fix CVE-NEW3-1 (2026-05-02 PM): treat 30x as + # scrape failure rather than chase the redirect chain. + # The recipe_scrapers primary path has its own internal + # request chain that's a known residual — the docstring + # on is_public_url notes the long-term answer is a + # custom requests transport that re-validates per hop. 
+ allow_redirects=False, headers={ # Realistic desktop UA — many recipe sites 403 anything # that smells like a bot. We're identifying as a normal diff --git a/cauldron/server.py b/cauldron/server.py index 184e22a..ef9743b 100644 --- a/cauldron/server.py +++ b/cauldron/server.py @@ -2383,8 +2383,8 @@ def create_app() -> Flask: meal_type=_opt("meal_type"), kid_friendly_min=_opt_int("kid_friendly_min"), max_minutes=_opt_int("max_minutes"), - limit=min(int(args.get("limit") or 60), 200), - offset=max(int(args.get("offset") or 0), 0), + limit=min(_opt_int("limit") or 60, 200), + offset=max(_opt_int("offset") or 0, 0), ) # Decorate each row with per-household / per-group import status.