diff --git a/cauldron/dedupe_recipes.py b/cauldron/dedupe_recipes.py index 0596e7e..e1cdaa9 100644 --- a/cauldron/dedupe_recipes.py +++ b/cauldron/dedupe_recipes.py @@ -260,6 +260,15 @@ def run_apply(*, db: DB, job_id: int, mealie: Mealie) -> None: mealie.delete_recipe(slug) db.update_recipe_dedupe_job_progress(job_id, deleted_delta=1) except MealieError as e: + msg = str(e) + # Pair-based clustering can emit overlapping pairs: + # (A,B) approved+deleted; later (A,C) tries to delete A + # again. Mealie returns 404 — treat that as already- + # handled, not an error. Mirrors the consolidate + # apply path. 3rd-pass audit fix CODE3-1 (2026-05-02 PM). + if "404" in msg or "not found" in msg.lower(): + log.info("[dedupe-recipes:%s] delete %s: stale (already removed)", job_id, slug) + continue err = f"delete {slug}: {e}" log.warning("[dedupe-recipes:%s] %s", job_id, err) break diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py index e973035..31edd8d 100644 --- a/cauldron/discover_recipes.py +++ b/cauldron/discover_recipes.py @@ -229,6 +229,17 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None: resp = _rq.get( url, timeout=15, + # allow_redirects=False: is_public_url validated the + # original host as public; a 30x to 127.0.0.1 / 169.254.x + # would otherwise route this scrape worker at internal + # services (LAN scanner, cloud metadata IMDS). 3rd-pass + # audit fix CVE-NEW3-1 (2026-05-02 PM): treat 30x as + # scrape failure rather than chase the redirect chain. + # The recipe_scrapers primary path has its own internal + # request chain that's a known residual — the docstring + # on is_public_url notes the long-term answer is a + # custom requests transport that re-validates per hop. + allow_redirects=False, headers={ # Realistic desktop UA — many recipe sites 403 anything # that smells like a bot. We're identifying as a normal diff --git a/cauldron/server.py b/cauldron/server.py index 184e22a..ef9743b 100644 --- a/cauldron/server.py +++ b/cauldron/server.py @@ -2383,8 +2383,8 @@ def create_app() -> Flask: meal_type=_opt("meal_type"), kid_friendly_min=_opt_int("kid_friendly_min"), max_minutes=_opt_int("max_minutes"), - limit=min(int(args.get("limit") or 60), 200), - offset=max(int(args.get("offset") or 0), 0), + limit=min(_opt_int("limit") or 60, 200), + offset=max(_opt_int("offset") or 0, 0), ) # Decorate each row with per-household / per-group import status.