diff --git a/cauldron/db.py b/cauldron/db.py index eec29fe..8621eeb 100644 --- a/cauldron/db.py +++ b/cauldron/db.py @@ -2251,7 +2251,18 @@ class DB: ) -> int | None: """INSERT a freshly-scraped recipe in 'raw' state. Returns the new row id, or None if the source_url was already present (UNIQUE - violation = duplicate scrape, treat as skip).""" + violation = duplicate scrape, treat as skip). + + Normalizes source_url by stripping trailing slashes so that + `.../recipes/falafel` and `.../recipes/falafel/` map to the same + UNIQUE key. 2026-05-02: caught when manual `/discover` paste + included trailing slash but listing-page extractor stripped it, + producing 1:1 duplicates.""" + # URL canonicalization — single rstrip is safe for recipe paths + # (they always have a non-slash terminal segment; `https://host/` + # alone wouldn't be a valid recipe URL anyway). + if source_url.endswith("/"): + source_url = source_url.rstrip("/") with self.conn() as c, c.cursor() as cur: cur.execute( """INSERT IGNORE INTO cauldron_discovered_recipes