diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py index a58c88b..16692e2 100644 --- a/cauldron/discover_recipes.py +++ b/cauldron/discover_recipes.py @@ -157,7 +157,19 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None: resp = _rq.get( url, timeout=15, - headers={"User-Agent": "Mozilla/5.0 (cauldron-discover)"}, + headers={ + # Realistic desktop UA — many recipe sites return 403 for anything + # that looks like a bot, so we identify as a normal browser. + # NOTE(review): robots.txt is presumed to be respected via recipe_scrapers' + # built-in wild_mode; this GET does not check it itself — confirm. + "User-Agent": ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) " + "AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/120.0 Safari/537.36" + ), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + }, ) if resp.status_code != 200: log.warning("[discover] fetch %s -> %s", url, resp.status_code)