discover: realistic Chrome UA on fallback fetch — most sites 403 bot UAs
This commit is contained in:
parent
7773b2785c
commit
09d716116a
1 changed file with 13 additions and 1 deletion
|
|
@@ -157,7 +157,19 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None:
|
|||
resp = _rq.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": "Mozilla/5.0 (cauldron-discover)"},
|
||||
headers={
|
||||
# Realistic desktop UA — many recipe sites 403 anything
|
||||
# that smells like a bot. We're identifying as a normal
|
||||
# browser; per-site robots.txt we still respect via
|
||||
# recipe_scrapers' built-in wild_mode safety nets.
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/120.0 Safari/537.36"
|
||||
),
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.5",
|
||||
},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
log.warning("[discover] fetch %s -> %s", url, resp.status_code)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue