discover: fix recipe-scrapers 15.6 API — drop wild_mode kw
scrape_me(url) no longer accepts wild_mode in 15.x. Replace it with a two-stage fallback:
1. scrape_me(url) — site-specific scraper (best quality)
2. fetch html + scrape_html(html, org_url, supported_only=False) —
generic JSON-LD/microdata pass for unsupported sites
Caught during the first dogfood test against allrecipes; the previous code raised
"unexpected keyword argument 'wild_mode'" before issuing any HTTP request.
This commit is contained in:
parent
3ec120c1d9
commit
7773b2785c
1 changed file with 21 additions and 3 deletions
|
|
@ -146,10 +146,28 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None:
|
|||
return None
|
||||
|
||||
try:
|
||||
scraper = scrape_me(url, wild_mode=True)
|
||||
scraper = scrape_me(url)
|
||||
except Exception as e:
|
||||
log.warning("[discover] scrape_me(%s) failed: %s", url, e)
|
||||
return None
|
||||
# scraper_exists_for is False or the request failed.
|
||||
# Fall back to scrape_html with supported_only=False so unknown
|
||||
# sites still get a JSON-LD/microdata pass.
|
||||
try:
|
||||
from recipe_scrapers import scrape_html # type: ignore
|
||||
import requests as _rq
|
||||
resp = _rq.get(
|
||||
url,
|
||||
timeout=15,
|
||||
headers={"User-Agent": "Mozilla/5.0 (cauldron-discover)"},
|
||||
)
|
||||
if resp.status_code != 200:
|
||||
log.warning("[discover] fetch %s -> %s", url, resp.status_code)
|
||||
return None
|
||||
scraper = scrape_html(
|
||||
html=resp.text, org_url=url, supported_only=False
|
||||
)
|
||||
except Exception as e2:
|
||||
log.warning("[discover] scrape_me(%s) failed: %s / fallback: %s", url, e, e2)
|
||||
return None
|
||||
|
||||
shaped = _to_mealie_shape(scraper, url)
|
||||
image = shaped.get("image") or None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue