diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py index 13fe457..a58c88b 100644 --- a/cauldron/discover_recipes.py +++ b/cauldron/discover_recipes.py @@ -146,10 +146,28 @@ def _scrape_one(url: str) -> tuple[dict, str | None] | None: return None try: - scraper = scrape_me(url, wild_mode=True) + scraper = scrape_me(url) except Exception as e: - log.warning("[discover] scrape_me(%s) failed: %s", url, e) - return None + # scraper_exists_for is False or the request failed. + # Fall back to scrape_html with supported_only=False so unknown + # sites still get a JSON-LD/microdata pass. + try: + from recipe_scrapers import scrape_html # type: ignore + import requests as _rq + resp = _rq.get( + url, + timeout=15, + headers={"User-Agent": "Mozilla/5.0 (cauldron-discover)"}, + ) + if resp.status_code != 200: + log.warning("[discover] fetch %s -> %s", url, resp.status_code) + return None + scraper = scrape_html( + html=resp.text, org_url=url, supported_only=False + ) + except Exception as e2: + log.warning("[discover] scrape_me(%s) failed: %s / fallback: %s", url, e, e2) + return None shaped = _to_mealie_shape(scraper, url) image = shaped.get("image") or None