From 3ec120c1d95facfce1d45986715a026021034248 Mon Sep 17 00:00:00 2001
From: Kayos <kayos@sulkta.com>
Date: Fri, 1 May 2026 07:38:27 -0700
Subject: [PATCH] discover v0.1: scrape + search + import
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- requirements: add recipe-scrapers 15.6.0
- mealie.import_from_url(): POST /api/recipes/create/url returns slug
- db helpers: insert_discovered_recipe, update_discovered_meta,
  set_discovered_status, list_discovered_recipes (FULLTEXT + JSON
  filters), count_discovered_by_status, get_discovered_recipe;
  discover-job CRUD + anti-zombie finalize + stuck-job recovery
- discover_recipes.py: daemon-thread runner (mirrors enrich pattern)
  walks a URL list; scrape_me → reshape to mealie shape → INSERT IGNORE
  → forge.enrich_recipe → flip raw → enriched. SEED_URLS curated
  starter packs for allrecipes / bbc / smitten / pinch / hbh.
- endpoints: GET /discover, GET /api/discover/search (q + cuisine +
  complexity + protein + meal_type + kid-fit + max_minutes + status),
  POST /api/discover/import/<id>, /reject/<id>, /scrape-start (seed
  or urls list), /scrape-status, /scrape-cancel/<id>
- discover.html: filter row + card grid + collapsible scrape panel
  with seed chips and url textarea + live progress poll
- nav: 'discover' tab on /, link card on /me
- boot recovery: fail_stuck_discover_jobs at startup
---
 cauldron/db.py                   | 228 ++++++++++++++++++
 cauldron/discover_recipes.py     | 267 +++++++++++++++++++++
 cauldron/mealie.py               |  44 ++++
 cauldron/server.py               | 169 ++++++++++++-
 cauldron/templates/_base.html    |   1 +
 cauldron/templates/discover.html | 397 +++++++++++++++++++++++++++++++
 cauldron/templates/me.html       |   3 +
 requirements.txt                 |   1 +
 8 files changed, 1109 insertions(+), 1 deletion(-)
 create mode 100644 cauldron/discover_recipes.py
 create mode 100644 cauldron/templates/discover.html
diff --git a/cauldron/db.py b/cauldron/db.py
index b3d91c4..fc3fe0d 100644
--- a/cauldron/db.py
+++ b/cauldron/db.py
@@ -2143,3 +2143,231 @@ class DB:
                 (stale_minutes,),
             )
             return cur.rowcount
+
+    # --- discover (Discover v0.1) ------------------------------------------
+
+    def insert_discovered_recipe(
+        self,
+        *,
+        slug: str | None,
+        source_url: str,
+        name: str | None,
+        description: str | None,
+        image_url: str | None,
+        scraped_json: str,
+    ) -> int | None:
+        """INSERT a freshly-scraped recipe in 'raw' state. Returns the new
+        row id, or None if the source_url was already present (UNIQUE
+        violation = duplicate scrape, treat as skip)."""
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """INSERT IGNORE INTO cauldron_discovered_recipes
+                     (slug, source_url, name, description, image_url,
+                      scraped_json, status, scraped_at, last_action_at)
+                   VALUES (%s, %s, %s, %s, %s, %s, 'raw', NOW(), NOW())""",
+                (slug, source_url[:768], name, description, image_url, scraped_json),
+            )
+            return cur.lastrowid or None
+
+    def update_discovered_meta(
+        self, discover_id: int, *, meta_json: str, version: int
+    ) -> None:
+        """Persist enriched metadata + flip status raw → enriched."""
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """UPDATE cauldron_discovered_recipes
+                   SET meta_json=%s,
+                       enrich_version=%s,
+                       status=CASE WHEN status='raw' THEN 'enriched'
+                                   ELSE status END,
+                       last_action_at=NOW()
+                   WHERE id=%s""",
+                (meta_json, version, discover_id),
+            )
+
+    def set_discovered_status(self, discover_id: int, status: str) -> None:
+        """Move a discovered recipe to 'imported' or 'rejected'."""
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """UPDATE cauldron_discovered_recipes
+                   SET status=%s, last_action_at=NOW()
+                   WHERE id=%s""",
+                (status, discover_id),
+            )
+
+    def get_discovered_recipe(self, discover_id: int) -> dict | None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                "SELECT * FROM cauldron_discovered_recipes WHERE id=%s",
+                (discover_id,),
+            )
+            return cur.fetchone()
+
+    def list_discovered_recipes(
+        self,
+        *,
+        status: str | list[str] | None = "enriched",
+        q: str | None = None,
+        cuisine: str | None = None,
+        complexity: str | None = None,
+        primary_protein: str | None = None,
+        meal_type: str | None = None,
+        kid_friendly_min: int | None = None,
+        max_minutes: int | None = None,
+        limit: int = 60,
+        offset: int = 0,
+    ) -> list[dict]:
+        """Browse discovered recipes with filters. Status defaults to
+        'enriched' so the /discover page surfaces only ready-to-import
+        rows. JSON path filters use MySQL JSON_EXTRACT against meta_json."""
+        where = []
+        args: list = []
+        if status is not None:
+            if isinstance(status, list):
+                if not status:
+                    return []
+                placeholders = ",".join(["%s"] * len(status))
+                where.append(f"status IN ({placeholders})")
+                args.extend(status)
+            else:
+                where.append("status = %s")
+                args.append(status)
+        if q:
+            where.append("MATCH(name, description) AGAINST (%s IN NATURAL LANGUAGE MODE)")
+            args.append(q)
+        if cuisine:
+            where.append("JSON_UNQUOTE(JSON_EXTRACT(meta_json, '$.cuisine')) = %s")
+            args.append(cuisine)
+        if complexity:
+            where.append("JSON_UNQUOTE(JSON_EXTRACT(meta_json, '$.complexity')) = %s")
+            args.append(complexity)
+        if primary_protein:
+            where.append("JSON_UNQUOTE(JSON_EXTRACT(meta_json, '$.primary_protein')) = %s")
+            args.append(primary_protein)
+        if meal_type:
+            where.append("JSON_UNQUOTE(JSON_EXTRACT(meta_json, '$.meal_type')) = %s")
+            args.append(meal_type)
+        if kid_friendly_min is not None:
+            where.append("CAST(JSON_EXTRACT(meta_json, '$.kid_friendly_score') AS UNSIGNED) >= %s")
+            args.append(kid_friendly_min)
+        if max_minutes is not None:
+            where.append("CAST(JSON_EXTRACT(meta_json, '$.estimated_minutes') AS UNSIGNED) <= %s")
+            args.append(max_minutes)
+        sql = "SELECT * FROM cauldron_discovered_recipes"
+        if where:
+            sql += " WHERE " + " AND ".join(where)
+        # Relevance-rank when there's a search query, else newest-first
+        if q:
+            sql += " ORDER BY MATCH(name, description) AGAINST (%s IN NATURAL LANGUAGE MODE) DESC, scraped_at DESC"
+            args.append(q)
+        else:
+            sql += " ORDER BY scraped_at DESC"
+        sql += " LIMIT %s OFFSET %s"
+        args.extend([int(limit), int(offset)])
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(sql, args)
+            return list(cur.fetchall() or [])
+
+    def count_discovered_by_status(self) -> dict[str, int]:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """SELECT status, COUNT(*) AS n
+                   FROM cauldron_discovered_recipes GROUP BY status"""
+            )
+            return {r["status"]: int(r["n"]) for r in (cur.fetchall() or [])}
+
+    def create_discover_job(
+        self, *, started_by_sub: str, source_seed: str
+    ) -> int:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """INSERT INTO cauldron_discover_jobs
+                     (started_by_sub, source_seed, state)
+                   VALUES (%s, %s, 'running')""",
+                (started_by_sub, source_seed[:255]),
+            )
+            return cur.lastrowid
+
+    def get_discover_job(self, job_id: int) -> dict | None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                "SELECT * FROM cauldron_discover_jobs WHERE id=%s", (job_id,)
+            )
+            return cur.fetchone()
+
+    def get_discover_job_state(self, job_id: int) -> str | None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                "SELECT state FROM cauldron_discover_jobs WHERE id=%s", (job_id,)
+            )
+            row = cur.fetchone()
+            return row["state"] if row else None
+
+    def latest_discover_job(self) -> dict | None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """SELECT * FROM cauldron_discover_jobs
+                   ORDER BY started_at DESC LIMIT 1"""
+            )
+            return cur.fetchone()
+
+    def running_discover_job(self) -> dict | None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """SELECT * FROM cauldron_discover_jobs
+                   WHERE state='running' ORDER BY started_at DESC LIMIT 1"""
+            )
+            return cur.fetchone()
+
+    def update_discover_job_progress(
+        self,
+        job_id: int,
+        *,
+        pages_delta: int = 0,
+        added_delta: int = 0,
+        skipped_delta: int = 0,
+        error_delta: int = 0,
+        last_error: str | None = None,
+    ) -> None:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """UPDATE cauldron_discover_jobs
+                   SET pages_scraped = pages_scraped + %s,
+                       recipes_added = recipes_added + %s,
+                       skipped_count = skipped_count + %s,
+                       error_count   = error_count   + %s,
+                       last_error    = COALESCE(%s, last_error),
+                       last_progress_at = NOW()
+                   WHERE id=%s""",
+                (pages_delta, added_delta, skipped_delta, error_delta,
+                 last_error[:500] if last_error else None, job_id),
+            )
+
+    def finalize_discover_job(self, job_id: int, *, state: str) -> None:
+        """Anti-zombie guard: only update if the job isn't already in a
+        terminal state. Mirrors finalize_enrich_job."""
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """UPDATE cauldron_discover_jobs
+                   SET state=%s,
+                       finished_at = CASE WHEN %s IN ('done','failed','cancelled')
+                                          THEN NOW() ELSE finished_at END,
+                       last_progress_at = NOW()
+                   WHERE id=%s
+                     AND state NOT IN ('done','failed','cancelled')""",
+                (state, state, job_id),
+            )
+
+    def fail_stuck_discover_jobs(self, *, stale_minutes: int = 15) -> int:
+        with self.conn() as c, c.cursor() as cur:
+            cur.execute(
+                """UPDATE cauldron_discover_jobs
+                   SET state='failed',
+                       finished_at=NOW(),
+                       last_error=COALESCE(last_error,
+                                  'recovery: worker exited mid-run')
+                   WHERE state='running'
+                     AND last_progress_at < NOW() - INTERVAL %s MINUTE""",
+                (stale_minutes,),
+            )
+            return cur.rowcount
diff --git a/cauldron/discover_recipes.py b/cauldron/discover_recipes.py
new file mode 100644
index 0000000..13fe457
--- /dev/null
+++ b/cauldron/discover_recipes.py
@@ -0,0 +1,267 @@
+"""Discover v0.1 — scrape external recipe URLs into the discover corpus.
+
+Pipeline per URL:
+  1. recipe_scrapers.scrape_me(url) → schema.org structured recipe
+  2. Reshape into a Mealie-ish dict (name, description, recipeYield,
+     recipeIngredient[{note}], recipeInstructions[{text}])
+  3. INSERT IGNORE into cauldron_discovered_recipes (UNIQUE on source_url)
+  4. forge.enrich_recipe(reshaped) → Hecate-tier metadata
+  5. Persist meta_json, flip status raw → enriched
+
+Same daemon-thread + cancel + stuck-recovery pattern as enrich/sterilize.
+
+Seed sources are hardcoded URL lists per source_seed (allrecipes-popular,
+bbc-good-food, smitten-kitchen, ...). Cobb supplies a seed name OR
+a literal list of URLs via the admin endpoint. Either way, the runner
+walks the list, scrape→insert→enrich each, and emits progress.
+"""
+from __future__ import annotations
+
+import json
+import logging
+import threading
+from urllib.parse import urlparse
+
+from .db import DB
+from .forge import Forge, ForgeError
+
+log = logging.getLogger(__name__)
+
+
+# Curated seed URL lists for v0.1 dogfood. Each is a small starter pack —
+# we expand later by adding sitemap/category-page walkers. Keeping these
+# manual lets v0.1 ship without a separate site-walker per source.
+SEED_URLS: dict[str, list[str]] = {
+    "allrecipes-popular": [
+        "https://www.allrecipes.com/recipe/24074/alyssas-chicken/",
+        "https://www.allrecipes.com/recipe/229960/world-best-now-veggie-burgers/",
+        "https://www.allrecipes.com/recipe/16641/old-fashioned-mac-and-cheese/",
+        "https://www.allrecipes.com/recipe/8499082/instant-pot-pulled-pork/",
+        "https://www.allrecipes.com/recipe/220854/chef-johns-creamy-mushroom-pasta/",
+        "https://www.allrecipes.com/recipe/8514308/dr-pepper-pulled-pork/",
+        "https://www.allrecipes.com/recipe/16700/salisbury-steak/",
+        "https://www.allrecipes.com/recipe/8536048/oven-baked-bbq-chicken-thighs/",
+    ],
+    "bbc-good-food": [
+        "https://www.bbcgoodfood.com/recipes/spaghetti-bolognese-recipe",
+        "https://www.bbcgoodfood.com/recipes/best-spaghetti-carbonara-recipe",
+        "https://www.bbcgoodfood.com/recipes/easy-chicken-curry",
+        "https://www.bbcgoodfood.com/recipes/chilli-con-carne-recipe",
+        "https://www.bbcgoodfood.com/recipes/perfect-roast-chicken",
+        "https://www.bbcgoodfood.com/recipes/chicken-tikka-masala",
+        "https://www.bbcgoodfood.com/recipes/sticky-toffee-pudding",
+    ],
+    "smitten-kitchen": [
+        "https://smittenkitchen.com/2023/02/black-pepper-chicken/",
+        "https://smittenkitchen.com/2024/01/orecchiette-with-broccoli-rabe/",
+        "https://smittenkitchen.com/2023/09/baked-orzo-with-eggplant-and-mozzarella/",
+        "https://smittenkitchen.com/2022/12/cacio-e-pepe-soup-with-broccoli-rabe/",
+        "https://smittenkitchen.com/2022/05/spinach-chickpea-skillet/",
+    ],
+    "pinch-of-yum": [
+        "https://pinchofyum.com/the-best-soft-chocolate-chip-cookies",
+        "https://pinchofyum.com/spicy-peanut-soba-noodle-salad",
+        "https://pinchofyum.com/best-chicken-marinade",
+        "https://pinchofyum.com/15-minute-meal-prep-cilantro-lime-chicken-and-cauliflower-rice",
+        "https://pinchofyum.com/pesto-cavatappi",
+    ],
+    "half-baked-harvest": [
+        "https://www.halfbakedharvest.com/cajun-chicken-pasta/",
+        "https://www.halfbakedharvest.com/garlic-butter-creamed-spinach-salmon/",
+        "https://www.halfbakedharvest.com/spicy-pretzel-chicken/",
+        "https://www.halfbakedharvest.com/crispy-buffalo-chicken-tacos/",
+        "https://www.halfbakedharvest.com/butter-chicken-meatballs/",
+    ],
+}
+
+
+def list_seeds() -> list[dict]:
+    """Summarise each curated seed pack (name + URL count) for the /discover UI."""
+    return [{"name": seed, "count": len(pack)} for seed, pack in SEED_URLS.items()]
+
+
+def _slug_from_url(url: str) -> str | None:
+    """Fallback slug: the last non-empty path segment of `url`, max 255 chars."""
+    try:
+        segments = list(filter(None, urlparse(url).path.split("/")))
+    except Exception:
+        return None
+    return segments[-1][:255] if segments else None
+
+
+def _safe_call(fn, default=None):
+    """recipe_scrapers raises various Exception subclasses for missing
+    fields. Swallow them per-field rather than aborting the whole scrape."""
+    try:
+        return fn()
+    except Exception:
+        return default
+
+
+def _to_mealie_shape(scraper, source_url: str) -> dict:
+    """Reshape a recipe_scrapers.AbstractScraper into the dict shape
+    forge.enrich_recipe expects (a Mealie recipe). Falls back gracefully
+    when individual fields are unavailable."""
+    title = _safe_call(scraper.title) or ""
+    description = _safe_call(getattr(scraper, "description", lambda: ""), "") or ""
+    yields = _safe_call(scraper.yields, "") or ""
+    image = _safe_call(scraper.image, "") or ""
+
+    ings_raw = _safe_call(scraper.ingredients, []) or []
+    ingredients = [
+        {"note": str(x).strip()}
+        for x in ings_raw
+        if x and str(x).strip()
+    ]
+
+    # Prefer instructions_list when supported; some scrapers only expose
+    # the joined string.
+    steps_list: list[str] = []
+    instructions_list = _safe_call(getattr(scraper, "instructions_list", lambda: None), None)
+    if instructions_list:
+        steps_list = [str(s).strip() for s in instructions_list if s and str(s).strip()]
+    else:
+        joined = _safe_call(scraper.instructions, "") or ""
+        steps_list = [s.strip() for s in joined.split("\n") if s.strip()]
+    instructions = [{"text": s} for s in steps_list]
+
+    return {
+        "name": title,
+        "description": description,
+        "recipeYield": yields,
+        "image": image,
+        "source_url": source_url,
+        "recipeIngredient": ingredients,
+        "recipeInstructions": instructions,
+    }
+
+
+def _scrape_one(url: str) -> tuple[dict, str | None] | None:
+    """Scrape a single URL. Returns (mealie_shape_dict, image_url) on
+    success. Returns None on any unrecoverable scraper error."""
+    try:
+        from recipe_scrapers import scrape_me  # type: ignore
+    except ImportError:
+        log.exception("[discover] recipe_scrapers not installed")
+        return None
+
+    try:
+        scraper = scrape_me(url, wild_mode=True)
+    except Exception as e:
+        log.warning("[discover] scrape_me(%s) failed: %s", url, e)
+        return None
+
+    shaped = _to_mealie_shape(scraper, url)
+    image = shaped.get("image") or None
+    if not shaped.get("name"):
+        log.warning("[discover] no name extracted from %s", url)
+        return None
+    if not shaped.get("recipeIngredient"):
+        log.warning("[discover] no ingredients extracted from %s", url)
+        return None
+    return shaped, image
+
+
+def run_discover(
+    *,
+    db: DB,
+    job_id: int,
+    forge: Forge,
+    urls: list[str],
+) -> None:
+    """Walk `urls` (scrape → insert → enrich), counting per-URL failures on the
+    job row and moving on; stops early once the job state turns terminal."""
+    log.info("[discover:%s] start (%d urls)", job_id, len(urls))
+
+    def _cancelled() -> bool:
+        s = db.get_discover_job_state(job_id)
+        return s in ("cancelled", "failed", "done")
+
+    try:
+        for url in urls:
+            if _cancelled():
+                log.info("[discover:%s] aborted (state changed)", job_id)
+                return
+
+            db.update_discover_job_progress(job_id, pages_delta=1)
+
+            scraped = _scrape_one(url)
+            if scraped is None:
+                db.update_discover_job_progress(
+                    job_id, error_delta=1, last_error=f"scrape failed: {url[:200]}"
+                )
+                continue
+            shaped, image = scraped
+
+            try:
+                slug = _slug_from_url(url)
+                discover_id = db.insert_discovered_recipe(
+                    slug=slug,
+                    source_url=url,
+                    name=shaped.get("name") or None,
+                    description=(shaped.get("description") or "")[:60000] or None,
+                    image_url=image,
+                    scraped_json=json.dumps(shaped, ensure_ascii=False),
+                )
+            except Exception as e:
+                log.warning("[discover:%s] insert(%s) failed: %s", job_id, url, e)
+                db.update_discover_job_progress(
+                    job_id, error_delta=1, last_error=f"insert: {str(e)[:200]}"
+                )
+                continue
+
+            if not discover_id:
+                # UNIQUE conflict — already in the corpus from a prior scrape
+                db.update_discover_job_progress(job_id, skipped_delta=1)
+                continue
+
+            try:
+                meta = forge.enrich_recipe(shaped)
+            except (ForgeError, RuntimeError) as e:
+                msg = str(e)[:500]
+                log.warning("[discover:%s] enrich(%s): %s", job_id, url, msg)
+                db.update_discover_job_progress(
+                    job_id, error_delta=1, last_error=f"enrich: {msg[:200]}"
+                )
+                # Leave the row in 'raw' so we can retry enrichment later.
+                # The recipe IS in the corpus; just hasn't been classified.
+                continue
+
+            try:
+                db.update_discovered_meta(
+                    discover_id,
+                    meta_json=json.dumps(meta, ensure_ascii=False),
+                    version=DB.ENRICH_VERSION,
+                )
+                db.update_discover_job_progress(job_id, added_delta=1)
+            except Exception as e:
+                log.warning("[discover:%s] persist meta(%s): %s", job_id, url, e)
+                db.update_discover_job_progress(
+                    job_id, error_delta=1, last_error=f"persist: {str(e)[:200]}"
+                )
+
+        db.finalize_discover_job(job_id, state="done")
+        log.info("[discover:%s] done", job_id)
+    except Exception:
+        log.exception("[discover:%s] crashed", job_id)
+        try:
+            db.finalize_discover_job(job_id, state="failed")
+        except Exception:
+            log.exception("[discover:%s] could not mark job failed", job_id)
+
+
+def spawn_thread(
+    *,
+    db: DB,
+    job_id: int,
+    forge: Forge,
+    urls: list[str],
+) -> threading.Thread:
+    t = threading.Thread(
+        target=run_discover,
+        kwargs={"db": db, "job_id": job_id, "forge": forge, "urls": urls},
+        name=f"discover-recipes-{job_id}",
+        daemon=True,
+    )
+    t.start()
+    return t
diff --git a/cauldron/mealie.py b/cauldron/mealie.py
index c3bb246..0b54866 100644
--- a/cauldron/mealie.py
+++ b/cauldron/mealie.py
@@ -105,6 +105,50 @@ class Mealie:
     def update_recipe(self, slug: str, body: dict) -> dict:
         return self._put(f"/api/recipes/{slug}", body)
 
+    def import_from_url(
+        self,
+        url: str,
+        *,
+        include_tags: bool = False,
+        include_categories: bool = False,
+    ) -> str:
+        """POST /api/recipes/create/url — Mealie scrapes the URL itself
+        and creates a recipe row in the caller's household. Returns the
+        new recipe slug. After this lands, the household's existing
+        sterilize+enrich pipelines will pick it up on next walk.
+
+        Mealie does its own scraping with recipe_scrapers internally; we
+        don't pass our scraped JSON. This keeps the import path canonical
+        — same code path as the user clicking "Import from URL" in
+        Mealie's UI."""
+        body = {
+            "url": url,
+            "includeTags": bool(include_tags),
+            "includeCategories": bool(include_categories),
+        }
+        try:
+            r = self.session.post(
+                f"{self.base_url}/api/recipes/create/url",
+                json=body,
+                timeout=60,
+            )
+        except requests.RequestException as e:
+            raise MealieError(f"POST /api/recipes/create/url transport: {e}") from e
+        if r.status_code >= 400:
+            raise MealieError(
+                f"POST /api/recipes/create/url -> {r.status_code}: {r.text[:300]}"
+            )
+        # Mealie returns the new slug as a bare JSON string
+        try:
+            slug = r.json()
+        except Exception:
+            slug = r.text.strip().strip('"')
+        if isinstance(slug, dict):
+            slug = slug.get("slug") or slug.get("id")
+        if not isinstance(slug, str) or not slug:
+            raise MealieError(f"create/url returned no slug: {r.text[:200]}")
+        return slug
+
     def delete_recipe(self, slug: str) -> dict:
         """DELETE /api/recipes/<slug>. Permanently removes the recipe and
         its recipe_ingredient rows. Permission-scoped per-household.
diff --git a/cauldron/server.py b/cauldron/server.py
index 89a3bc2..6b9239a 100644
--- a/cauldron/server.py
+++ b/cauldron/server.py
@@ -33,7 +33,7 @@ from .config import load
 from .crypto import TokenCrypto
 from .db import DB
 from .forge import Forge, ForgeError
-from . import aggregator, bulk_sterilize, consolidate_foods, dedupe_recipes, enrich_recipes, foods
+from . import aggregator, bulk_sterilize, consolidate_foods, dedupe_recipes, discover_recipes, enrich_recipes, foods
 from .mealie import Mealie, MealieError
 from .oidc import init_oauth
 from .recipe_index import flatten_recipe, refresh_household_index, search_index
@@ -132,6 +132,13 @@ def create_app() -> Flask:
     except Exception as e:
         app.logger.warning("enrich stuck-job recovery failed: %s", e)
 
+    try:
+        n_failed = db.fail_stuck_discover_jobs(stale_minutes=15)
+        if n_failed:
+            app.logger.info("failed %d stuck discover jobs at boot", n_failed)
+    except Exception as e:
+        app.logger.warning("discover stuck-job recovery failed: %s", e)
+
     oauth = init_oauth(
         app,
         issuer=cfg.oidc_issuer,
@@ -1865,6 +1872,166 @@ def create_app() -> Flask:
         db.finalize_consolidate_job(job_id, state="cancelled")
         return jsonify({"ok": True})
 
+    # ---------- Discover v0.1 (browse external recipes) ------------------
+
+    @app.get("/discover")
+    @require_session
+    def discover_page():
+        # Discover is a global, cross-household corpus — no household
+        # gate. But we still want a connected user before showing the
+        # import buttons (since import targets the user's Mealie).
+        counts = db.count_discovered_by_status()
+        latest = db.latest_discover_job()
+        seeds = discover_recipes.list_seeds()
+        return render_template(
+            "discover.html",
+            active="discover",
+            counts=counts,
+            latest_job=_consolidate_job_payload(latest) if latest else None,
+            seeds=seeds,
+        )
+
+    @app.get("/api/discover/search")
+    @require_session
+    def discover_search():
+        """Search the discover corpus; malformed numeric params fall back to defaults."""
+        args = request.args
+        q = (args.get("q") or "").strip() or None
+        # Default is 'active' — enriched + raw together so newly-scraped rows
+        # surface before enrichment finishes; imported/rejected stay hidden.
+        status_arg = (args.get("status") or "").strip() or "active"
+        if status_arg == "active":
+            status: list[str] | str | None = ["enriched", "raw"]
+        elif status_arg == "all":
+            status = None
+        else:
+            status = status_arg
+
+        def _opt(name: str) -> str | None:
+            v = (args.get(name) or "").strip()
+            return v or None
+
+        def _opt_int(name: str, default: int | None = None) -> int | None:
+            v = (args.get(name) or "").strip()
+            if not v:
+                return default
+            try:
+                return int(v)
+            except ValueError:
+                return default
+
+        rows = db.list_discovered_recipes(
+            status=status,
+            q=q,
+            cuisine=_opt("cuisine"),
+            complexity=_opt("complexity"),
+            primary_protein=_opt("primary_protein"),
+            meal_type=_opt("meal_type"),
+            kid_friendly_min=_opt_int("kid_friendly_min"),
+            max_minutes=_opt_int("max_minutes"),
+            limit=max(0, min(_opt_int("limit", 60), 200)),
+            offset=max(_opt_int("offset", 0), 0),
+        )
+
+        out = []
+        for r in rows:
+            meta = r.get("meta_json")
+            if isinstance(meta, str):
+                try:
+                    meta = _json_loads(meta)
+                except Exception:
+                    meta = None
+            for k in ("scraped_at", "last_action_at"):
+                v = r.get(k)
+                if v is not None and hasattr(v, "isoformat"):
+                    r[k] = v.isoformat()
+            # scraped_json can be heavy — drop it from list responses
+            r.pop("scraped_json", None)
+            r["meta_json"] = meta
+            out.append(r)
+        return jsonify({"recipes": out, "count": len(out)})
+
+    @app.post("/api/discover/import/<int:discover_id>")
+    @require_session
+    def discover_import(discover_id: int):
+        row = db.get_discovered_recipe(discover_id)
+        if not row:
+            return jsonify({"error": "not_found"}), 404
+        if row.get("status") == "imported":
+            return jsonify({"error": "already_imported"}), 409
+        client = current_user_mealie()
+        if client is None:
+            return jsonify({"error": "mealie_not_connected"}), 409
+        try:
+            new_slug = client.import_from_url(row["source_url"])
+        except MealieError as e:
+            return jsonify({"error": "mealie_import_failed", "detail": str(e)[:300]}), 502
+        db.set_discovered_status(discover_id, "imported")
+        return jsonify({"ok": True, "slug": new_slug})
+
+    @app.post("/api/discover/reject/<int:discover_id>")
+    @require_session
+    def discover_reject(discover_id: int):
+        row = db.get_discovered_recipe(discover_id)
+        if not row:
+            return jsonify({"error": "not_found"}), 404
+        db.set_discovered_status(discover_id, "rejected")
+        return jsonify({"ok": True})
+
+    @app.post("/api/discover/scrape-start")
+    @require_session
+    def discover_scrape_start():
+        """Start a scrape job from a named seed pack or a literal `urls` list."""
+        u = session["user"]
+        active = db.running_discover_job()
+        if active:
+            return jsonify({"error": "already_running", "job_id": active["id"]}), 409
+        body = request.get_json(silent=True)
+        if not isinstance(body, dict):  # array/scalar JSON would crash .get()
+            body = {}
+        # str(): a non-string "seed" (e.g. a number) must not crash .strip()
+        seed_name = str(body.get("seed") or "").strip()
+        if seed_name:
+            seeds = discover_recipes.SEED_URLS
+            if seed_name not in seeds:
+                return jsonify({"error": "unknown_seed", "available": list(seeds.keys())}), 400
+            urls = list(seeds[seed_name])
+        else:
+            url_list = body.get("urls") or []
+            if not isinstance(url_list, list):
+                return jsonify({"error": "urls must be a list"}), 400
+            urls = [str(x).strip() for x in url_list if str(x).strip()]
+            seed_name = "manual"
+        if not urls:
+            return jsonify({"error": "no urls supplied"}), 400
+        # Light sanity guard: scrub bad entries, cap at 50/job.
+        # NOTE(review): URLs are fetched server-side; consider a host allowlist (SSRF).
+        urls = [x for x in urls if x.startswith(("http://", "https://"))][:50]
+        if not urls:
+            return jsonify({"error": "no valid http(s) urls"}), 400
+        job_id = db.create_discover_job(started_by_sub=u["sub"], source_seed=seed_name)
+        discover_recipes.spawn_thread(db=db, job_id=job_id, forge=forge, urls=urls)
+        return jsonify({"ok": True, "job_id": job_id, "urls_queued": len(urls)})
+
+    @app.get("/api/discover/scrape-status")
+    @require_session
+    def discover_scrape_status():
+        job = db.latest_discover_job()
+        if not job:
+            return jsonify({"job": None})
+        return jsonify({"job": _consolidate_job_payload(job)})
+
+    @app.post("/api/discover/scrape-cancel/<int:job_id>")
+    @require_session
+    def discover_scrape_cancel(job_id: int):
+        job = db.get_discover_job(job_id)
+        if not job:
+            return jsonify({"error": "not_found"}), 404
+        if job["state"] != "running":
+            return jsonify({"error": f"bad_state:{job['state']}"}), 409
+        db.finalize_discover_job(job_id, state="cancelled")
+        return jsonify({"ok": True})
+
     # ---------- admin sterilizer (bearer-auth, kick off on user's behalf) -
 
     @app.post("/api/admin/sterilize/bulk-start")
diff --git a/cauldron/templates/_base.html b/cauldron/templates/_base.html
index f73b624..edcb145 100644
--- a/cauldron/templates/_base.html
+++ b/cauldron/templates/_base.html
@@ -482,6 +482,7 @@ button { font-family: inherit; }
     <a href="/picks" class="{% if active == 'picks' %}active{% endif %}">picks</a>
     <a href="/plan" class="{% if active == 'plan' %}active{% endif %}">plan</a>
     <a href="/list" class="{% if active == 'list' %}active{% endif %}">list</a>
+    <a href="/discover" class="{% if active == 'discover' %}active{% endif %}">discover</a>
     <a href="/me" class="{% if active == 'me' %}active{% endif %}">me</a>
   </nav>
   <div class="topmeta">
diff --git a/cauldron/templates/discover.html b/cauldron/templates/discover.html
new file mode 100644
index 0000000..192cf21
--- /dev/null
+++ b/cauldron/templates/discover.html
@@ -0,0 +1,397 @@
+{% extends "_base.html" %}
+{% block title %}Discover · Cauldron{% endblock %}
+{% block content %}
+
+<style>
+  .filter-row { display:flex; flex-wrap:wrap; gap:10px; margin:10px 0 18px 0;
+    align-items:center; }
+  .filter-row label { font-family:var(--mono); font-size:11px;
+    color:var(--bone-dim); letter-spacing:.1em; text-transform:uppercase;
+    display:flex; flex-direction:column; gap:4px; }
+  .filter-row input[type=text], .filter-row input[type=number],
+  .filter-row select {
+    background:var(--bg-2); border:1px solid var(--line); color:var(--bone);
+    padding:6px 8px; border-radius:6px; font-family:var(--mono);
+    font-size:13px; min-width:130px; }
+  .filter-row input[type=text].search { min-width:240px; }
+  .grid { display:grid; gap:14px;
+    grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); }
+  .dcard { background:var(--bg-2); border:1px solid var(--line);
+    border-radius:10px; overflow:hidden; display:flex; flex-direction:column; }
+  .dcard .img { width:100%; aspect-ratio: 16/10;
+    background:var(--bg-1) center/cover no-repeat;
+    border-bottom:1px solid var(--line); }
+  .dcard .img.placeholder { display:flex; align-items:center;
+    justify-content:center; color:var(--muted); font-size:36px; }
+  .dcard .body { padding:12px 14px; flex:1; display:flex;
+    flex-direction:column; gap:8px; }
+  .dcard h3 { font-family:var(--serif); font-size:1.05em; margin:0;
+    color:var(--bone); line-height:1.2; }
+  .dcard .meta-line { font-family:var(--mono); font-size:11px;
+    color:var(--bone-dim); letter-spacing:.05em; }
+  .dcard .quip { font-family:var(--serif); font-style:italic;
+    color:var(--purple-bright); font-size:.92em; line-height:1.35; }
+  .dcard .desc { color:var(--muted); font-size:.9em; line-height:1.4;
+    display:-webkit-box; -webkit-line-clamp:3; -webkit-box-orient:vertical;
+    overflow:hidden; }
+  .dcard .src { font-family:var(--mono); font-size:10.5px;
+    color:var(--muted); text-transform:lowercase; }
+  .dcard .src a { color:var(--green-bright); text-decoration:none; }
+  .dcard .actions { display:flex; gap:8px; padding:10px 14px;
+    border-top:1px solid var(--line); background:var(--bg-1); }
+  .dcard .actions .btn { flex:1; }
+  .dcard.imported { opacity:.55; }
+  .dcard.rejected { opacity:.4; }
+  .dcard .raw-tag { display:inline-block; padding:2px 6px; font-size:10px;
+    border:1px solid var(--line); border-radius:4px; color:var(--muted);
+    font-family:var(--mono); letter-spacing:.1em; text-transform:uppercase; }
+  .empty { color:var(--muted); font-style:italic; padding:32px;
+    text-align:center; border:1px dashed var(--line); border-radius:8px; }
+  .seed-row { display:flex; flex-wrap:wrap; gap:6px; margin:8px 0; }
+  .seed-row .chip { cursor:pointer; }
+  .progress-rail { width:100%; height:10px; background:var(--bg-2);
+    border:1px solid var(--line); border-radius:6px; overflow:hidden;
+    margin:8px 0; }
+  .progress-fill { height:100%;
+    background:linear-gradient(90deg, var(--purple-deep), var(--purple-bright));
+    transition:width .3s ease; }
+  .progress-meta { color:var(--bone-dim); font-family:var(--mono);
+    font-size:11px; letter-spacing:.1em; display:flex; gap:14px; flex-wrap:wrap; }
+  .progress-meta strong { color:var(--bone); }
+  details.scrape-panel { margin-bottom:14px; }
+  details.scrape-panel summary { cursor:pointer; font-family:var(--mono);
+    font-size:12px; letter-spacing:.1em; color:var(--bone-dim);
+    text-transform:uppercase; padding:6px 0; }
+  details.scrape-panel summary:hover { color:var(--purple-bright); }
+  textarea#urls-input { width:100%; min-height:80px;
+    background:var(--bg-2); border:1px solid var(--line); color:var(--bone);
+    padding:8px; border-radius:6px; font-family:var(--mono); font-size:12px; }
+</style>
+
+<div class="page-head">
+  <div class="crumb">// discover · external recipe corpus</div>
+  <h1>recipe <span class="accent">discover</span></h1>
+  <div class="lede">
+    a cross-household library of recipes scraped from the open web —
+    each enriched by hecate so you can filter by cuisine, complexity,
+    primary protein, kid-friendliness, time. one click imports to your
+    mealie household; the same sterilize + enrich pipelines you already
+    trust run on it.
+  </div>
+</div>
+
+<section class="panel">
+  <div class="panel-head">
+    <h2>browse</h2>
+    <span class="ctx" id="status-line">
+      {{ counts.get('enriched', 0) }} enriched · {{ counts.get('raw', 0) }} raw ·
+      {{ counts.get('imported', 0) }} imported · {{ counts.get('rejected', 0) }} rejected
+    </span>
+  </div>
+
+  <div class="filter-row">
+    <label>search
+      <input type="text" id="q" class="search" placeholder="chicken, soup, ramen…">
+    </label>
+    <label>cuisine
+      <select id="cuisine">
+        <option value="">any</option>
+        <option>american</option><option>italian</option><option>asian</option>
+        <option>mexican</option><option>mediterranean</option>
+        <option>indian</option><option>french</option>
+        <option>middle-eastern</option><option>other</option>
+      </select>
+    </label>
+    <label>complexity
+      <select id="complexity">
+        <option value="">any</option>
+        <option>easy</option><option>medium</option><option>involved</option>
+      </select>
+    </label>
+    <label>protein
+      <select id="primary_protein">
+        <option value="">any</option>
+        <option>chicken</option><option>beef</option><option>pork</option>
+        <option>fish</option><option>seafood</option><option>tofu</option>
+        <option>tempeh</option><option>beans</option><option>eggs</option>
+        <option>cheese</option><option>nuts</option><option>none</option>
+        <option>mixed</option>
+      </select>
+    </label>
+    <label>meal type
+      <select id="meal_type">
+        <option value="">any</option>
+        <option>breakfast</option><option>lunch</option><option>dinner</option>
+        <option>snack</option><option>dessert</option><option>side</option>
+      </select>
+    </label>
+    <label>kid-fit ≥
+      <select id="kid_friendly_min">
+        <option value="">any</option>
+        <option value="1">1</option><option value="2">2</option>
+        <option value="3">3</option><option value="4">4</option>
+        <option value="5">5</option>
+      </select>
+    </label>
+    <label>max minutes
+      <input type="number" id="max_minutes" min="1" max="600" placeholder="—">
+    </label>
+    <label>status
+      <select id="status">
+        <option value="active" selected>enriched + raw</option>
+        <option value="enriched">enriched only</option>
+        <option value="all">all (incl imported/rejected)</option>
+        <option value="imported">imported</option>
+        <option value="rejected">rejected</option>
+      </select>
+    </label>
+  </div>
+
+  <details class="scrape-panel">
+    <summary>+ scrape new recipes (admin)</summary>
+    <p class="muted" style="margin-top:8px;">
+      kick off a background scrape from a curated seed list, or paste your own
+      urls (one per line, max 50). each url goes through scrape →
+      insert → hecate enrich.
+    </p>
+    <div class="seed-row">
+      {% for s in seeds %}{# tojson emits a JS-safe literal; it requires a single-quoted attribute #}
+        <button class="chip" type="button" onclick='seedSet({{ s.name | tojson }})'>
+          {{ s.name }} <span class="muted">({{ s.count }})</span>
+        </button>
+      {% endfor %}
+    </div>
+    <textarea id="urls-input" placeholder="https://… one per line"></textarea>
+    <div class="btn-row" style="margin-top:8px;">
+      <button class="btn btn-purple" type="button" onclick="startScrape()">▸ start scrape</button>
+      <button class="btn" type="button" onclick="cancelScrape()" id="cancel-btn" style="display:none;">cancel</button>
+    </div>
+    <div id="scrape-progress" style="display:none; margin-top:12px;">
+      <div class="progress-rail"><div class="progress-fill" id="bar" style="width:0%;"></div></div>
+      <div class="progress-meta">
+        <span><strong id="pages">0</strong> walked</span>
+        <span><strong id="added">0</strong> added</span>
+        <span><strong id="skipped">0</strong> skipped</span>
+        <span><strong id="errors">0</strong> errors</span>
+        <span class="muted" id="last-error"></span>
+      </div>
+    </div>
+  </details>
+
+  <div id="grid" class="grid"></div>
+  <div id="empty-msg" class="empty" style="display:none;">no recipes match. widen filters or scrape some new ones.</div>
+</section>
+
+<script>
+  let scrapeJob = {{ (latest_job | tojson) if latest_job else 'null' }};  // job object from the template, or null
+  let scrapePoll = null;   // setInterval handle while scrape status is being polled
+  let searchTimer = null;  // setTimeout handle for the debounced search
+
+  function $(elementId){ return document.getElementById(elementId); }  // shorthand for id lookups
+  function _esc(s){
+    if(s == null) return '';
+    return String(s).replace(/[&<>"']/g, ch => ({
+      '&':'&amp;','<':'&lt;','>':'&gt;','"':'&quot;',"'":'&#39;'
+    })[ch]);
+  }
+
+  function _metaLine(meta){
+    if(!meta) return '<span class="raw-tag">awaiting enrich</span>';
+    const bits = [];
+    if(meta.cuisine && meta.cuisine !== 'unknown') bits.push(meta.cuisine);
+    if(meta.complexity) bits.push(meta.complexity);
+    if(meta.primary_protein && meta.primary_protein !== 'none') bits.push(meta.primary_protein);
+    if(meta.estimated_minutes) bits.push(meta.estimated_minutes + ' min');
+    if(meta.kid_friendly_score != null) bits.push('kid:' + meta.kid_friendly_score);
+    return _esc(bits.join(' · '));
+  }
+
+  function _renderCard(r){
+    const meta = r.meta_json || null;
+    const quip = meta && meta.hecate_quip ? meta.hecate_quip : '';
+    const desc = r.description || (meta && meta.summary) || '';
+    const imgUrl = r.image_url || '';
+    const klass = 'dcard ' + (r.status === 'imported' ? 'imported' :
+                  r.status === 'rejected' ? 'rejected' : '');
+    const imgHtml = imgUrl
+      ? `<div class="img" style="background-image:url('${_esc(imgUrl)}')"></div>`
+      : `<div class="img placeholder">🍴</div>`;
+    let actionsHtml = '';
+    if(r.status === 'imported'){
+      actionsHtml = '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✓ imported</span>';
+    } else if(r.status === 'rejected'){
+      actionsHtml = '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✗ rejected</span>';
+    } else {
+      actionsHtml = `
+        <button class="btn btn-purple" type="button" onclick="importDiscover(${r.id}, this)">🍳 import</button>
+        <button class="btn" type="button" onclick="rejectDiscover(${r.id}, this)" title="hide from discover">✗ skip</button>
+      `;
+    }
+    return `
+      <div class="${klass}" data-id="${r.id}">
+        ${imgHtml}
+        <div class="body">
+          <h3>${_esc(r.name || '(untitled)')}</h3>
+          <div class="meta-line">${_metaLine(meta)}</div>
+          ${quip ? `<div class="quip">${_esc(quip)}</div>` : ''}
+          ${desc ? `<div class="desc">${_esc(desc)}</div>` : ''}
+          <div class="src"><a href="${_esc(r.source_url)}" target="_blank" rel="noopener noreferrer">${_esc(new URL(r.source_url).host)}</a></div>
+        </div>
+        <div class="actions">${actionsHtml}</div>
+      </div>`;
+  }
+
+  async function refreshSearch(){
+    const params = new URLSearchParams();
+    for(const id of ['q','cuisine','complexity','primary_protein','meal_type','kid_friendly_min','max_minutes','status']){
+      const v = $(id).value;
+      if(v !== '') params.set(id, v);
+    }
+    try {
+      const r = await fetch('/api/discover/search?' + params.toString());
+      const d = await r.json();
+      const recipes = d.recipes || [];
+      $('grid').innerHTML = recipes.map(_renderCard).join('');
+      $('empty-msg').style.display = recipes.length === 0 ? '' : 'none';
+    } catch(e){
+      console.error('search failed', e);
+    }
+  }
+
+  function debouncedSearch(){
+    clearTimeout(searchTimer);
+    searchTimer = setTimeout(refreshSearch, 250);
+  }
+
+  for(const id of ['q','cuisine','complexity','primary_protein','meal_type','kid_friendly_min','max_minutes','status']){
+    const el = $(id);
+    el.addEventListener('input', debouncedSearch);
+    el.addEventListener('change', debouncedSearch);
+  }
+
+  async function importDiscover(id, btn){
+    btn.disabled = true; btn.textContent = 'importing…';
+    try {
+      const r = await fetch('/api/discover/import/' + id, { method:'POST' });
+      const d = await r.json();
+      if(!r.ok) throw new Error(d.error || r.status);
+      // Mark card as imported in-place
+      const card = btn.closest('.dcard');
+      if(card){
+        card.classList.add('imported');
+        card.querySelector('.actions').innerHTML =
+          '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✓ imported as <code>' + _esc(d.slug) + '</code></span>';
+      }
+    } catch(e){
+      btn.disabled = false; btn.textContent = '🍳 import';
+      alert('import failed: ' + e.message);
+    }
+  }
+
+  async function rejectDiscover(id, btn){
+    btn.disabled = true; btn.textContent = '…';
+    try {
+      const r = await fetch('/api/discover/reject/' + id, { method:'POST' });
+      if(!r.ok){ const d = await r.json().catch(()=>({})); throw new Error(d.error || r.status); }
+      const card = btn.closest('.dcard');
+      if(card) card.remove();
+    } catch(e){
+      btn.disabled = false; btn.textContent = '✗ skip';
+      alert('reject failed: ' + e.message);
+    }
+  }
+
+  function seedSet(name){
+    $('urls-input').value = '__seed:' + name;
+    $('urls-input').setAttribute('data-seed', name);
+  }
+
+  async function startScrape(){
+    const txt = $('urls-input').value.trim();
+    let body;
+    if(txt.startsWith('__seed:')){
+      body = { seed: txt.slice(7).trim() };
+    } else {
+      const urls = txt.split(/\r?\n/).map(s => s.trim()).filter(Boolean);
+      if(urls.length === 0){ alert('paste some urls or pick a seed'); return; }
+      body = { urls };
+    }
+    try {
+      const r = await fetch('/api/discover/scrape-start', {
+        method:'POST', headers:{'Content-Type':'application/json'},
+        body: JSON.stringify(body),
+      });
+      const d = await r.json();
+      if(!r.ok) throw new Error(d.error || r.status);
+      scrapeJob = { id: d.job_id, state: 'running' };
+      paintScrape();
+      pollScrape();
+    } catch(e){
+      alert('scrape start failed: ' + e.message);
+    }
+  }
+
+  async function cancelScrape(){
+    if(!scrapeJob) return;
+    if(!confirm('cancel scrape?')) return;
+    try {
+      await fetch('/api/discover/scrape-cancel/' + scrapeJob.id, { method:'POST' });
+      await fetchScrapeStatus();
+    } catch(e){
+      alert('cancel failed: ' + e.message);
+    }
+  }
+
+  async function fetchScrapeStatus(){
+    try {
+      const r = await fetch('/api/discover/scrape-status');
+      const d = await r.json();
+      scrapeJob = d.job || null;
+      paintScrape();
+    } catch(e){
+      console.error('scrape status failed', e);
+    }
+  }
+
+  function paintScrape(){
+    const j = scrapeJob;
+    const pp = $('scrape-progress');
+    const cb = $('cancel-btn');
+    if(!j || j.state !== 'running'){
+      pp.style.display = j ? '' : 'none';
+      cb.style.display = 'none';
+      stopPollScrape();
+    } else {
+      pp.style.display = '';
+      cb.style.display = '';
+    }
+    if(!j) return;
+    const total = (j.pages_scraped || 0) + 0;  // we don't pre-emit total; pages tracks done
+    $('pages').textContent   = j.pages_scraped || 0;
+    $('added').textContent   = j.recipes_added || 0;
+    $('skipped').textContent = j.skipped_count || 0;
+    $('errors').textContent  = j.error_count   || 0;
+    $('last-error').textContent = j.last_error ? '· ' + j.last_error : '';
+    // Bar can't show absolute pct without a known total; show a slow pulse on progress
+    if(j.state === 'running'){
+      const pct = Math.min(95, ((j.pages_scraped || 0) * 7) % 95);
+      $('bar').style.width = pct + '%';
+    } else if(j.state === 'done'){
+      $('bar').style.width = '100%';
+    }
+    if(j.state === 'done' || j.state === 'cancelled' || j.state === 'failed'){
+      // refresh the grid so any new rows appear
+      refreshSearch();
+    }
+  }
+
+  function pollScrape(){ scrapePoll = scrapePoll || setInterval(fetchScrapeStatus, 2000); }  // poll every 2 s; no-op if already polling
+  function stopPollScrape(){ if(!scrapePoll) return; clearInterval(scrapePoll); scrapePoll = null; }  // no-op when not polling
+
+  // Initial paint: scrape panel first, then the recipe grid.
+  paintScrape();
+  if(scrapeJob && scrapeJob.state === 'running') pollScrape();  // resume polling a job that is still running
+  refreshSearch();
+</script>
+
+{% endblock %}
diff --git a/cauldron/templates/me.html b/cauldron/templates/me.html
index 070ac70..b1bee15 100644
--- a/cauldron/templates/me.html
+++ b/cauldron/templates/me.html
@@ -65,6 +65,9 @@
 
   <p class="muted" style="margin-top:14px;">have hecate generate per-recipe metadata — cuisine, complexity, macros, primary protein/carb, comfort tier, summary. the plan generator reads this so "high protein week" is a real query, not just a vibe.</p>
   <p><a class="btn" href="/enrich-recipes">✨ enrich recipes →</a></p>
+
+  <p class="muted" style="margin-top:14px;">browse a cross-household corpus of scraped recipes — search by cuisine / protein / time / kid-friendliness. one click sends a recipe to your mealie library; sterilize+enrich pipelines run on it like any other.</p>
+  <p><a class="btn" href="/discover">🌐 discover recipes →</a></p>
 </section>
 {% endif %}
 
diff --git a/requirements.txt b/requirements.txt
index cd2d775..5463b47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,3 +5,4 @@ Authlib==1.3.2
 PyMySQL==1.1.1
 cryptography==43.0.3
 rapidfuzz==3.10.1
+recipe-scrapers==15.6.0