discover v0.1: scrape + search + import
- requirements: add recipe-scrapers 15.6.0
- mealie.import_from_url(): POST /api/recipes/create/url returns slug
- db helpers: insert_discovered_recipe, update_discovered_meta, set_discovered_status, list_discovered_recipes (FULLTEXT + JSON filters), count_discovered_by_status, get_discovered_recipe; discover-job CRUD + anti-zombie finalize + stuck-job recovery
- discover_recipes.py: daemon-thread runner (mirrors enrich pattern) walks a URL list; scrape_me → reshape to mealie shape → INSERT IGNORE → forge.enrich_recipe → flip raw → enriched. SEED_URLS curated starter packs for allrecipes / bbc / smitten / pinch / hbh.
- endpoints: GET /discover, GET /api/discover/search (q + cuisine + complexity + protein + meal_type + kid-fit + max_minutes + status), POST /api/discover/import/<id>, /reject/<id>, /scrape-start (seed or urls list), /scrape-status, /scrape-cancel/<id>
- discover.html: filter row + card grid + collapsible scrape panel with seed chips and url textarea + live progress poll
- nav: 'discover' tab on /, link card on /me
- boot recovery: fail_stuck_discover_jobs at startup
This commit is contained in:
parent
8a09b8f8be
commit
3ec120c1d9
8 changed files with 1109 additions and 1 deletions
228
cauldron/db.py
228
cauldron/db.py
|
|
@ -2143,3 +2143,231 @@ class DB:
|
|||
(stale_minutes,),
|
||||
)
|
||||
return cur.rowcount
|
||||
|
||||
# --- discover (Discover v0.1) ------------------------------------------
|
||||
|
||||
def insert_discovered_recipe(
    self,
    *,
    slug: str | None,
    source_url: str,
    name: str | None,
    description: str | None,
    image_url: str | None,
    scraped_json: str,
) -> int | None:
    """Store a newly scraped recipe with status 'raw'.

    ``source_url`` is cut to 768 characters before it is bound. The
    statement is INSERT IGNORE, so a row that collides with an existing
    one is dropped silently instead of raising.

    Returns the cursor's ``lastrowid``, or None when that value is falsy
    (no row was inserted, i.e. the URL was already scraped).
    """
    statement = """INSERT IGNORE INTO cauldron_discovered_recipes
                       (slug, source_url, name, description, image_url,
                        scraped_json, status, scraped_at, last_action_at)
                   VALUES (%s, %s, %s, %s, %s, %s, 'raw', NOW(), NOW())"""
    bound = (slug, source_url[:768], name, description, image_url, scraped_json)
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, bound)
        new_id = cursor.lastrowid
    return new_id if new_id else None
|
||||
|
||||
def update_discovered_meta(
    self, discover_id: int, *, meta_json: str, version: int
) -> None:
    """Save enrichment output for one discovered recipe.

    Writes ``meta_json`` and ``enrich_version``, refreshes
    ``last_action_at`` and promotes the row from 'raw' to 'enriched'.
    A row in any other status keeps that status.
    """
    statement = """UPDATE cauldron_discovered_recipes
                      SET meta_json=%s,
                          enrich_version=%s,
                          status=CASE WHEN status='raw' THEN 'enriched'
                                      ELSE status END,
                          last_action_at=NOW()
                    WHERE id=%s"""
    bound = (meta_json, version, discover_id)
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, bound)
|
||||
|
||||
def set_discovered_status(self, discover_id: int, status: str) -> None:
    """Set a discovered recipe's status and refresh ``last_action_at``.

    Used for the 'imported' / 'rejected' transitions; the value is written
    as given, with no validation here.
    """
    statement = """UPDATE cauldron_discovered_recipes
                      SET status=%s, last_action_at=NOW()
                    WHERE id=%s"""
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, (status, discover_id))
|
||||
|
||||
def get_discovered_recipe(self, discover_id: int) -> dict | None:
    """Fetch one discovered-recipe row by id; returns whatever ``fetchone`` yields."""
    query = "SELECT * FROM cauldron_discovered_recipes WHERE id=%s"
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(query, (discover_id,))
        row = cursor.fetchone()
    return row
|
||||
|
||||
def list_discovered_recipes(
    self,
    *,
    status: str | list[str] | None = "enriched",
    q: str | None = None,
    cuisine: str | None = None,
    complexity: str | None = None,
    primary_protein: str | None = None,
    meal_type: str | None = None,
    kid_friendly_min: int | None = None,
    max_minutes: int | None = None,
    limit: int = 60,
    offset: int = 0,
) -> list[dict]:
    """Browse discovered recipes with optional filters.

    Args:
        status: a single status, a collection of statuses (list, tuple or
            set), or None for no status filter. Defaults to 'enriched' so
            only ready-to-import rows are returned. An empty collection
            short-circuits to ``[]`` without querying.
        q: full-text query matched against name + description; when given
            the results are relevance-ranked, otherwise newest-first.
        cuisine, complexity, primary_protein, meal_type: exact-match
            filters on the same-named keys of ``meta_json``.
        kid_friendly_min: lower bound on ``meta_json.kid_friendly_score``.
        max_minutes: upper bound on ``meta_json.estimated_minutes``.
        limit, offset: paging. Negative values are clamped to 0 because
            MySQL rejects a negative LIMIT/OFFSET.

    Returns:
        The matching rows as a list (empty when nothing matches).
    """
    fulltext = "MATCH(name, description) AGAINST (%s IN NATURAL LANGUAGE MODE)"
    where: list[str] = []
    args: list = []

    if status is not None:
        if isinstance(status, (list, tuple, set, frozenset)):
            wanted = list(status)
            if not wanted:
                return []
            placeholders = ",".join(["%s"] * len(wanted))
            where.append(f"status IN ({placeholders})")
            args.extend(wanted)
        else:
            where.append("status = %s")
            args.append(status)

    if q:
        where.append(fulltext)
        args.append(q)

    # The JSON paths below come from these hard-coded keys only; caller
    # supplied values are always bound as parameters.
    equality_filters = (
        ("cuisine", cuisine),
        ("complexity", complexity),
        ("primary_protein", primary_protein),
        ("meal_type", meal_type),
    )
    for key, value in equality_filters:
        if value:
            where.append(f"JSON_UNQUOTE(JSON_EXTRACT(meta_json, '$.{key}')) = %s")
            args.append(value)

    numeric_filters = (
        ("kid_friendly_score", ">=", kid_friendly_min),
        ("estimated_minutes", "<=", max_minutes),
    )
    for key, comparison, bound in numeric_filters:
        if bound is not None:
            where.append(
                f"CAST(JSON_EXTRACT(meta_json, '$.{key}') AS UNSIGNED) {comparison} %s"
            )
            args.append(bound)

    sql = "SELECT * FROM cauldron_discovered_recipes"
    if where:
        sql += " WHERE " + " AND ".join(where)

    # Relevance-rank when there's a search query, else newest-first.
    if q:
        sql += f" ORDER BY {fulltext} DESC, scraped_at DESC"
        args.append(q)
    else:
        sql += " ORDER BY scraped_at DESC"

    sql += " LIMIT %s OFFSET %s"
    args.extend([max(int(limit), 0), max(int(offset), 0)])

    with self.conn() as c, c.cursor() as cur:
        cur.execute(sql, args)
        return list(cur.fetchall() or [])
|
||||
|
||||
def count_discovered_by_status(self) -> dict[str, int]:
    """Return a mapping of status → number of discovered recipes in that status."""
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(
            """SELECT status, COUNT(*) AS n
                 FROM cauldron_discovered_recipes GROUP BY status"""
        )
        rows = cursor.fetchall() or []
    totals: dict[str, int] = {}
    for row in rows:
        totals[row["status"]] = int(row["n"])
    return totals
|
||||
|
||||
def create_discover_job(
    self, *, started_by_sub: str, source_seed: str
) -> int:
    """Insert a discover job in state 'running' and return the cursor's ``lastrowid``.

    ``source_seed`` is cut to 255 characters before it is bound.
    """
    statement = """INSERT INTO cauldron_discover_jobs
                       (started_by_sub, source_seed, state)
                   VALUES (%s, %s, 'running')"""
    bound = (started_by_sub, source_seed[:255])
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, bound)
        job_id = cursor.lastrowid
    return job_id
|
||||
|
||||
def get_discover_job(self, job_id: int) -> dict | None:
    """Fetch one discover-job row by id; returns whatever ``fetchone`` yields."""
    query = "SELECT * FROM cauldron_discover_jobs WHERE id=%s"
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(query, (job_id,))
        row = cursor.fetchone()
    return row
|
||||
|
||||
def get_discover_job_state(self, job_id: int) -> str | None:
    """Return just the ``state`` column of a discover job, or None if no row matches."""
    query = "SELECT state FROM cauldron_discover_jobs WHERE id=%s"
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(query, (job_id,))
        row = cursor.fetchone()
    if not row:
        return None
    return row["state"]
|
||||
|
||||
def latest_discover_job(self) -> dict | None:
    """Return the discover job with the greatest ``started_at`` (whatever ``fetchone`` yields)."""
    query = """SELECT * FROM cauldron_discover_jobs
                ORDER BY started_at DESC LIMIT 1"""
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(query)
        row = cursor.fetchone()
    return row
|
||||
|
||||
def running_discover_job(self) -> dict | None:
    """Return the most recently started job still in state 'running' (whatever ``fetchone`` yields)."""
    query = """SELECT * FROM cauldron_discover_jobs
                WHERE state='running' ORDER BY started_at DESC LIMIT 1"""
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(query)
        row = cursor.fetchone()
    return row
|
||||
|
||||
def update_discover_job_progress(
    self,
    job_id: int,
    *,
    pages_delta: int = 0,
    added_delta: int = 0,
    skipped_delta: int = 0,
    error_delta: int = 0,
    last_error: str | None = None,
) -> None:
    """Add the given deltas to a job's counters and refresh ``last_progress_at``.

    ``last_error`` replaces the stored message only when it is non-empty
    (COALESCE keeps the previous one otherwise); it is cut to 500
    characters before it is bound.
    """
    clipped_error = last_error[:500] if last_error else None
    statement = """UPDATE cauldron_discover_jobs
                      SET pages_scraped = pages_scraped + %s,
                          recipes_added = recipes_added + %s,
                          skipped_count = skipped_count + %s,
                          error_count = error_count + %s,
                          last_error = COALESCE(%s, last_error),
                          last_progress_at = NOW()
                    WHERE id=%s"""
    bound = (
        pages_delta,
        added_delta,
        skipped_delta,
        error_delta,
        clipped_error,
        job_id,
    )
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, bound)
|
||||
|
||||
def finalize_discover_job(self, job_id: int, *, state: str) -> None:
    """Move a job to ``state`` unless it is already terminal.

    The WHERE clause skips rows whose state is 'done', 'failed' or
    'cancelled', so a late finalize cannot overwrite an earlier terminal
    state. ``finished_at`` is stamped only when the new state is itself
    one of those three.
    """
    statement = """UPDATE cauldron_discover_jobs
                      SET state=%s,
                          finished_at = CASE WHEN %s IN ('done','failed','cancelled')
                                             THEN NOW() ELSE finished_at END,
                          last_progress_at = NOW()
                    WHERE id=%s
                      AND state NOT IN ('done','failed','cancelled')"""
    # ``state`` is bound twice: once for SET, once for the CASE test.
    bound = (state, state, job_id)
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, bound)
|
||||
|
||||
def fail_stuck_discover_jobs(self, *, stale_minutes: int = 15) -> int:
    """Fail 'running' jobs whose last progress is older than ``stale_minutes``.

    Sets ``finished_at`` and fills ``last_error`` with a recovery note
    when none is recorded. Returns the cursor's ``rowcount``.
    """
    statement = """UPDATE cauldron_discover_jobs
                      SET state='failed',
                          finished_at=NOW(),
                          last_error=COALESCE(last_error,
                                              'recovery: worker exited mid-run')
                    WHERE state='running'
                      AND last_progress_at < NOW() - INTERVAL %s MINUTE"""
    with self.conn() as connection, connection.cursor() as cursor:
        cursor.execute(statement, (stale_minutes,))
        affected = cursor.rowcount
    return affected
|
||||
|
|
|
|||
267
cauldron/discover_recipes.py
Normal file
267
cauldron/discover_recipes.py
Normal file
|
|
@ -0,0 +1,267 @@
|
|||
"""Discover v0.1 — scrape external recipe URLs into the discover corpus.
|
||||
|
||||
Pipeline per URL:
|
||||
1. recipe_scrapers.scrape_me(url) → schema.org structured recipe
|
||||
2. Reshape into a Mealie-ish dict (name, description, recipeYield,
|
||||
recipeIngredient[{note}], recipeInstructions[{text}])
|
||||
3. INSERT IGNORE into cauldron_discovered_recipes (UNIQUE on source_url)
|
||||
4. forge.enrich_recipe(reshaped) → Hecate-tier metadata
|
||||
5. Persist meta_json, flip status raw → enriched
|
||||
|
||||
Same daemon-thread + cancel + stuck-recovery pattern as enrich/sterilize.
|
||||
|
||||
Seed sources are hardcoded URL lists keyed by seed name in SEED_URLS
(allrecipes-popular, bbc-good-food, smitten-kitchen, ...). Cobb supplies
a seed name OR a literal list of URLs via the admin endpoint. Either way,
the runner walks the list, scrape→insert→enrich each, and emits progress.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import threading
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from .db import DB
|
||||
from .forge import Forge, ForgeError
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Curated seed URL lists for v0.1 dogfood. Each is a small starter pack —
# we expand later by adding sitemap/category-page walkers. Keeping these
# manual lets v0.1 ship without a separate site-walker per source.
#
# Keys are the seed names reported by list_seeds(); values are the recipe
# page URLs belonging to that seed.
SEED_URLS: dict[str, list[str]] = {
    "allrecipes-popular": [
        "https://www.allrecipes.com/recipe/24074/alyssas-chicken/",
        "https://www.allrecipes.com/recipe/229960/world-best-now-veggie-burgers/",
        "https://www.allrecipes.com/recipe/16641/old-fashioned-mac-and-cheese/",
        "https://www.allrecipes.com/recipe/8499082/instant-pot-pulled-pork/",
        "https://www.allrecipes.com/recipe/220854/chef-johns-creamy-mushroom-pasta/",
        "https://www.allrecipes.com/recipe/8514308/dr-pepper-pulled-pork/",
        "https://www.allrecipes.com/recipe/16700/salisbury-steak/",
        "https://www.allrecipes.com/recipe/8536048/oven-baked-bbq-chicken-thighs/",
    ],
    "bbc-good-food": [
        "https://www.bbcgoodfood.com/recipes/spaghetti-bolognese-recipe",
        "https://www.bbcgoodfood.com/recipes/best-spaghetti-carbonara-recipe",
        "https://www.bbcgoodfood.com/recipes/easy-chicken-curry",
        "https://www.bbcgoodfood.com/recipes/chilli-con-carne-recipe",
        "https://www.bbcgoodfood.com/recipes/perfect-roast-chicken",
        "https://www.bbcgoodfood.com/recipes/chicken-tikka-masala",
        "https://www.bbcgoodfood.com/recipes/sticky-toffee-pudding",
    ],
    "smitten-kitchen": [
        "https://smittenkitchen.com/2023/02/black-pepper-chicken/",
        "https://smittenkitchen.com/2024/01/orecchiette-with-broccoli-rabe/",
        "https://smittenkitchen.com/2023/09/baked-orzo-with-eggplant-and-mozzarella/",
        "https://smittenkitchen.com/2022/12/cacio-e-pepe-soup-with-broccoli-rabe/",
        "https://smittenkitchen.com/2022/05/spinach-chickpea-skillet/",
    ],
    "pinch-of-yum": [
        "https://pinchofyum.com/the-best-soft-chocolate-chip-cookies",
        "https://pinchofyum.com/spicy-peanut-soba-noodle-salad",
        "https://pinchofyum.com/best-chicken-marinade",
        "https://pinchofyum.com/15-minute-meal-prep-cilantro-lime-chicken-and-cauliflower-rice",
        "https://pinchofyum.com/pesto-cavatappi",
    ],
    "half-baked-harvest": [
        "https://www.halfbakedharvest.com/cajun-chicken-pasta/",
        "https://www.halfbakedharvest.com/garlic-butter-creamed-spinach-salmon/",
        "https://www.halfbakedharvest.com/spicy-pretzel-chicken/",
        "https://www.halfbakedharvest.com/crispy-buffalo-chicken-tacos/",
        "https://www.halfbakedharvest.com/butter-chicken-meatballs/",
    ],
}
|
||||
|
||||
|
||||
def list_seeds() -> list[dict]:
    """Summarise SEED_URLS for the /discover UI: one ``{"name", "count"}`` dict per seed."""
    summary = []
    for seed_name, seed_urls in SEED_URLS.items():
        summary.append({"name": seed_name, "count": len(seed_urls)})
    return summary
|
||||
|
||||
|
||||
def _slug_from_url(url: str) -> str | None:
    """Derive a fallback slug from a URL's last non-empty path segment.

    The segment is cut to 255 characters. Returns None when the path has
    no non-empty segment or when parsing raises.
    """
    try:
        segments = urlparse(url).path.split("/")
        non_empty = list(filter(None, segments))
        if not non_empty:
            return None
        return non_empty[-1][:255]
    except Exception:
        return None
|
||||
|
||||
|
||||
def _safe_call(fn, default=None):
    """Call ``fn()`` and return its result, or ``default`` if it raises.

    recipe_scrapers raises assorted Exception subclasses for missing
    fields; absorbing them per field keeps one absent field from aborting
    the whole scrape.
    """
    try:
        result = fn()
    except Exception:
        result = default
    return result
|
||||
|
||||
|
||||
def _to_mealie_shape(scraper, source_url: str) -> dict:
    """Convert a scraper object into the Mealie-style recipe dict.

    Every field is read through ``_safe_call`` so a scraper that raises
    for one field still yields the others. Ingredients become
    ``[{"note": ...}]`` and steps become ``[{"text": ...}]``, both with
    blank entries removed. Missing text fields come back as "".
    """
    title = _safe_call(scraper.title) or ""
    description_getter = getattr(scraper, "description", lambda: "")
    description = _safe_call(description_getter, "") or ""
    yields = _safe_call(scraper.yields, "") or ""
    image = _safe_call(scraper.image, "") or ""

    ingredients = []
    for raw in _safe_call(scraper.ingredients, []) or []:
        if not raw:
            continue
        note = str(raw).strip()
        if note:
            ingredients.append({"note": note})

    # Use instructions_list when it yields something; otherwise fall back
    # to the newline-joined string some scrapers expose instead.
    list_getter = getattr(scraper, "instructions_list", lambda: None)
    listed = _safe_call(list_getter, None)
    if listed:
        candidates = [str(item).strip() for item in listed if item]
    else:
        blob = _safe_call(scraper.instructions, "") or ""
        candidates = [line.strip() for line in blob.split("\n")]
    instructions = [{"text": step} for step in candidates if step]

    shaped = {}
    shaped["name"] = title
    shaped["description"] = description
    shaped["recipeYield"] = yields
    shaped["image"] = image
    shaped["source_url"] = source_url
    shaped["recipeIngredient"] = ingredients
    shaped["recipeInstructions"] = instructions
    return shaped
|
||||
|
||||
|
||||
def _scrape_one(url: str) -> tuple[dict, str | None] | None:
    """Scrape one URL into ``(mealie_shape_dict, image_url)``.

    Returns None (after logging) when recipe_scrapers cannot be imported,
    when ``scrape_me`` raises, or when the result has no name or no
    ingredients. ``image_url`` is None when the scraper gave no image.
    """
    try:
        from recipe_scrapers import scrape_me  # type: ignore
    except ImportError:
        log.exception("[discover] recipe_scrapers not installed")
        return None

    try:
        # NOTE(review): confirm the pinned recipe-scrapers release still
        # accepts `wild_mode`; a TypeError here would be caught below and
        # reported as an ordinary scrape failure for every URL.
        scraper = scrape_me(url, wild_mode=True)
    except Exception as exc:
        log.warning("[discover] scrape_me(%s) failed: %s", url, exc)
        return None

    recipe = _to_mealie_shape(scraper, url)
    image_url = recipe.get("image") or None

    # A recipe without a name or without ingredients is not worth storing.
    required = (
        ("name", "[discover] no name extracted from %s"),
        ("recipeIngredient", "[discover] no ingredients extracted from %s"),
    )
    for key, complaint in required:
        if not recipe.get(key):
            log.warning(complaint, url)
            return None
    return recipe, image_url
|
||||
|
||||
|
||||
def run_discover(
    *,
    db: DB,
    job_id: int,
    forge: Forge,
    urls: list[str],
) -> None:
    """Walk ``urls`` and scrape → insert → enrich each one.

    Meant to run on the daemon thread created by ``spawn_thread``. Before
    each URL the job's state is re-read; if it has become 'cancelled',
    'failed' or 'done' the loop returns without finalizing. Per-URL
    failures (scrape, insert, enrich, persist) are counted on the job row
    and the loop moves on to the next URL. Any other exception is logged
    and the job is marked 'failed'.

    Args:
        db: database handle used for recipe rows and job bookkeeping.
        job_id: id of the discover job this run reports against.
        forge: enrichment client; ``enrich_recipe`` is called once per
            newly inserted recipe.
        urls: recipe page URLs to process, in order.
    """
    log.info("[discover:%s] start (%d urls)", job_id, len(urls))

    def _cancelled() -> bool:
        # Any terminal state set from outside (cancel endpoint, recovery)
        # means this worker should stop.
        return db.get_discover_job_state(job_id) in ("cancelled", "failed", "done")

    try:
        for url in urls:
            if _cancelled():
                log.info("[discover:%s] aborted (state changed)", job_id)
                return

            db.update_discover_job_progress(job_id, pages_delta=1)

            scraped = _scrape_one(url)
            if scraped is None:
                db.update_discover_job_progress(
                    job_id, error_delta=1, last_error=f"scrape failed: {url[:200]}"
                )
                continue
            shaped, image = scraped

            try:
                slug = _slug_from_url(url)
                discover_id = db.insert_discovered_recipe(
                    slug=slug,
                    source_url=url,
                    name=shaped.get("name") or None,
                    description=(shaped.get("description") or "")[:60000] or None,
                    image_url=image,
                    scraped_json=json.dumps(shaped, ensure_ascii=False),
                )
            except Exception as e:
                log.warning("[discover:%s] insert(%s) failed: %s", job_id, url, e)
                db.update_discover_job_progress(
                    job_id, error_delta=1, last_error=f"insert: {str(e)[:200]}"
                )
                continue

            if not discover_id:
                # UNIQUE conflict — already in the corpus from a prior scrape
                db.update_discover_job_progress(job_id, skipped_delta=1)
                continue

            try:
                meta = forge.enrich_recipe(shaped)
            except (ForgeError, RuntimeError) as e:
                msg = str(e)[:500]
                log.warning("[discover:%s] enrich(%s): %s", job_id, url, msg)
                db.update_discover_job_progress(
                    job_id, error_delta=1, last_error=f"enrich: {msg[:200]}"
                )
                # Leave the row in 'raw' so we can retry enrichment later.
                # The recipe IS in the corpus; just hasn't been classified.
                continue

            try:
                db.update_discovered_meta(
                    discover_id,
                    meta_json=json.dumps(meta, ensure_ascii=False),
                    version=DB.ENRICH_VERSION,
                )
                db.update_discover_job_progress(job_id, added_delta=1)
            except Exception as e:
                log.warning("[discover:%s] persist meta(%s): %s", job_id, url, e)
                db.update_discover_job_progress(
                    job_id, error_delta=1, last_error=f"persist: {str(e)[:200]}"
                )

        db.finalize_discover_job(job_id, state="done")
        log.info("[discover:%s] done", job_id)
    except Exception:
        log.exception("[discover:%s] crashed", job_id)
        try:
            db.finalize_discover_job(job_id, state="failed")
        except Exception:
            # Still best-effort (nothing above this thread can handle it),
            # but leave a trace instead of swallowing the failure: the job
            # row is left in whatever state it had.
            log.exception("[discover:%s] could not mark job failed", job_id)
|
||||
|
||||
|
||||
def spawn_thread(
    *,
    db: DB,
    job_id: int,
    forge: Forge,
    urls: list[str],
) -> threading.Thread:
    """Start ``run_discover`` on a daemon thread and return the started thread.

    The thread is named ``discover-recipes-<job_id>``.
    """
    worker_kwargs = {"db": db, "job_id": job_id, "forge": forge, "urls": urls}
    worker = threading.Thread(
        name=f"discover-recipes-{job_id}",
        target=run_discover,
        kwargs=worker_kwargs,
        daemon=True,
    )
    worker.start()
    return worker
|
||||
|
|
@ -105,6 +105,50 @@ class Mealie:
|
|||
def update_recipe(self, slug: str, body: dict) -> dict:
    """PUT ``body`` to ``/api/recipes/<slug>`` and return what ``_put`` returns."""
    endpoint = f"/api/recipes/{slug}"
    return self._put(endpoint, body)
|
||||
|
||||
def import_from_url(
    self,
    url: str,
    *,
    include_tags: bool = False,
    include_categories: bool = False,
) -> str:
    """Ask Mealie to scrape ``url`` itself and create the recipe.

    POSTs to /api/recipes/create/url with a 60 second timeout. Our own
    scraped JSON is deliberately not sent, so the import goes through the
    same path as "Import from URL" in Mealie's UI.

    Returns:
        The new recipe's slug. The response may be a bare JSON string, a
        JSON object carrying "slug" (or "id"), or — if the body is not
        valid JSON — raw text with surrounding quotes stripped.

    Raises:
        MealieError: on a transport failure, an HTTP status >= 400, or a
            response from which no non-empty string slug can be read.
    """
    payload = {
        "url": url,
        "includeTags": bool(include_tags),
        "includeCategories": bool(include_categories),
    }
    try:
        response = self.session.post(
            f"{self.base_url}/api/recipes/create/url",
            json=payload,
            timeout=60,
        )
    except requests.RequestException as e:
        raise MealieError(f"POST /api/recipes/create/url transport: {e}") from e

    if response.status_code >= 400:
        raise MealieError(
            f"POST /api/recipes/create/url -> {response.status_code}: {response.text[:300]}"
        )

    # Mealie returns the new slug as a bare JSON string
    try:
        parsed = response.json()
    except Exception:
        parsed = response.text.strip().strip('"')
    if isinstance(parsed, dict):
        parsed = parsed.get("slug") or parsed.get("id")
    if isinstance(parsed, str) and parsed:
        return parsed
    raise MealieError(f"create/url returned no slug: {response.text[:200]}")
|
||||
|
||||
def delete_recipe(self, slug: str) -> dict:
|
||||
"""DELETE /api/recipes/<slug>. Permanently removes the recipe and
|
||||
its recipe_ingredient rows. Permission-scoped per-household.
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ from .config import load
|
|||
from .crypto import TokenCrypto
|
||||
from .db import DB
|
||||
from .forge import Forge, ForgeError
|
||||
from . import aggregator, bulk_sterilize, consolidate_foods, dedupe_recipes, enrich_recipes, foods
|
||||
from . import aggregator, bulk_sterilize, consolidate_foods, dedupe_recipes, discover_recipes, enrich_recipes, foods
|
||||
from .mealie import Mealie, MealieError
|
||||
from .oidc import init_oauth
|
||||
from .recipe_index import flatten_recipe, refresh_household_index, search_index
|
||||
|
|
@ -132,6 +132,13 @@ def create_app() -> Flask:
|
|||
except Exception as e:
|
||||
app.logger.warning("enrich stuck-job recovery failed: %s", e)
|
||||
|
||||
try:
|
||||
n_failed = db.fail_stuck_discover_jobs(stale_minutes=15)
|
||||
if n_failed:
|
||||
app.logger.info("failed %d stuck discover jobs at boot", n_failed)
|
||||
except Exception as e:
|
||||
app.logger.warning("discover stuck-job recovery failed: %s", e)
|
||||
|
||||
oauth = init_oauth(
|
||||
app,
|
||||
issuer=cfg.oidc_issuer,
|
||||
|
|
@ -1865,6 +1872,166 @@ def create_app() -> Flask:
|
|||
db.finalize_consolidate_job(job_id, state="cancelled")
|
||||
return jsonify({"ok": True})
|
||||
|
||||
# ---------- Discover v0.1 (browse external recipes) ------------------
|
||||
|
||||
@app.get("/discover")
@require_session
def discover_page():
    """Render the /discover page: status counts, latest scrape job and seed list."""
    # Discover is a global, cross-household corpus, so there is no
    # household gate here. A session is still required because the import
    # buttons act on the signed-in user's Mealie.
    status_counts = db.count_discovered_by_status()
    newest_job = db.latest_discover_job()
    seed_summary = discover_recipes.list_seeds()
    job_payload = _consolidate_job_payload(newest_job) if newest_job else None
    return render_template(
        "discover.html",
        active="discover",
        counts=status_counts,
        latest_job=job_payload,
        seeds=seed_summary,
    )
|
||||
|
||||
@app.get("/api/discover/search")
@require_session
def discover_search():
    """JSON search over the discover corpus.

    Query parameters:
        q: full-text query (optional).
        status: 'active' (default) = enriched + raw together, so newly
            scraped rows surface before enrichment finishes while
            imported/rejected stay hidden; 'all' = no status filter; any
            other value is used as an exact status.
        cuisine, complexity, primary_protein, meal_type: exact filters.
        kid_friendly_min, max_minutes: integer bounds.
        limit: page size, default 60, clamped to 0..200.
        offset: default 0, never negative.

    Integer parameters that do not parse are treated as absent instead of
    raising, so junk input cannot turn into a 500.

    Returns ``{"recipes": [...], "count": n}`` with ``scraped_json``
    removed, ``meta_json`` decoded and timestamps ISO-formatted.
    """
    args = request.args

    def _opt(name: str) -> str | None:
        v = (args.get(name) or "").strip()
        return v or None

    def _opt_int(name: str) -> int | None:
        v = _opt(name)
        if v is None:
            return None
        try:
            return int(v)
        except ValueError:
            return None

    q = _opt("q")

    status_arg = (args.get("status") or "active").strip()
    if status_arg == "active":
        status: list[str] | str | None = ["enriched", "raw"]
    elif status_arg == "all":
        status = None
    else:
        status = status_arg

    limit = _opt_int("limit")
    limit = 60 if limit is None else max(0, min(limit, 200))
    offset = max(_opt_int("offset") or 0, 0)

    rows = db.list_discovered_recipes(
        status=status,
        q=q,
        cuisine=_opt("cuisine"),
        complexity=_opt("complexity"),
        primary_protein=_opt("primary_protein"),
        meal_type=_opt("meal_type"),
        kid_friendly_min=_opt_int("kid_friendly_min"),
        max_minutes=_opt_int("max_minutes"),
        limit=limit,
        offset=offset,
    )

    out = []
    for r in rows:
        meta = r.get("meta_json")
        if isinstance(meta, str):
            try:
                meta = _json_loads(meta)
            except Exception:
                # Undecodable metadata is reported as null, not as an error.
                meta = None
        for k in ("scraped_at", "last_action_at"):
            v = r.get(k)
            if v is not None and hasattr(v, "isoformat"):
                r[k] = v.isoformat()
        # scraped_json can be heavy — drop it from list responses
        r.pop("scraped_json", None)
        r["meta_json"] = meta
        out.append(r)
    return jsonify({"recipes": out, "count": len(out)})
|
||||
|
||||
@app.post("/api/discover/import/<int:discover_id>")
@require_session
def discover_import(discover_id: int):
    """Import one discovered recipe into the current user's Mealie.

    Responses: 404 when the row does not exist; 409 when it is already
    imported or the user has no Mealie client; 502 when Mealie's import
    fails; otherwise ``{"ok": true, "slug": ...}`` after the row has been
    marked 'imported'.
    """
    recipe_row = db.get_discovered_recipe(discover_id)
    if not recipe_row:
        return jsonify({"error": "not_found"}), 404
    if recipe_row.get("status") == "imported":
        return jsonify({"error": "already_imported"}), 409

    mealie_client = current_user_mealie()
    if mealie_client is None:
        return jsonify({"error": "mealie_not_connected"}), 409

    try:
        created_slug = mealie_client.import_from_url(recipe_row["source_url"])
    except MealieError as err:
        failure = {"error": "mealie_import_failed", "detail": str(err)[:300]}
        return jsonify(failure), 502

    db.set_discovered_status(discover_id, "imported")
    return jsonify({"ok": True, "slug": created_slug})
|
||||
|
||||
@app.post("/api/discover/reject/<int:discover_id>")
@require_session
def discover_reject(discover_id: int):
    """Mark a discovered recipe 'rejected'; 404 when the row does not exist."""
    if not db.get_discovered_recipe(discover_id):
        return jsonify({"error": "not_found"}), 404
    db.set_discovered_status(discover_id, "rejected")
    return jsonify({"ok": True})
|
||||
|
||||
@app.post("/api/discover/scrape-start")
@require_session
def discover_scrape_start():
    """Start a scrape job from a named seed or an explicit URL list.

    JSON body: ``{"seed": "<name>"}`` or ``{"urls": ["https://...", ...]}``.
    A body that is missing, unparseable or not a JSON object is treated as
    empty. Only http(s) URLs are kept, and at most 50 per job.

    Responses: 409 when a job is already running; 400 for a non-string
    seed, an unknown seed, a non-list ``urls``, or no usable URLs;
    otherwise ``{"ok": true, "job_id": ..., "urls_queued": n}``.
    """
    # NOTE(review): any signed-in user can make the server fetch arbitrary
    # http(s) URLs through this endpoint — confirm that is acceptable or
    # restrict who may call it / which hosts may be fetched.
    u = session["user"]
    active = db.running_discover_job()
    if active:
        return jsonify({"error": "already_running", "job_id": active["id"]}), 409

    body = request.get_json(silent=True)
    if not isinstance(body, dict):
        # A JSON array/string/number has no .get(); treat it as no body.
        body = {}

    raw_seed = body.get("seed")
    if raw_seed is not None and not isinstance(raw_seed, str):
        return jsonify({"error": "seed must be a string"}), 400
    seed_name = (raw_seed or "").strip()

    urls: list[str] = []
    if seed_name:
        seeds = discover_recipes.SEED_URLS
        if seed_name not in seeds:
            return jsonify({"error": "unknown_seed", "available": list(seeds.keys())}), 400
        urls = list(seeds[seed_name])
    else:
        url_list = body.get("urls") or []
        if not isinstance(url_list, list):
            return jsonify({"error": "urls must be a list"}), 400
        urls = [str(x).strip() for x in url_list if str(x).strip()]
        seed_name = "manual"

    if not urls:
        return jsonify({"error": "no urls supplied"}), 400

    # Light sanity guard: scrub bad entries, cap at 50/job
    urls = [x for x in urls if x.startswith(("http://", "https://"))][:50]
    if not urls:
        return jsonify({"error": "no valid http(s) urls"}), 400

    job_id = db.create_discover_job(
        started_by_sub=u["sub"], source_seed=seed_name,
    )
    discover_recipes.spawn_thread(
        db=db, job_id=job_id, forge=forge, urls=urls,
    )
    return jsonify({"ok": True, "job_id": job_id, "urls_queued": len(urls)})
|
||||
|
||||
@app.get("/api/discover/scrape-status")
@require_session
def discover_scrape_status():
    """Return the most recent scrape job as ``{"job": ...}``, or ``{"job": null}`` if none exists."""
    newest_job = db.latest_discover_job()
    payload = _consolidate_job_payload(newest_job) if newest_job else None
    return jsonify({"job": payload})
|
||||
|
||||
@app.post("/api/discover/scrape-cancel/<int:job_id>")
@require_session
def discover_scrape_cancel(job_id: int):
    """Cancel a running scrape job.

    Responses: 404 when the job does not exist; 409 ``bad_state:<state>``
    when it is not 'running'; otherwise the job is finalized as
    'cancelled' and ``{"ok": true}`` is returned.
    """
    job_row = db.get_discover_job(job_id)
    if not job_row:
        return jsonify({"error": "not_found"}), 404

    current_state = job_row["state"]
    if current_state != "running":
        return jsonify({"error": f"bad_state:{current_state}"}), 409

    db.finalize_discover_job(job_id, state="cancelled")
    return jsonify({"ok": True})
|
||||
|
||||
# ---------- admin sterilizer (bearer-auth, kick off on user's behalf) -
|
||||
|
||||
@app.post("/api/admin/sterilize/bulk-start")
|
||||
|
|
|
|||
|
|
@ -482,6 +482,7 @@ button { font-family: inherit; }
|
|||
<a href="/picks" class="{% if active == 'picks' %}active{% endif %}">picks</a>
|
||||
<a href="/plan" class="{% if active == 'plan' %}active{% endif %}">plan</a>
|
||||
<a href="/list" class="{% if active == 'list' %}active{% endif %}">list</a>
|
||||
<a href="/discover" class="{% if active == 'discover' %}active{% endif %}">discover</a>
|
||||
<a href="/me" class="{% if active == 'me' %}active{% endif %}">me</a>
|
||||
</nav>
|
||||
<div class="topmeta">
|
||||
|
|
|
|||
397
cauldron/templates/discover.html
Normal file
397
cauldron/templates/discover.html
Normal file
|
|
@ -0,0 +1,397 @@
|
|||
{% extends "_base.html" %}
|
||||
{% block title %}Discover · Cauldron{% endblock %}
|
||||
{% block content %}
|
||||
|
||||
<style>
|
||||
.filter-row { display:flex; flex-wrap:wrap; gap:10px; margin:10px 0 18px 0;
|
||||
align-items:center; }
|
||||
.filter-row label { font-family:var(--mono); font-size:11px;
|
||||
color:var(--bone-dim); letter-spacing:.1em; text-transform:uppercase;
|
||||
display:flex; flex-direction:column; gap:4px; }
|
||||
.filter-row input[type=text], .filter-row input[type=number],
|
||||
.filter-row select {
|
||||
background:var(--bg-2); border:1px solid var(--line); color:var(--bone);
|
||||
padding:6px 8px; border-radius:6px; font-family:var(--mono);
|
||||
font-size:13px; min-width:130px; }
|
||||
.filter-row input[type=text].search { min-width:240px; }
|
||||
.grid { display:grid; gap:14px;
|
||||
grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); }
|
||||
.dcard { background:var(--bg-2); border:1px solid var(--line);
|
||||
border-radius:10px; overflow:hidden; display:flex; flex-direction:column; }
|
||||
.dcard .img { width:100%; aspect-ratio: 16/10;
|
||||
background:var(--bg-1) center/cover no-repeat;
|
||||
border-bottom:1px solid var(--line); }
|
||||
.dcard .img.placeholder { display:flex; align-items:center;
|
||||
justify-content:center; color:var(--muted); font-size:36px; }
|
||||
.dcard .body { padding:12px 14px; flex:1; display:flex;
|
||||
flex-direction:column; gap:8px; }
|
||||
.dcard h3 { font-family:var(--serif); font-size:1.05em; margin:0;
|
||||
color:var(--bone); line-height:1.2; }
|
||||
.dcard .meta-line { font-family:var(--mono); font-size:11px;
|
||||
color:var(--bone-dim); letter-spacing:.05em; }
|
||||
.dcard .quip { font-family:var(--serif); font-style:italic;
|
||||
color:var(--purple-bright); font-size:.92em; line-height:1.35; }
|
||||
.dcard .desc { color:var(--muted); font-size:.9em; line-height:1.4;
|
||||
display:-webkit-box; -webkit-line-clamp:3; -webkit-box-orient:vertical;
|
||||
overflow:hidden; }
|
||||
.dcard .src { font-family:var(--mono); font-size:10.5px;
|
||||
color:var(--muted); text-transform:lowercase; }
|
||||
.dcard .src a { color:var(--green-bright); text-decoration:none; }
|
||||
.dcard .actions { display:flex; gap:8px; padding:10px 14px;
|
||||
border-top:1px solid var(--line); background:var(--bg-1); }
|
||||
.dcard .actions .btn { flex:1; }
|
||||
.dcard.imported { opacity:.55; }
|
||||
.dcard.rejected { opacity:.4; }
|
||||
.dcard .raw-tag { display:inline-block; padding:2px 6px; font-size:10px;
|
||||
border:1px solid var(--line); border-radius:4px; color:var(--muted);
|
||||
font-family:var(--mono); letter-spacing:.1em; text-transform:uppercase; }
|
||||
.empty { color:var(--muted); font-style:italic; padding:32px;
|
||||
text-align:center; border:1px dashed var(--line); border-radius:8px; }
|
||||
.seed-row { display:flex; flex-wrap:wrap; gap:6px; margin:8px 0; }
|
||||
.seed-row .chip { cursor:pointer; }
|
||||
.progress-rail { width:100%; height:10px; background:var(--bg-2);
|
||||
border:1px solid var(--line); border-radius:6px; overflow:hidden;
|
||||
margin:8px 0; }
|
||||
.progress-fill { height:100%;
|
||||
background:linear-gradient(90deg, var(--purple-deep), var(--purple-bright));
|
||||
transition:width .3s ease; }
|
||||
.progress-meta { color:var(--bone-dim); font-family:var(--mono);
|
||||
font-size:11px; letter-spacing:.1em; display:flex; gap:14px; flex-wrap:wrap; }
|
||||
.progress-meta strong { color:var(--bone); }
|
||||
details.scrape-panel { margin-bottom:14px; }
|
||||
details.scrape-panel summary { cursor:pointer; font-family:var(--mono);
|
||||
font-size:12px; letter-spacing:.1em; color:var(--bone-dim);
|
||||
text-transform:uppercase; padding:6px 0; }
|
||||
details.scrape-panel summary:hover { color:var(--purple-bright); }
|
||||
textarea#urls-input { width:100%; min-height:80px;
|
||||
background:var(--bg-2); border:1px solid var(--line); color:var(--bone);
|
||||
padding:8px; border-radius:6px; font-family:var(--mono); font-size:12px; }
|
||||
</style>
|
||||
|
||||
<div class="page-head">
|
||||
<div class="crumb">// discover · external recipe corpus</div>
|
||||
<h1>recipe <span class="accent">discover</span></h1>
|
||||
<div class="lede">
|
||||
a cross-household library of recipes scraped from the open web —
|
||||
each enriched by hecate so you can filter by cuisine, complexity,
|
||||
primary protein, kid-friendliness, time. one click imports to your
|
||||
mealie household; the same sterilize + enrich pipelines you already
|
||||
trust run on it.
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<section class="panel">
|
||||
<div class="panel-head">
|
||||
<h2>browse</h2>
|
||||
<span class="ctx" id="status-line">
|
||||
{{ counts.get('enriched', 0) }} enriched · {{ counts.get('raw', 0) }} raw ·
|
||||
{{ counts.get('imported', 0) }} imported · {{ counts.get('rejected', 0) }} rejected
|
||||
</span>
|
||||
</div>
|
||||
|
||||
<div class="filter-row">
|
||||
<label>search
|
||||
<input type="text" id="q" class="search" placeholder="chicken, soup, ramen…">
|
||||
</label>
|
||||
<label>cuisine
|
||||
<select id="cuisine">
|
||||
<option value="">any</option>
|
||||
<option>american</option><option>italian</option><option>asian</option>
|
||||
<option>mexican</option><option>mediterranean</option>
|
||||
<option>indian</option><option>french</option>
|
||||
<option>middle-eastern</option><option>other</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>complexity
|
||||
<select id="complexity">
|
||||
<option value="">any</option>
|
||||
<option>easy</option><option>medium</option><option>involved</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>protein
|
||||
<select id="primary_protein">
|
||||
<option value="">any</option>
|
||||
<option>chicken</option><option>beef</option><option>pork</option>
|
||||
<option>fish</option><option>seafood</option><option>tofu</option>
|
||||
<option>tempeh</option><option>beans</option><option>eggs</option>
|
||||
<option>cheese</option><option>nuts</option><option>none</option>
|
||||
<option>mixed</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>meal type
|
||||
<select id="meal_type">
|
||||
<option value="">any</option>
|
||||
<option>breakfast</option><option>lunch</option><option>dinner</option>
|
||||
<option>snack</option><option>dessert</option><option>side</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>kid-fit ≥
|
||||
<select id="kid_friendly_min">
|
||||
<option value="">any</option>
|
||||
<option value="1">1</option><option value="2">2</option>
|
||||
<option value="3">3</option><option value="4">4</option>
|
||||
<option value="5">5</option>
|
||||
</select>
|
||||
</label>
|
||||
<label>max minutes
|
||||
<input type="number" id="max_minutes" min="1" max="600" placeholder="—">
|
||||
</label>
|
||||
<label>status
|
||||
<select id="status">
|
||||
<option value="active" selected>enriched + raw</option>
|
||||
<option value="enriched">enriched only</option>
|
||||
<option value="all">all (incl imported/rejected)</option>
|
||||
<option value="imported">imported</option>
|
||||
<option value="rejected">rejected</option>
|
||||
</select>
|
||||
</label>
|
||||
</div>
|
||||
|
||||
<details class="scrape-panel">
|
||||
<summary>+ scrape new recipes (admin)</summary>
|
||||
<p class="muted" style="margin-top:8px;">
|
||||
kick off a background scrape from a curated seed list, or paste your own
|
||||
urls (one per line, max 50). each url goes through scrape →
|
||||
insert → hecate enrich.
|
||||
</p>
|
||||
<div class="seed-row">
|
||||
{% for s in seeds %}
|
||||
<button class="chip" type="button" onclick="seedSet('{{ s.name }}')">
|
||||
{{ s.name }} <span class="muted">({{ s.count }})</span>
|
||||
</button>
|
||||
{% endfor %}
|
||||
</div>
|
||||
<textarea id="urls-input" placeholder="https://… one per line"></textarea>
|
||||
<div class="btn-row" style="margin-top:8px;">
|
||||
<button class="btn btn-purple" type="button" onclick="startScrape()">▸ start scrape</button>
|
||||
<button class="btn" type="button" onclick="cancelScrape()" id="cancel-btn" style="display:none;">cancel</button>
|
||||
</div>
|
||||
<div id="scrape-progress" style="display:none; margin-top:12px;">
|
||||
<div class="progress-rail"><div class="progress-fill" id="bar" style="width:0%;"></div></div>
|
||||
<div class="progress-meta">
|
||||
<span><strong id="pages">0</strong> walked</span>
|
||||
<span><strong id="added">0</strong> added</span>
|
||||
<span><strong id="skipped">0</strong> skipped</span>
|
||||
<span><strong id="errors">0</strong> errors</span>
|
||||
<span class="muted" id="last-error"></span>
|
||||
</div>
|
||||
</div>
|
||||
</details>
|
||||
|
||||
<div id="grid" class="grid"></div>
|
||||
<div id="empty-msg" class="empty" style="display:none;">no recipes match. widen filters or scrape some new ones.</div>
|
||||
</section>
|
||||
|
||||
<script>
|
||||
// Scrape job currently shown in the progress panel. Seeded from the template's
// latest_job (null when there is none); reassigned by startScrape() and
// fetchScrapeStatus().
let scrapeJob = {{ (latest_job | tojson) if latest_job else 'null' }};
|
||||
// setInterval handle for the scrape-status poll; null while no poll is active.
let scrapePoll = null;
|
||||
// setTimeout handle used by debouncedSearch() to coalesce filter changes.
let searchTimer = null;
|
||||
|
||||
// Shorthand for document.getElementById; returns null when no element has that id.
function $(id){ return document.getElementById(id); }
|
||||
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * null / undefined become the empty string; anything else is stringified
 * first. The five characters & < > " ' are replaced by their HTML entities.
 */
function _esc(s){
  if(s == null) return '';
  // Each value must be the *entity*, not the literal character: mapping a
  // character to itself would make this function a no-op.
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  return String(s).replace(/[&<>"']/g, ch => entities[ch]);
}
|
||||
|
||||
/**
 * Build the one-line metadata summary shown under a card title.
 *
 * Without metadata an "awaiting enrich" tag (raw HTML) is returned. Otherwise
 * the present fields are joined with ' · ' and HTML-escaped; a cuisine of
 * 'unknown' and a primary protein of 'none' are left out.
 */
function _metaLine(meta){
  if(!meta){
    return '<span class="raw-tag">awaiting enrich</span>';
  }
  const {
    cuisine,
    complexity,
    primary_protein: protein,
    estimated_minutes: minutes,
    kid_friendly_score: kidScore,
  } = meta;
  // One slot per field, in display order; null marks "omit this field".
  const parts = [
    cuisine && cuisine !== 'unknown' ? cuisine : null,
    complexity ? complexity : null,
    protein && protein !== 'none' ? protein : null,
    minutes ? minutes + ' min' : null,
    kidScore != null ? 'kid:' + kidScore : null,
  ].filter(part => part !== null);
  return _esc(parts.join(' · '));
}
|
||||
|
||||
/** Percent-encode characters that could terminate a CSS url('…') token. */
function _cssUrl(u){
  return String(u).replace(/['"()\\ \t\r\n]/g,
    ch => '%' + ch.charCodeAt(0).toString(16).toUpperCase().padStart(2, '0'));
}

/** Host part of a URL for display; falls back to the raw text if it does not parse. */
function _hostOf(url){
  try {
    return new URL(url).host;
  } catch(e){
    return String(url || '');
  }
}

/**
 * Render one discovered recipe as the HTML string for a .dcard element.
 *
 * Reads r.id, r.name, r.description, r.image_url, r.source_url, r.status and
 * r.meta_json. Imported / rejected rows get a status label instead of the
 * import / skip buttons. Every recipe-supplied string is passed through
 * _esc() before being interpolated.
 */
function _renderCard(r){
  const meta = r.meta_json || null;
  const quip = meta && meta.hecate_quip ? meta.hecate_quip : '';
  const desc = r.description || (meta && meta.summary) || '';
  const imgUrl = r.image_url || '';
  const klass = 'dcard ' + (r.status === 'imported' ? 'imported' :
                            r.status === 'rejected' ? 'rejected' : '');
  // The HTML parser decodes entities inside style="…" before the CSS parser
  // runs, so _esc() alone cannot keep a quote from closing url('…'); the
  // URL is percent-encoded first.
  const imgHtml = imgUrl
    ? `<div class="img" style="background-image:url('${_esc(_cssUrl(imgUrl))}')"></div>`
    : `<div class="img placeholder">🍴</div>`;
  let actionsHtml = '';
  if(r.status === 'imported'){
    actionsHtml = '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✓ imported</span>';
  } else if(r.status === 'rejected'){
    actionsHtml = '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✗ rejected</span>';
  } else {
    actionsHtml = `
      <button class="btn btn-purple" type="button" onclick="importDiscover(${r.id}, this)">🍳 import</button>
      <button class="btn" type="button" onclick="rejectDiscover(${r.id}, this)" title="hide from discover">✗ skip</button>
    `;
  }
  // _hostOf() never throws, so one malformed source_url cannot abort the
  // rendering of every other card in the grid.
  return `
    <div class="${klass}" data-id="${r.id}">
      ${imgHtml}
      <div class="body">
        <h3>${_esc(r.name || '(untitled)')}</h3>
        <div class="meta-line">${_metaLine(meta)}</div>
        ${quip ? `<div class="quip">${_esc(quip)}</div>` : ''}
        ${desc ? `<div class="desc">${_esc(desc)}</div>` : ''}
        <div class="src"><a href="${_esc(r.source_url)}" target="_blank" rel="noopener noreferrer">${_esc(_hostOf(r.source_url))}</a></div>
      </div>
      <div class="actions">${actionsHtml}</div>
    </div>`;
}
|
||||
|
||||
/**
 * Query /api/discover/search with the current filter values and repaint the
 * card grid.
 *
 * Empty filter inputs are left out of the query string. A failed request is
 * logged to the console and leaves the grid as it was.
 */
async function refreshSearch(){
  const params = new URLSearchParams();
  for(const id of ['q','cuisine','complexity','primary_protein','meal_type','kid_friendly_min','max_minutes','status']){
    const v = $(id).value;
    if(v !== '') params.set(id, v);
  }
  // Number each request so a slow, older response cannot overwrite the
  // results of a newer one when the user is typing quickly.
  const seq = (refreshSearch._seq || 0) + 1;
  refreshSearch._seq = seq;
  try {
    const r = await fetch('/api/discover/search?' + params.toString());
    // Without this check an error body would render as "no recipes match".
    if(!r.ok) throw new Error('HTTP ' + r.status);
    const d = await r.json();
    if(seq !== refreshSearch._seq) return;  // superseded by a later search
    const recipes = d.recipes || [];
    $('grid').innerHTML = recipes.map(_renderCard).join('');
    $('empty-msg').style.display = recipes.length === 0 ? '' : 'none';
  } catch(e){
    console.error('search failed', e);
  }
}
|
||||
|
||||
/** Restart the 250 ms debounce window; refreshSearch() runs once it elapses. */
function debouncedSearch(){
  // Drop any pending run so only the most recent change triggers a search.
  clearTimeout(searchTimer);
  searchTimer = setTimeout(() => { refreshSearch(); }, 250);
}
|
||||
|
||||
// Any edit to a filter control ('input' or 'change') schedules a debounced search.
for(const id of ['q','cuisine','complexity','primary_protein','meal_type','kid_friendly_min','max_minutes','status']){
  const control = $(id);
  for(const evt of ['input', 'change']){
    control.addEventListener(evt, debouncedSearch);
  }
}
|
||||
|
||||
/**
 * POST /api/discover/import/<id> and, on success, mark the card as imported
 * in place, showing the slug from the response.
 *
 * `btn` is the clicked button. It is disabled while the request is in flight
 * and restored, with an alert, if the import fails.
 */
async function importDiscover(id, btn){
  btn.disabled = true; btn.textContent = 'importing…';
  try {
    const r = await fetch('/api/discover/import/' + id, { method:'POST' });
    // Tolerate a non-JSON body (e.g. an HTML error page) so the alert shows
    // the HTTP status rather than a JSON parse error; rejectDiscover() does
    // the same.
    const d = await r.json().catch(() => ({}));
    if(!r.ok) throw new Error(d.error || r.status);
    // Mark card as imported in-place
    const card = btn.closest('.dcard');
    if(card){
      card.classList.add('imported');
      card.querySelector('.actions').innerHTML =
        '<span class="muted" style="flex:1; text-align:center; font-family:var(--mono); font-size:11px;">✓ imported as <code>' + _esc(d.slug) + '</code></span>';
    }
  } catch(e){
    btn.disabled = false; btn.textContent = '🍳 import';
    alert('import failed: ' + e.message);
  }
}
|
||||
|
||||
/**
 * POST /api/discover/reject/<id> and remove the card from the grid on success.
 *
 * `btn` is the clicked button. It is disabled while the request is in flight
 * and restored, with an alert, if the request fails.
 */
async function rejectDiscover(id, btn){
  btn.disabled = true;
  btn.textContent = '…';
  try {
    const resp = await fetch('/api/discover/reject/' + id, { method:'POST' });
    if(!resp.ok){
      // The error body may not be JSON; fall back to an empty object.
      let payload;
      try {
        payload = await resp.json();
      } catch(_parseErr){
        payload = {};
      }
      throw new Error(payload.error || resp.status);
    }
    const card = btn.closest('.dcard');
    if(card){
      card.remove();
    }
  } catch(err){
    btn.disabled = false;
    btn.textContent = '✗ skip';
    alert('reject failed: ' + err.message);
  }
}
|
||||
|
||||
/**
 * Select a seed list: writes the '__seed:<name>' marker into the url textarea
 * (startScrape() looks for that prefix) and records the name in data-seed.
 */
function seedSet(name){
  const box = $('urls-input');
  box.value = '__seed:' + name;
  box.setAttribute('data-seed', name);
}
|
||||
|
||||
/**
 * Start a scrape job from the textarea contents.
 *
 * Text starting with '__seed:' is sent as { seed: <name> }; anything else is
 * split into non-empty trimmed lines and sent as { urls: [...] }. On success
 * the new job is recorded as running, painted, and polling begins.
 */
async function startScrape(){
  const txt = $('urls-input').value.trim();
  let body;
  if(txt.startsWith('__seed:')){
    body = { seed: txt.slice(7).trim() };
  } else {
    const urls = txt.split(/\r?\n/).map(s => s.trim()).filter(Boolean);
    if(urls.length === 0){ alert('paste some urls or pick a seed'); return; }
    body = { urls };
  }
  try {
    const r = await fetch('/api/discover/scrape-start', {
      method:'POST', headers:{'Content-Type':'application/json'},
      body: JSON.stringify(body),
    });
    // Tolerate a non-JSON body (e.g. an HTML error page) so the alert shows
    // the HTTP status rather than a JSON parse error.
    const d = await r.json().catch(() => ({}));
    if(!r.ok) throw new Error(d.error || r.status);
    scrapeJob = { id: d.job_id, state: 'running' };
    paintScrape();
    pollScrape();
  } catch(e){
    alert('scrape start failed: ' + e.message);
  }
}
|
||||
|
||||
/**
 * Ask the server to cancel the current scrape job, after a confirm prompt.
 *
 * The status panel is refreshed whether or not the cancel was accepted; a
 * non-2xx response (for example 404 or 409) is then reported with an alert
 * instead of being silently ignored.
 */
async function cancelScrape(){
  if(!scrapeJob) return;
  if(!confirm('cancel scrape?')) return;
  try {
    const r = await fetch('/api/discover/scrape-cancel/' + scrapeJob.id, { method:'POST' });
    // Read the error body, if any, before repainting.
    const failure = r.ok ? null : await r.json().catch(() => ({}));
    // Repaint in both cases: a rejected cancel usually means the job has
    // already left the running state, which the panel should reflect.
    await fetchScrapeStatus();
    if(failure) throw new Error(failure.error || r.status);
  } catch(e){
    alert('cancel failed: ' + e.message);
  }
}
|
||||
|
||||
/**
 * Fetch /api/discover/scrape-status, store its `job` (or null) in scrapeJob
 * and repaint the progress panel. Failures are logged to the console only.
 */
async function fetchScrapeStatus(){
  try {
    const resp = await fetch('/api/discover/scrape-status');
    const { job } = await resp.json();
    scrapeJob = job || null;
    paintScrape();
  } catch(err){
    console.error('scrape status failed', err);
  }
}
|
||||
|
||||
/**
 * Paint the scrape progress panel from the current scrapeJob.
 *
 * - No job: panel and cancel button hidden, polling stopped.
 * - Running job: panel and cancel button shown, counters and bar updated.
 * - Any other state: panel shown, cancel button hidden, polling stopped.
 *
 * The card grid is refetched once, when a job that was previously painted as
 * running is first painted in a done / cancelled / failed state.
 */
function paintScrape(){
  const j = scrapeJob;
  const pp = $('scrape-progress');
  const cb = $('cancel-btn');
  const running = !!j && j.state === 'running';
  // Remember whether the previous paint saw a running job, so the grid is
  // refetched on the running -> finished transition only, not on every paint
  // of an already-finished job (such as the initial page load).
  const wasRunning = paintScrape._wasRunning === true;
  paintScrape._wasRunning = running;

  pp.style.display = j ? '' : 'none';
  cb.style.display = running ? '' : 'none';
  if(!running) stopPollScrape();
  if(!j) return;

  $('pages').textContent = j.pages_scraped || 0;
  $('added').textContent = j.recipes_added || 0;
  $('skipped').textContent = j.skipped_count || 0;
  $('errors').textContent = j.error_count || 0;
  $('last-error').textContent = j.last_error ? '· ' + j.last_error : '';
  // Bar can't show absolute pct without a known total; show a slow pulse on progress
  if(running){
    // `% 95` already keeps the value below 95, so no extra clamp is needed.
    const pct = ((j.pages_scraped || 0) * 7) % 95;
    $('bar').style.width = pct + '%';
  } else if(j.state === 'done'){
    $('bar').style.width = '100%';
  }
  const finished = j.state === 'done' || j.state === 'cancelled' || j.state === 'failed';
  if(wasRunning && finished){
    // refresh the grid so any new rows appear
    refreshSearch();
  }
}
|
||||
|
||||
/** Start polling scrape status every 2 s; does nothing if a poll is already active. */
function pollScrape(){
  if(scrapePoll) return;
  scrapePoll = setInterval(fetchScrapeStatus, 2000);
}
|
||||
/** Stop the scrape-status poll, if one is active, and clear its handle. */
function stopPollScrape(){
  if(!scrapePoll) return;
  clearInterval(scrapePoll);
  scrapePoll = null;
}
|
||||
|
||||
// Page bootstrap: paint the scrape panel from the server-rendered job, resume
// polling if that job is still running, then load the first page of results.
paintScrape();
if(scrapeJob){
  if(scrapeJob.state === 'running'){
    pollScrape();
  }
}
refreshSearch();
|
||||
</script>
|
||||
|
||||
{% endblock %}
|
||||
|
|
@ -65,6 +65,9 @@
|
|||
|
||||
<p class="muted" style="margin-top:14px;">have hecate generate per-recipe metadata — cuisine, complexity, macros, primary protein/carb, comfort tier, summary. the plan generator reads this so "high protein week" is a real query, not just a vibe.</p>
|
||||
<p><a class="btn" href="/enrich-recipes">✨ enrich recipes →</a></p>
|
||||
|
||||
<p class="muted" style="margin-top:14px;">browse a cross-household corpus of scraped recipes — search by cuisine / protein / time / kid-friendliness. one click sends a recipe to your mealie library; sterilize+enrich pipelines run on it like any other.</p>
|
||||
<p><a class="btn" href="/discover">🌐 discover recipes →</a></p>
|
||||
</section>
|
||||
{% endif %}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,3 +5,4 @@ Authlib==1.3.2
|
|||
PyMySQL==1.1.1
|
||||
cryptography==43.0.3
|
||||
rapidfuzz==3.10.1
|
||||
recipe-scrapers==15.6.0
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue