"""Email digest scheduler.

Aggregates the last 24h of jobs per project, builds a text + HTML email body,
and sends it via SMTP. Runs daily at 06:00 PT (configurable). Each project's
`notify.on` event filter controls which job outcomes show up in its digest.

Design notes:
- We use a sleep-until-next-window asyncio loop instead of cron — keeps the
  scheduler in-process so we can dry-run via the API for testing.
- Idempotency is enforced by a `digest_runs` table with UNIQUE(date, project_name).
  Calling run_once twice for the same date will only send one email per project.
- SMTP send is done via stdlib smtplib (sync) wrapped in run_in_executor so
  we don't block the loop while postfix grumbles.
- If SMTP isn't configured, the server lifespan logs a warning and skips
  scheduler startup. The /digests endpoints still work for dry-run rendering.
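- The "manual review tickets in bugs.sulkta.com" number is still a hard-coded
  zero placeholder from v0.1 wave 2C (wave 3 / step 9 is meant to wire it).
  Patch-drafted entries and the unmerged auto-patch count come from the patch
  attempts recorded in the digest window.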
"""
from __future__ import annotations

import asyncio
import json
import logging
import os
import smtplib
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from email.message import EmailMessage
from pathlib import Path
from typing import Iterable
from zoneinfo import ZoneInfo

from .db import DB


# Module-level logger; the name is a fixed string rather than __name__.
log = logging.getLogger("crafting_table.digest")


# Recognized notify.on event tags, documented for callers.
# NOTE(review): nothing visible in this module checks notify.on against this
# tuple — an unknown tag simply never matches any job. Confirm whether config
# validation elsewhere consumes it.
VALID_NOTIFY_EVENTS = (
    "audit_pass",
    "audit_fail",
    "test_fail",
    "lint_warn",
    "cve_found",
    "patch_drafted",
    "nightly_summary",
)

# Default events when a project has notify.email but empty notify.on.
DEFAULT_NOTIFY_ON = ("audit_fail", "cve_found", "patch_drafted")


@dataclass(frozen=True)
class SmtpConfig:
    """Immutable connection settings for the outbound SMTP relay."""

    host: str
    port: int = 587
    username: str | None = None
    password: str | None = None
    from_addr: str = "crafting-table@localhost"
    use_tls: bool = True

    @classmethod
    def from_env(cls) -> "SmtpConfig | None":
        """Build a config from CRAFTING_SMTP_* environment variables.

        Returns:
            None when CRAFTING_SMTP_HOST is unset or blank — the caller treats
            that as "digest disabled". Otherwise a populated SmtpConfig.

        Raises:
            ValueError: CRAFTING_SMTP_PORT is set to something non-numeric.

        Variables that are set but blank fall back to their defaults, and the
        TLS flag is matched case-insensitively so "False" / "NO" / "off"
        disable STARTTLS just like "0".
        """
        env = os.environ
        host = env.get("CRAFTING_SMTP_HOST", "").strip()
        if not host:
            return None

        # A blank port (e.g. `CRAFTING_SMTP_PORT=` in an env file) means "default".
        port_raw = env.get("CRAFTING_SMTP_PORT", "").strip() or "587"
        try:
            port = int(port_raw)
        except ValueError:
            raise ValueError(
                f"CRAFTING_SMTP_PORT must be an integer, got {port_raw!r}"
            ) from None

        tls_flag = env.get("CRAFTING_SMTP_TLS", "1").strip().lower()
        return cls(
            host=host,
            port=port,
            username=env.get("CRAFTING_SMTP_USER") or None,
            password=env.get("CRAFTING_SMTP_PASS") or None,
            from_addr=env.get("CRAFTING_SMTP_FROM", "").strip() or "crafting-table@localhost",
            use_tls=tls_flag not in ("0", "false", "no", "off", ""),
        )


# --- helpers ----------------------------------------------------------------


def _parse_pr_url(pr_url: str) -> tuple[str, str, int] | None:
    """Pull (owner, repo, number) out of a Gitea-style PR URL.

    Accepts URLs like ``http://192.168.0.5:3001/Sulkta-Coop/clawdforge/pulls/42``.
    Returns None if the URL doesn't look right — caller treats that as
    "can't determine state, assume open".
    """
    try:
        from urllib.parse import urlparse
        u = urlparse(pr_url)
        parts = [p for p in u.path.split("/") if p]
        # owner/repo/pulls/N
        if len(parts) >= 4 and parts[-2] in ("pulls", "issues"):
            return parts[-4], parts[-3], int(parts[-1])
    except (ValueError, TypeError):
        return None
    return None


def _job_event_tags(job: dict, findings: list[dict]) -> set[str]:
    """Map a job + its findings to notify.on event tags.

    Mirrors the spec's vocabulary:
    - audit/test/lint kind + status -> {recipe}_pass / {recipe}_fail / lint_warn
    - any finding with kind=='cve'   -> cve_found
    - patch_drafted is reserved for wave 3 / step 9 (unused here)
    """
    tags: set[str] = set()
    recipe = job["recipe"]
    status = job["status"]
    if status == "succeeded":
        tags.add(f"{recipe}_pass")
    elif status in ("failed", "timed_out"):
        tags.add(f"{recipe}_fail")

    has_warn = any(f.get("severity") == "warn" for f in findings)
    if recipe == "lint" and has_warn:
        tags.add("lint_warn")

    if any(f.get("kind") == "cve" for f in findings):
        tags.add("cve_found")

    return tags


def _outcome_glyph(job: dict, findings: list[dict]) -> str:
    """Choose the leading char per run line. Matches spec's worked example:
    ✓ pass, ✗ fail/timed_out, ⚠ pass-with-warnings (lint warn / non-cve warn).
    """
    if job["status"] in ("failed", "timed_out", "cancelled"):
        return "✗"  # ✗
    has_warn = any(f.get("severity") in ("warn", "warning") for f in findings)
    if has_warn:
        return "⚠"  # ⚠
    return "✓"  # ✓


def _summarize_job(job: dict, findings: list[dict]) -> str:
    """Human-readable trailing summary: '(N tests, M lints, K CVEs)' / fail msg."""
    recipe = job["recipe"]
    status = job["status"]
    if status in ("failed", "timed_out"):
        # Pull a short reason from the first error finding if any exist.
        err = next((f for f in findings if f.get("severity") in ("error", "high", "critical")), None)
        if err:
            msg = (err.get("message") or "").splitlines()[0][:80]
            return msg or f"{recipe} failed"
        return f"{recipe} failed (exit={job.get('exit_code')})"

    n_findings = len(findings)
    n_warn = sum(1 for f in findings if f.get("severity") in ("warn", "warning"))
    n_cve = sum(1 for f in findings if f.get("kind") == "cve")
    if recipe == "lint":
        return f"0 errors, {n_warn} warnings"
    if recipe == "audit":
        return f"{n_cve} CVEs, {n_warn} warnings"
    if recipe == "test":
        return f"{n_findings} findings, {n_warn} warnings"
    return f"{n_findings} findings"


def _filter_for_project(jobs_with_findings: list[tuple[dict, list[dict]]], notify_on: list[str]) -> list[tuple[dict, list[dict]]]:
    """Apply a project's notify.on filter to its jobs.

    nightly_summary -> show everything.
    Empty list -> use DEFAULT_NOTIFY_ON.
    Otherwise -> include jobs whose event-tag set intersects notify_on.
    """
    if not notify_on:
        notify_on = list(DEFAULT_NOTIFY_ON)
    if "nightly_summary" in notify_on:
        return list(jobs_with_findings)
    wanted = set(notify_on)
    out = []
    for job, findings in jobs_with_findings:
        tags = _job_event_tags(job, findings)
        if tags & wanted:
            out.append((job, findings))
    return out


# --- rendering --------------------------------------------------------------


def _render_text(
    date_str: str,
    sections: list[dict],
    full_log_url: str,
    *,
    open_followups: int = 0,
) -> str:
    """Build the text body. Matches the worked example in the spec."""
    total_runs = sum(len(s["runs"]) for s in sections)
    total_drafted = sum(len(s.get("patches", [])) for s in sections)
    total_cves = sum(s["cves"] for s in sections)
    subj_summary = f"{total_runs} build" + ("s" if total_runs != 1 else "")
    lines = []
    lines.append(f"Subject: crafting-table digest — {date_str} ({subj_summary}, {total_drafted} patches drafted, {total_cves} CVEs)")
    lines.append("")
    lines.append("Overnight runs (last 24h):")
    if not sections:
        lines.append("  (no activity)")
    else:
        for s in sections:
            for run in s["runs"]:
                glyph = run["glyph"]
                proj_sub = f"{s['project']}::{run['subproject']}"
                lines.append(
                    f"  {glyph} {proj_sub:<32s} {run['recipe']:<6s} {run['status']:<5s} ({run['summary']})"
                )
            for patch in s.get("patches", []):
                if patch.get("branch_name"):
                    lines.append(
                        f"       → patch drafted: branch {patch['branch_name']}"
                    )
                if patch.get("pr_url"):
                    lines.append(f"       → PR: {patch['pr_url']}")
    lines.append("")
    lines.append("Open follow-ups:")
    lines.append(f"  - {open_followups} unmerged auto-patches")
    lines.append("  - 0 manual review tickets in bugs.sulkta.com")
    lines.append("")
    lines.append(f"Full log: {full_log_url}")
    return "\n".join(lines) + "\n"


def _render_html(
    date_str: str,
    sections: list[dict],
    full_log_url: str,
    *,
    open_followups: int = 0,
) -> str:
    """Build the HTML body. Same content, table styling, monospace font."""
    total_runs = sum(len(s["runs"]) for s in sections)
    total_drafted = sum(len(s.get("patches", [])) for s in sections)
    total_cves = sum(s["cves"] for s in sections)

    rows = []
    for s in sections:
        for run in s["runs"]:
            proj_sub = f"{s['project']}::{run['subproject']}"
            rows.append(
                f"<tr><td>{run['glyph']}</td><td>{proj_sub}</td>"
                f"<td>{run['recipe']}</td><td>{run['status']}</td>"
                f"<td>{run['summary']}</td></tr>"
            )
        for patch in s.get("patches", []):
            cell = ""
            if patch.get("branch_name"):
                cell += f"branch <code>{patch['branch_name']}</code>"
            if patch.get("pr_url"):
                cell += f" — <a href=\"{patch['pr_url']}\">PR</a>"
            if cell:
                rows.append(
                    f'<tr><td>↳</td><td colspan="4">{cell}</td></tr>'
                )
    if not rows:
        rows.append('<tr><td colspan="5"><i>(no activity)</i></td></tr>')

    return f"""<!DOCTYPE html>
<html><head><meta charset="utf-8"><style>
body {{ font-family: ui-monospace, Menlo, Consolas, monospace; }}
h2 {{ font-size: 1.0em; }}
table {{ border-collapse: collapse; }}
td, th {{ padding: 2px 8px; border-bottom: 1px solid #ddd; }}
tr td:first-child {{ font-size: 1.2em; }}
.foot {{ color: #888; font-size: 0.9em; margin-top: 1em; }}
</style></head><body>
<h2>crafting-table digest — {date_str}<br>
({total_runs} runs, {total_drafted} patches drafted, {total_cves} CVEs)</h2>
<h3>Overnight runs (last 24h)</h3>
<table>
<thead><tr><th></th><th>project</th><th>recipe</th><th>status</th><th>summary</th></tr></thead>
<tbody>
{''.join(rows)}
</tbody>
</table>
<h3>Open follow-ups</h3>
<ul>
<li>{open_followups} unmerged auto-patches</li>
<li>0 manual review tickets in bugs.sulkta.com</li>
</ul>
<p class="foot">Full log: <a href="{full_log_url}">{full_log_url}</a></p>
</body></html>
"""


# --- scheduler --------------------------------------------------------------


class DigestScheduler:
    """Daily 06:00 PT digest. Aggregates 24h of jobs per project + sends SMTP.

    Lifecycle:
        scheduler = DigestScheduler(db, smtp_config)
        await scheduler.start()
        ...
        await scheduler.stop()

    Manual trigger via run_once(...). Pass dry_run=True to render without
    sending — the body is still returned so the /digests endpoint works.
    """

    def __init__(
        self,
        db: DB,
        smtp: SmtpConfig | None,
        *,
        time_zone: str = "America/Los_Angeles",
        hour: int = 6,
        minute: int = 0,
        full_log_base_url: str = "http://192.168.0.5:8810/digests",
        gitea_pr_state_check=None,
    ):
        self.db = db
        self.smtp = smtp
        self.tz = ZoneInfo(time_zone)
        self.hour = hour
        self.minute = minute
        self.full_log_base_url = full_log_base_url
        # Optional callable: (owner, repo, number) -> "open" | "closed" | None.
        # Used to count open follow-ups across all PR-opened patches in the
        # window. Tests inject a stub so we don't make real network calls.
        self.gitea_pr_state_check = gitea_pr_state_check

        self._loop_task: asyncio.Task | None = None
        self._stopping = False

    async def start(self) -> None:
        if self._loop_task is not None:
            return
        self._stopping = False
        self._loop_task = asyncio.create_task(self._loop())

    async def stop(self) -> None:
        self._stopping = True
        if self._loop_task is not None:
            self._loop_task.cancel()
            try:
                await self._loop_task
            except asyncio.CancelledError:
                pass
            self._loop_task = None

    async def _loop(self) -> None:
        try:
            while not self._stopping:
                wait = self._seconds_until_next_window()
                try:
                    await asyncio.sleep(wait)
                except asyncio.CancelledError:
                    raise
                if self._stopping:
                    break
                try:
                    await self.run_once()
                except Exception as e:
                    log.exception("digest run_once failed: %s", e)
                # Sleep ~1 minute past the trigger so the next call to
                # _seconds_until_next_window doesn't re-fire this same window.
                try:
                    await asyncio.sleep(60)
                except asyncio.CancelledError:
                    raise
        except asyncio.CancelledError:
            raise

    def _seconds_until_next_window(self) -> float:
        """Compute seconds to the next HH:MM in our timezone. If we're past
        today's window, jumps to tomorrow."""
        now = datetime.now(self.tz)
        target = now.replace(hour=self.hour, minute=self.minute, second=0, microsecond=0)
        if target <= now:
            target = target + timedelta(days=1)
        return max(1.0, (target - now).total_seconds())

    # ---------- digest core ------------------------------------------------

    async def run_once(
        self,
        *,
        target_date: str | None = None,
        dry_run: bool = False,
        now_ts: int | None = None,
    ) -> dict:
        """Build + (maybe) send the digest.

        target_date: 'YYYY-MM-DD' in the configured tz. Defaults to today.
        dry_run:     skip SMTP send, still record nothing in digest_runs.
        now_ts:      override "now" for tests; defaults to time.time().

        Returns a summary:
            {
              "date": "YYYY-MM-DD",
              "dry_run": bool,
              "projects": [
                  {"project": str, "recipients": [...], "job_count": int,
                   "sent": bool, "skipped_reason": str|None}
              ],
              "text": "<full text body, joined>",
              "html": "<full html body, joined>",
              "sections": [...],
            }
        """
        loop = asyncio.get_event_loop()
        return await loop.run_in_executor(
            None,
            lambda: self._run_once_sync(target_date=target_date, dry_run=dry_run, now_ts=now_ts),
        )

    def _run_once_sync(
        self,
        *,
        target_date: str | None,
        dry_run: bool,
        now_ts: int | None,
    ) -> dict:
        if now_ts is None:
            now_ts = int(time.time())

        if target_date is None:
            date_str = datetime.fromtimestamp(now_ts, self.tz).strftime("%Y-%m-%d")
        else:
            date_str = target_date

        # Window: last 24h ending at "now" (or end of target_date if backfilling).
        window_end = now_ts
        if target_date is not None:
            # Anchor to local-midnight + 24h of the target date so backfills
            # are reproducible.
            day = datetime.strptime(target_date, "%Y-%m-%d").replace(tzinfo=self.tz)
            window_end = int((day + timedelta(days=1)).timestamp())
        window_start = window_end - 24 * 3600

        # Pull all projects + their last-24h jobs + findings.
        projects = self.db.list_projects()
        per_project_sections: list[dict] = []
        per_project_meta: list[dict] = []
        full_log_url = f"{self.full_log_base_url}/{date_str}"
        # Total open follow-ups across all projects in the window.
        open_followups_total = 0

        for prow in projects:
            recipe = json.loads(prow.get("recipe_json") or "{}")
            notify = recipe.get("notify") or {}
            recipients: list[str] = list(notify.get("email") or [])
            notify_on: list[str] = list(notify.get("on") or [])

            jobs = self.db.list_jobs(project_name=prow["name"], limit=500)
            # Window filter — finished_at if present, else queued_at.
            in_window = []
            for j in jobs:
                ts = j.get("finished_at") or j.get("queued_at") or 0
                if window_start <= ts <= window_end:
                    in_window.append(j)
            with_findings: list[tuple[dict, list[dict]]] = [
                (j, self.db.list_findings(j["id"])) for j in in_window
            ]
            filtered = _filter_for_project(with_findings, notify_on)

            section_runs: list[dict] = []
            cves = 0
            for job, findings in filtered:
                section_runs.append({
                    "subproject": job["subproject_path"],
                    "recipe": job["recipe"],
                    "status": "pass" if job["status"] == "succeeded" else
                              ("fail" if job["status"] in ("failed", "timed_out", "cancelled") else "warn"),
                    "summary": _summarize_job(job, findings),
                    "glyph": _outcome_glyph(job, findings),
                })
                cves += sum(1 for f in findings if f.get("kind") == "cve")

            # Patch attempts for this project in the same window.
            patch_rows = self.db.list_patch_attempts_in_window(
                window_start=window_start,
                window_end=window_end,
                project_name=prow["name"],
                statuses=("pushed", "pr_opened"),
            )
            patch_entries: list[dict] = []
            for pa in patch_rows:
                patch_entries.append({
                    "branch_name": pa.get("branch_name"),
                    "pr_url": pa.get("pr_url"),
                    "status": pa.get("status"),
                })
                # Count open follow-ups via Gitea state check (when configured).
                if pa.get("status") == "pr_opened" and pa.get("pr_url"):
                    if self.gitea_pr_state_check is not None:
                        owner_repo_n = _parse_pr_url(pa["pr_url"])
                        if owner_repo_n is not None:
                            owner, repo, n = owner_repo_n
                            try:
                                state = self.gitea_pr_state_check(owner, repo, n)
                            except Exception as e:
                                log.warning(
                                    "digest: gitea PR state check failed: %s", e
                                )
                                state = None
                            if state in (None, "open"):
                                open_followups_total += 1
                    else:
                        # Without a checker, treat all pr_opened rows as still open.
                        open_followups_total += 1

            section = {
                "project": prow["name"],
                "runs": section_runs,
                "cves": cves,
                "patches": patch_entries,
            }

            meta = {
                "project": prow["name"],
                "recipients": recipients,
                "job_count": len(filtered),
                "sent": False,
                "skipped_reason": None,
            }

            # Decide whether to include this project's section in the body.
            # "include" decisions:
            #   - no recipients -> never include or send
            #   - recipients + activity -> include + send
            #   - recipients + no activity + nightly_summary -> include "no activity" + send
            #   - recipients + no activity + no nightly_summary -> skip silently
            wants_summary = "nightly_summary" in notify_on
            if not recipients:
                meta["skipped_reason"] = "no_recipients"
                per_project_meta.append(meta)
                continue
            if not section_runs and not patch_entries and not wants_summary:
                meta["skipped_reason"] = "zero_activity"
                per_project_meta.append(meta)
                continue

            per_project_sections.append(section)
            per_project_meta.append(meta)

        text_body = _render_text(
            date_str,
            per_project_sections,
            full_log_url,
            open_followups=open_followups_total,
        )
        html_body = _render_html(
            date_str,
            per_project_sections,
            full_log_url,
            open_followups=open_followups_total,
        )

        # Per-project send loop. Idempotency check via digest_runs UNIQUE.
        for meta, section in zip(
            [m for m in per_project_meta if m["skipped_reason"] is None],
            per_project_sections,
        ):
            project_name = meta["project"]
            if not dry_run:
                already = self.db.digest_run_exists(date_str, project_name)
                if already:
                    meta["skipped_reason"] = "already_sent"
                    continue

            # Build a per-project-scoped body.
            proj_text = _render_text(
                date_str,
                [section],
                full_log_url,
                open_followups=open_followups_total,
            )
            proj_html = _render_html(
                date_str,
                [section],
                full_log_url,
                open_followups=open_followups_total,
            )
            n_patches = len(section.get("patches", []))
            subject = (
                f"crafting-table digest — {date_str} "
                f"({len(section['runs'])} runs, {n_patches} patches drafted, {section['cves']} CVEs)"
            )

            if dry_run or self.smtp is None:
                meta["sent"] = False
                if self.smtp is None:
                    meta["skipped_reason"] = "smtp_disabled"
                continue

            try:
                self._send_email(
                    recipients=meta["recipients"],
                    subject=subject,
                    text=proj_text,
                    html=proj_html,
                )
                meta["sent"] = True
                self.db.record_digest_run(
                    date=date_str,
                    project_name=project_name,
                    sent_at=int(time.time()),
                    recipient_count=len(meta["recipients"]),
                    job_count=meta["job_count"],
                )
            except Exception as e:
                log.exception("digest SMTP send failed for %s: %s", project_name, e)
                meta["sent"] = False
                meta["skipped_reason"] = f"smtp_error:{type(e).__name__}"

        return {
            "date": date_str,
            "dry_run": dry_run,
            "projects": per_project_meta,
            "text": text_body,
            "html": html_body,
            "sections": per_project_sections,
            "full_log_url": full_log_url,
        }

    # ---------- SMTP send --------------------------------------------------

    def _send_email(
        self,
        *,
        recipients: list[str],
        subject: str,
        text: str,
        html: str,
    ) -> None:
        """Send one MIME multipart email to the given list of recipients.

        We send one message with a To: header containing all recipients, then
        loop sendmail per address so each recipient gets a personal copy
        (so the test's assert-once-per-recipient pattern is meaningful, and
        so partial-failures don't poison the whole batch).
        """
        if self.smtp is None:
            raise RuntimeError("smtp not configured")

        with smtplib.SMTP(self.smtp.host, self.smtp.port, timeout=30) as conn:
            if self.smtp.use_tls:
                try:
                    conn.starttls()
                except smtplib.SMTPNotSupportedError:
                    log.warning("SMTP server does not support STARTTLS; sending cleartext")
            if self.smtp.username and self.smtp.password:
                conn.login(self.smtp.username, self.smtp.password)
            for addr in recipients:
                msg = EmailMessage()
                msg["Subject"] = subject
                msg["From"] = self.smtp.from_addr
                msg["To"] = addr
                msg.set_content(text)
                msg.add_alternative(html, subtype="html")
                conn.send_message(msg)