From 4eab869df0fdd55a3faa212bcd82dc374b6f6477 Mon Sep 17 00:00:00 2001 From: Kayos Date: Wed, 29 Apr 2026 09:04:48 -0700 Subject: [PATCH] v0.1 wave 3 (steps 9+10): autonomous patch loop + production recipes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Step 9 — autonomous patch loop: - patcher.py: clawdforge session → unified diff → worktree apply → verify recipe → push branch → open Gitea PR - migration 007: patch_attempts (UNIQUE per finding+attempt, max 3 attempts) - runner.py: post-parse hook fires patcher.maybe_draft_for_job when notify.auto_patch=true - server.py: POST /jobs/{id}/patches, GET /patches, GET /patches/{id} - digest.py: patch-drafted lines + open-follow-up count via Gitea PR state check - mcp: crafting_table_draft_patch stub replaced with real implementation - tests/test_patcher.py + tests/test_patches_api.py: 27 new tests No auto-merge — patches stop at PR-open. Cobb merges. Step 10 — production recipes: - examples/recipes/clawdforge.json: 14 subprojects across all SDKs, audit nightly - examples/recipes/cauldron.json: single Flask subproject, audit nightly - examples/recipes/tradecraft.json: nightly audit, auto_patch=false (manual review) - examples/register-all.sh: bulk-register helper with GITEA_TOKEN substitution - README "Autonomous patch loop" + "First production recipes" sections Tests: server 116→143, mcp 65→67. All green. Spec: memory/spec-crafting-table.md --- .env.example | 11 + README.md | 112 ++- crafting_table/db.py | 152 ++++ crafting_table/digest.py | 134 +++- crafting_table/patcher.py | 1102 ++++++++++++++++++++++++++ crafting_table/server.py | 137 ++++ examples/recipes/cauldron.json | 19 + examples/recipes/clawdforge.json | 24 + examples/recipes/tradecraft.json | 19 + examples/register-all.sh | 48 ++ mcp/src/crafting_table_mcp/client.py | 27 + mcp/src/crafting_table_mcp/server.py | 77 +- mcp/tests/test_tools.py | 132 ++- pyproject.toml | 1 + requirements.txt | 1 + tests/test_patcher.py | 545 +++++++++++++ tests/test_patches_api.py | 289 +++++++ 17 files changed, 2752 insertions(+), 78 deletions(-) create mode 100644 crafting_table/patcher.py create mode 100644 examples/recipes/cauldron.json create mode 100644 examples/recipes/clawdforge.json create mode 100644 examples/recipes/tradecraft.json create mode 100755 examples/register-all.sh create mode 100644 tests/test_patcher.py create mode 100644 tests/test_patches_api.py diff --git a/.env.example b/.env.example index e2318b9..60f4f10 100644 --- a/.env.example +++ b/.env.example @@ -42,3 +42,14 @@ CRAFTING_GC_AGE=86400 # CRAFTING_SMTP_PASS= # CRAFTING_SMTP_FROM=crafting-table@sulkta.com # CRAFTING_SMTP_TLS=1 + +# --- Autonomous patch loop (wave 3, optional) ------------------------------ +# All four CRAFTING_CLAWDFORGE_* + CRAFTING_GITEA_* must be set for the +# patcher to come up. Missing any → patcher disabled, /jobs/{id}/patches +# returns 503. Runner hook silently no-ops. +# CRAFTING_CLAWDFORGE_URL=http://192.168.0.5:8800 +# CRAFTING_CLAWDFORGE_TOKEN=cf_... +# CRAFTING_GITEA_URL=http://192.168.0.5:3001 +# CRAFTING_GITEA_TOKEN= +# CRAFTING_PATCHER_MAX_ATTEMPTS=3 +# CRAFTING_PATCHER_BRANCH_PREFIX=crafting-table/auto/ diff --git a/README.md b/README.md index d3e87c5..0463df7 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ through clawdforge. Spec: `Sulkta-Coop/openclaw-workspace/memory/spec-crafting-table.md` (LAN-only). -## Status — v0.1 step 7 of 10 +## Status — v0.1 complete (10 of 10) - [x] Step 1: Dockerfile + per-language smoke - [x] Step 2: SQLite ledger + project registry @@ -25,8 +25,8 @@ Spec: `Sulkta-Coop/openclaw-workspace/memory/spec-crafting-table.md` (LAN-only). - [x] Step 6: Findings extraction + storage - [x] Step 7: MCP server (stdio JSON-RPC, 8 tools) — see [mcp/README.md](mcp/README.md) - [x] Step 8: Email digest scheduler -- [ ] Step 9: Autonomous patch loop (clawdforge integration) -- [ ] Step 10: Production recipes — clawdforge, cauldron, tradecraft +- [x] Step 9: Autonomous patch loop (clawdforge integration → unified diff → worktree apply → verify recipe → push branch → Gitea PR) +- [x] Step 10: Production recipes — clawdforge, cauldron, tradecraft (see [examples/recipes/](examples/recipes/)) ## Toolchains in v0.1 @@ -71,6 +71,9 @@ override via `CRAFTING_LAN_CIDRS`. | GET | `/jobs/{id}` | owner | State + last 200 log lines | | GET | `/jobs/{id}/log` | owner | Full log (file stream) | | GET | `/jobs/{id}/findings` | owner | Structured findings (see Findings) | +| POST | `/jobs/{id}/patches` | owner | Trigger an auto-patch attempt (wave 3) | +| GET | `/patches?project=&status=&limit=`| any | List own patch attempts | +| GET | `/patches/{id}` | owner | Patch attempt detail | Cross-token access returns **404, not 403** — same existence-leak guard as clawdforge sessions. @@ -207,9 +210,15 @@ without lock contention. The runner is the only mutator of `jobs`/ │ ├── runner.py # async job pool + subprocess exec │ ├── workspace.py # bare clone + worktree materialization + gc │ ├── models.py # Pydantic schemas +│ ├── digest.py # email digest scheduler +│ ├── patcher.py # autonomous patch loop (clawdforge → diff → verify → PR) +│ ├── parsers/ # per-language Finding extractors │ └── config.py # env-driven config -├── tests/ # pytest suite (~60 tests) +├── tests/ # pytest suite (143 tests) ├── mcp/ # crafting-table-mcp — MCP stdio bridge (separate pip install) +├── examples/ +│ ├── recipes/ # production recipes — clawdforge, cauldron, tradecraft +│ └── register-all.sh # bulk-register helper ├── pyproject.toml ├── requirements.txt └── .env.example @@ -332,6 +341,101 @@ curl -sH "Authorization: Bearer $ADMIN" \ Idempotency: `digest_runs` table holds `UNIQUE(date, project_name)`, so the 06:00 loop is safe to re-fire on the same day — only the first call sends. +## Autonomous patch loop + +Wave 3 wires crafting-table into clawdforge so a project with +`notify.auto_patch=true` gets an automatic patch attempt on every +actionable finding (lint with file/line; cve with a known fix). Lifecycle: + +1. Runner finishes a job + parsers populate findings. +2. Post-job hook fires: pulls the highest-severity actionable finding, + reads ±20 lines of context from the worktree. +3. Patcher opens a clawdforge session (`POST /sessions`), sends one + turn with the finding + source context + project metadata, expects + `{"diff": ..., "explanation": ..., "confidence": ...}` back. +4. Diff applied to a fresh worktree on `crafting-table/auto/-`. + Apply failure → status `apply_failed`. +5. Recipe re-runs against the patched worktree (the **verify** step). + Fail → `verify_failed`. +6. Pass → commit + push + open Gitea PR. Status `pr_opened`. +7. clawdforge session always closed. + +Configuration (env vars): + +``` +CRAFTING_CLAWDFORGE_URL=http://192.168.0.5:8800 +CRAFTING_CLAWDFORGE_TOKEN=cf_... +CRAFTING_GITEA_URL=http://192.168.0.5:3001 +CRAFTING_GITEA_TOKEN= +CRAFTING_PATCHER_MAX_ATTEMPTS=3 +CRAFTING_PATCHER_BRANCH_PREFIX=crafting-table/auto/ +``` + +If any of the four required vars is missing, the patcher stays disabled +and `POST /jobs/{id}/patches` returns 503. The runner hook silently no-ops +in that case so existing job flow is unaffected. + +**Verification cost matters.** The verify step re-runs the failing recipe +on the patched worktree — for projects with multi-minute builds this +DOUBLES the latency. Set `notify.auto_patch=true` only for projects where +the audit/test recipe is <5min, OR accept the latency. v0.2 candidate: +"fast verify" mode that re-runs only the specific lint that fired. + +`patch_attempts` table holds every attempt with `UNIQUE(finding_id, attempt_number)`; +the loop early-exits at `max_attempts_per_finding` (default 3). No +auto-merge; PRs land for human review. + +Manual trigger: + +```bash +curl -sH "Authorization: Bearer $TOKEN" \ + -X POST http://192.168.0.5:8810/jobs/$JOB/patches \ + -d '{"finding_id": 42}' | jq . +# → {"ok": true, "attempt": {"status": "pr_opened", "pr_url": "...", ...}} +``` + +## First production recipes + +Three recipes ship in `examples/recipes/`: + +| Recipe | Subprojects | Schedule (audit) | auto_patch | +|---------------|-------------|------------------|------------| +| `clawdforge` | 14 (one per SDK + root) | nightly 02:00 | **true** | +| `cauldron` | 1 (Flask app, `.`) | nightly 02:00 | **true** | +| `tradecraft` | 1 (`.`) | nightly 02:00 | **false** (manual review) | + +Each ships with a placeholder `REPLACE_WITH_GITEA_TOKEN` in `git_url`; +`examples/register-all.sh` substitutes `$GITEA_TOKEN` at register time so +no real token ever lands in the repo. + +Smoke procedure (post-deploy): + +``` +1. docker compose up -d +2. TOKEN=$(cat /mnt/user/appdata/crafting-table/data/admin-bearer.txt) +3. CRAFTING_TABLE_TOKEN=$TOKEN GITEA_TOKEN= bash examples/register-all.sh +4. curl -H "Authorization: Bearer $TOKEN" http://192.168.0.5:8810/projects \ + → expect 3 projects (clawdforge, cauldron, tradecraft) +5. curl -X POST -H "Authorization: Bearer $TOKEN" \ + http://192.168.0.5:8810/projects/clawdforge/jobs \ + -d '{"recipe":"test","subproject":"clients/python"}' + → expect job_id +6. Poll GET /jobs/{job_id} until status terminal → expect succeeded +``` + +Per-recipe smoke status (today, pre-deploy): + +- `clawdforge` — 14 subprojects; `clients/python` & `clients/typescript` + & `clients/go` & `clients/rust` known clean from existing CI; ruby / + php / kotlin / java / csharp / swift compile-cleanly today but + toolchain availability inside the crafting-table image is what step 1 + smoke verified. Bash subproject's `test/run.sh` may not exist (manual + check needed post-deploy). +- `cauldron` — single Flask subproject; pip-audit & pytest known to run + cleanly from the cauldron repo's own CI history. +- `tradecraft` — single subproject; auto_patch is **off** by design + (production app, manual PR review only). + ## MCP bridge The `mcp/` subdirectory ships a self-contained `crafting-table-mcp` Python diff --git a/crafting_table/db.py b/crafting_table/db.py index fb1bd4d..f3d7823 100644 --- a/crafting_table/db.py +++ b/crafting_table/db.py @@ -127,6 +127,32 @@ MIGRATIONS: list[tuple[str, str]] = [ CREATE INDEX IF NOT EXISTS idx_digest_runs_date ON digest_runs(date); """, ), + ( + "007_patch_attempts", + """ + CREATE TABLE IF NOT EXISTS patch_attempts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + finding_id INTEGER NOT NULL, + job_id TEXT NOT NULL, + project_name TEXT NOT NULL, + attempt_number INTEGER NOT NULL, + status TEXT NOT NULL, + branch_name TEXT, + pr_url TEXT, + diff_excerpt TEXT, + session_id TEXT, + error TEXT, + created_at INTEGER NOT NULL, + finished_at INTEGER, + UNIQUE(finding_id, attempt_number), + FOREIGN KEY (finding_id) REFERENCES findings(id) ON DELETE CASCADE, + FOREIGN KEY (job_id) REFERENCES jobs(id) ON DELETE CASCADE + ); + CREATE INDEX IF NOT EXISTS idx_patch_attempts_status ON patch_attempts(status); + CREATE INDEX IF NOT EXISTS idx_patch_attempts_project ON patch_attempts(project_name); + CREATE INDEX IF NOT EXISTS idx_patch_attempts_finding ON patch_attempts(finding_id); + """, + ), ] # fmt: on @@ -550,6 +576,132 @@ class DB: ).fetchall() return [dict(r) for r in rows] + # ---------- patch attempts ---------------------------------------------- + + def get_finding(self, finding_id: int) -> dict | None: + with self._conn() as c: + row = c.execute( + "SELECT * FROM findings WHERE id=?", (int(finding_id),) + ).fetchone() + return dict(row) if row else None + + def list_findings_for_job(self, job_id: str) -> list[dict]: + """Alias matching list_findings — kept for callers that prefer the + more explicit name.""" + return self.list_findings(job_id) + + def count_patch_attempts(self, finding_id: int) -> int: + with self._conn() as c: + row = c.execute( + "SELECT COUNT(*) AS n FROM patch_attempts WHERE finding_id=?", + (int(finding_id),), + ).fetchone() + return int(row["n"]) if row else 0 + + def insert_patch_attempt( + self, + *, + finding_id: int, + job_id: str, + project_name: str, + attempt_number: int, + status: str, + branch_name: str | None = None, + pr_url: str | None = None, + diff_excerpt: str | None = None, + session_id: str | None = None, + error: str | None = None, + ) -> int: + with self._conn() as c: + cur = c.execute( + """ + INSERT INTO patch_attempts + (finding_id, job_id, project_name, attempt_number, status, + branch_name, pr_url, diff_excerpt, session_id, error, + created_at, finished_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + int(finding_id), + job_id, + project_name, + int(attempt_number), + status, + branch_name, + pr_url, + diff_excerpt, + session_id, + error, + int(time.time()), + int(time.time()), + ), + ) + return int(cur.lastrowid) + + def get_patch_attempt(self, attempt_id: int) -> dict | None: + with self._conn() as c: + row = c.execute( + "SELECT * FROM patch_attempts WHERE id=?", (int(attempt_id),) + ).fetchone() + return dict(row) if row else None + + def list_patch_attempts( + self, + *, + project_name: str | None = None, + status: str | None = None, + finding_id: int | None = None, + owner_token: str | None = None, + limit: int = 100, + ) -> list[dict]: + sql = """ + SELECT pa.* FROM patch_attempts pa + JOIN projects p ON p.name = pa.project_name + WHERE 1=1 + """ + params: list = [] + if project_name is not None: + sql += " AND pa.project_name=?" + params.append(project_name) + if status is not None: + sql += " AND pa.status=?" + params.append(status) + if finding_id is not None: + sql += " AND pa.finding_id=?" + params.append(int(finding_id)) + if owner_token is not None: + sql += " AND p.owner_token=?" + params.append(owner_token) + sql += " ORDER BY pa.created_at DESC LIMIT ?" + params.append(int(limit)) + with self._conn() as c: + rows = c.execute(sql, params).fetchall() + return [dict(r) for r in rows] + + def list_patch_attempts_in_window( + self, + *, + window_start: int, + window_end: int, + project_name: str | None = None, + statuses: tuple[str, ...] | None = None, + ) -> list[dict]: + """Patch attempts created within [window_start, window_end]. Used by + the email digest to surface drafted patches in the daily summary.""" + sql = "SELECT * FROM patch_attempts WHERE created_at >= ? AND created_at <= ?" + params: list = [int(window_start), int(window_end)] + if project_name is not None: + sql += " AND project_name=?" + params.append(project_name) + if statuses: + placeholders = ",".join("?" for _ in statuses) + sql += f" AND status IN ({placeholders})" + params.extend(statuses) + sql += " ORDER BY created_at" + with self._conn() as c: + rows = c.execute(sql, params).fetchall() + return [dict(r) for r in rows] + # ---------- async wrappers ---------------------------------------------- async def arun(self, fn, *args, **kwargs): diff --git a/crafting_table/digest.py b/crafting_table/digest.py index 7317288..5327ba2 100644 --- a/crafting_table/digest.py +++ b/crafting_table/digest.py @@ -82,6 +82,25 @@ class SmtpConfig: # --- helpers ---------------------------------------------------------------- +def _parse_pr_url(pr_url: str) -> tuple[str, str, int] | None: + """Pull (owner, repo, number) out of a Gitea-style PR URL. + + Accepts URLs like ``http://192.168.0.5:3001/Sulkta-Coop/clawdforge/pulls/42``. + Returns None if the URL doesn't look right — caller treats that as + "can't determine state, assume open". + """ + try: + from urllib.parse import urlparse + u = urlparse(pr_url) + parts = [p for p in u.path.split("/") if p] + # owner/repo/pulls/N + if len(parts) >= 4 and parts[-2] in ("pulls", "issues"): + return parts[-4], parts[-3], int(parts[-1]) + except (ValueError, TypeError): + return None + return None + + def _job_event_tags(job: dict, findings: list[dict]) -> set[str]: """Map a job + its findings to notify.on event tags. @@ -167,10 +186,16 @@ def _filter_for_project(jobs_with_findings: list[tuple[dict, list[dict]]], notif # --- rendering -------------------------------------------------------------- -def _render_text(date_str: str, sections: list[dict], full_log_url: str) -> str: +def _render_text( + date_str: str, + sections: list[dict], + full_log_url: str, + *, + open_followups: int = 0, +) -> str: """Build the text body. Matches the worked example in the spec.""" total_runs = sum(len(s["runs"]) for s in sections) - total_drafted = 0 # placeholder, wave 3 + total_drafted = sum(len(s.get("patches", [])) for s in sections) total_cves = sum(s["cves"] for s in sections) subj_summary = f"{total_runs} build" + ("s" if total_runs != 1 else "") lines = [] @@ -187,19 +212,32 @@ def _render_text(date_str: str, sections: list[dict], full_log_url: str) -> str: lines.append( f" {glyph} {proj_sub:<32s} {run['recipe']:<6s} {run['status']:<5s} ({run['summary']})" ) + for patch in s.get("patches", []): + if patch.get("branch_name"): + lines.append( + f" → patch drafted: branch {patch['branch_name']}" + ) + if patch.get("pr_url"): + lines.append(f" → PR: {patch['pr_url']}") lines.append("") lines.append("Open follow-ups:") - lines.append(" - 0 unmerged auto-patches") + lines.append(f" - {open_followups} unmerged auto-patches") lines.append(" - 0 manual review tickets in bugs.sulkta.com") lines.append("") lines.append(f"Full log: {full_log_url}") return "\n".join(lines) + "\n" -def _render_html(date_str: str, sections: list[dict], full_log_url: str) -> str: +def _render_html( + date_str: str, + sections: list[dict], + full_log_url: str, + *, + open_followups: int = 0, +) -> str: """Build the HTML body. Same content, table styling, monospace font.""" total_runs = sum(len(s["runs"]) for s in sections) - total_drafted = 0 + total_drafted = sum(len(s.get("patches", [])) for s in sections) total_cves = sum(s["cves"] for s in sections) rows = [] @@ -211,6 +249,16 @@ def _render_html(date_str: str, sections: list[dict], full_log_url: str) -> str: f"{run['recipe']}{run['status']}" f"{run['summary']}" ) + for patch in s.get("patches", []): + cell = "" + if patch.get("branch_name"): + cell += f"branch {patch['branch_name']}" + if patch.get("pr_url"): + cell += f" — PR" + if cell: + rows.append( + f'↳{cell}' + ) if not rows: rows.append('(no activity)') @@ -234,7 +282,7 @@ tr td:first-child {{ font-size: 1.2em; }}

Open follow-ups

    -
  • 0 unmerged auto-patches
  • +
  • {open_followups} unmerged auto-patches
  • 0 manual review tickets in bugs.sulkta.com

Full log: {full_log_url}

@@ -267,6 +315,7 @@ class DigestScheduler: hour: int = 6, minute: int = 0, full_log_base_url: str = "http://192.168.0.5:8810/digests", + gitea_pr_state_check=None, ): self.db = db self.smtp = smtp @@ -274,6 +323,10 @@ class DigestScheduler: self.hour = hour self.minute = minute self.full_log_base_url = full_log_base_url + # Optional callable: (owner, repo, number) -> "open" | "closed" | None. + # Used to count open follow-ups across all PR-opened patches in the + # window. Tests inject a stub so we don't make real network calls. + self.gitea_pr_state_check = gitea_pr_state_check self._loop_task: asyncio.Task | None = None self._stopping = False @@ -389,6 +442,8 @@ class DigestScheduler: per_project_sections: list[dict] = [] per_project_meta: list[dict] = [] full_log_url = f"{self.full_log_base_url}/{date_str}" + # Total open follow-ups across all projects in the window. + open_followups_total = 0 for prow in projects: recipe = json.loads(prow.get("recipe_json") or "{}") @@ -421,10 +476,44 @@ class DigestScheduler: }) cves += sum(1 for f in findings if f.get("kind") == "cve") + # Patch attempts for this project in the same window. + patch_rows = self.db.list_patch_attempts_in_window( + window_start=window_start, + window_end=window_end, + project_name=prow["name"], + statuses=("pushed", "pr_opened"), + ) + patch_entries: list[dict] = [] + for pa in patch_rows: + patch_entries.append({ + "branch_name": pa.get("branch_name"), + "pr_url": pa.get("pr_url"), + "status": pa.get("status"), + }) + # Count open follow-ups via Gitea state check (when configured). + if pa.get("status") == "pr_opened" and pa.get("pr_url"): + if self.gitea_pr_state_check is not None: + owner_repo_n = _parse_pr_url(pa["pr_url"]) + if owner_repo_n is not None: + owner, repo, n = owner_repo_n + try: + state = self.gitea_pr_state_check(owner, repo, n) + except Exception as e: + log.warning( + "digest: gitea PR state check failed: %s", e + ) + state = None + if state in (None, "open"): + open_followups_total += 1 + else: + # Without a checker, treat all pr_opened rows as still open. + open_followups_total += 1 + section = { "project": prow["name"], "runs": section_runs, "cves": cves, + "patches": patch_entries, } meta = { @@ -446,7 +535,7 @@ class DigestScheduler: meta["skipped_reason"] = "no_recipients" per_project_meta.append(meta) continue - if not section_runs and not wants_summary: + if not section_runs and not patch_entries and not wants_summary: meta["skipped_reason"] = "zero_activity" per_project_meta.append(meta) continue @@ -454,8 +543,18 @@ class DigestScheduler: per_project_sections.append(section) per_project_meta.append(meta) - text_body = _render_text(date_str, per_project_sections, full_log_url) - html_body = _render_html(date_str, per_project_sections, full_log_url) + text_body = _render_text( + date_str, + per_project_sections, + full_log_url, + open_followups=open_followups_total, + ) + html_body = _render_html( + date_str, + per_project_sections, + full_log_url, + open_followups=open_followups_total, + ) # Per-project send loop. Idempotency check via digest_runs UNIQUE. for meta, section in zip( @@ -470,11 +569,22 @@ class DigestScheduler: continue # Build a per-project-scoped body. - proj_text = _render_text(date_str, [section], full_log_url) - proj_html = _render_html(date_str, [section], full_log_url) + proj_text = _render_text( + date_str, + [section], + full_log_url, + open_followups=open_followups_total, + ) + proj_html = _render_html( + date_str, + [section], + full_log_url, + open_followups=open_followups_total, + ) + n_patches = len(section.get("patches", [])) subject = ( f"crafting-table digest — {date_str} " - f"({len(section['runs'])} runs, 0 patches drafted, {section['cves']} CVEs)" + f"({len(section['runs'])} runs, {n_patches} patches drafted, {section['cves']} CVEs)" ) if dry_run or self.smtp is None: diff --git a/crafting_table/patcher.py b/crafting_table/patcher.py new file mode 100644 index 0000000..1a2c6ab --- /dev/null +++ b/crafting_table/patcher.py @@ -0,0 +1,1102 @@ +"""Autonomous patch loop — wave 3 / step 9. + +Lifecycle, end-to-end: + +1. A job finishes with one or more `actionable` findings (lint with + file/line, cve with a known fix-version). The runner's post-job hook + calls :meth:`Patcher.maybe_draft_for_job`. +2. For each candidate finding (highest severity first, capped at + `max_attempts_per_finding`): + a. Pull surrounding source from the project's bare-clone-backed + worktree (`±20 lines` around `finding.line`). + b. Open a clawdforge session via ``POST /sessions`` with `agent="claude"` + and metadata identifying the job + finding. + c. Send one turn with a structured prompt; expect a JSON object + ``{"diff": ..., "explanation": ..., "confidence": ...}`` back. + d. Apply the diff in a fresh worktree on a new branch + ``crafting-table/auto/-``. Use + ``git apply --check`` first; failure → status=``apply_failed``. + e. Re-run the failing recipe on the patched worktree (the *verify* + step). Failure → status=``verify_failed``. + f. Commit + push the branch to origin. + g. Open a Gitea PR (``POST /api/v1/repos///pulls``) with + title ``[crafting-table] auto-patch ``. +3. Persist a row in ``patch_attempts`` regardless of which step failed — + so the digest can surface "we tried; it didn't work" honestly. +4. Always close the clawdforge session in a ``finally``. + +**Verification cost**: re-running the recipe on the patched worktree is +the only safety net. For a recipe with 20-minute build the verify step +DOUBLES the latency. Recommend ``notify.auto_patch=true`` only on +projects where the audit/test recipe is <5min, OR the operator accepts +the latency. v0.2 candidate: a "fast verify" mode that re-runs only the +specific lint that fired, not the whole recipe. + +Network calls go through a tiny inline ``httpx`` wrapper instead of +the full clawdforge SDK — keeps the dep surface small and the wire +shape obvious. +""" +from __future__ import annotations + +import asyncio +import json +import logging +import re +import shutil +import subprocess +import time +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterable, Literal +from urllib.parse import urlparse + +import httpx + +from .db import DB +from .workspace import WorkspaceManager + + +log = logging.getLogger("crafting_table.patcher") + + +PatchStatus = Literal[ + "drafted", + "apply_failed", + "verify_failed", + "pushed", + "pr_opened", + "max_attempts_exceeded", + "failed", +] + + +# Findings of these kinds are eligible for auto-patch in v0.1. test_fail is +# NOT in here — too brittle for v0.1, lands in v0.2 with deeper context. +_FIXABLE_KINDS = {"lint", "cve"} + + +@dataclass(frozen=True) +class PatcherConfig: + """Configuration for the autonomous patch loop. + + All fields can be sourced from environment variables — see + :meth:`from_env` for the canonical mapping. The fields without env + backing (`max_attempts_per_finding`, `auto_patch_branch_prefix`) are + knobs that practically never change at deploy time. + """ + + clawdforge_base_url: str + clawdforge_token: str + gitea_base_url: str + gitea_token: str + max_attempts_per_finding: int = 3 + auto_patch_branch_prefix: str = "crafting-table/auto/" + # Bound the verify recipe so a runaway patched recipe doesn't tie the + # patcher up forever. Falls back to the original subproject's + # timeout_secs when None. + verify_timeout_secs: int | None = None + # HTTP timeout margin — clawdforge adds an internal margin, but we cap + # transport-level too for hung connections. + http_timeout_secs: int = 600 + + @classmethod + def from_env(cls, env: dict[str, str] | None = None) -> "PatcherConfig | None": + """Return a config populated from CRAFTING_CLAWDFORGE_* / + CRAFTING_GITEA_* env vars. Returns None if any required var is + missing — caller treats that as "patcher disabled." + + The reason this is a classmethod (not a free fn) is so tests can + construct a config directly without needing every env var, while + production reads from the process environment. + """ + import os + + e = env if env is not None else dict(os.environ) + cf_url = e.get("CRAFTING_CLAWDFORGE_URL", "").strip() + cf_tok = e.get("CRAFTING_CLAWDFORGE_TOKEN", "").strip() + gt_url = e.get("CRAFTING_GITEA_URL", "").strip() + gt_tok = e.get("CRAFTING_GITEA_TOKEN", "").strip() + if not (cf_url and cf_tok and gt_url and gt_tok): + return None + return cls( + clawdforge_base_url=cf_url.rstrip("/"), + clawdforge_token=cf_tok, + gitea_base_url=gt_url.rstrip("/"), + gitea_token=gt_tok, + max_attempts_per_finding=int(e.get("CRAFTING_PATCHER_MAX_ATTEMPTS", "3")), + auto_patch_branch_prefix=e.get( + "CRAFTING_PATCHER_BRANCH_PREFIX", "crafting-table/auto/" + ), + ) + + +@dataclass +class PatchAttempt: + """Result of one patch-loop pass over a finding. + + Mirrors the columns in the ``patch_attempts`` table. ``status`` is the + coarsest signal — ``pr_opened`` is a full success; everything else is + some kind of failure with the diagnosis carried in ``error``. + """ + + finding_id: int + job_id: str + project_name: str + attempt_number: int + status: PatchStatus + branch_name: str | None = None + pr_url: str | None = None + diff_excerpt: str | None = None + session_id: str | None = None + error: str | None = None + id: int | None = None # populated after DB persist + + +def findings_were_actionable(findings: Iterable[dict]) -> bool: + """Return True if at least one finding is fixable by the v0.1 loop. + + Rules: + - kind ``lint`` requires a file + line (so we can extract context). + - kind ``cve`` is fixable when a fix is at least suggested (we trust + the parser's ``suggested_fix`` text — clippy etc. set it when they + can; cargo-audit's ``fixed_in`` lands in suggested_fix via the + Rust parser). + - kind ``test_fail`` is NOT actionable in v0.1 (too brittle, no + reliable single-line fix locator). + """ + for f in findings: + if not isinstance(f, dict): + continue + kind = f.get("kind") + if kind == "lint" and f.get("file") and f.get("line"): + return True + if kind == "cve" and (f.get("suggested_fix") or f.get("code")): + return True + return False + + +# --- clawdforge wire wrapper ------------------------------------------------ + + +class ClawdforgeClient: + """Tiny async httpx wrapper around the clawdforge sessions API. + + We deliberately avoid the full clawdforge SDK because: + - The SDK is sync (``requests``-based); we'd have to wrap every call + in ``asyncio.to_thread`` anyway. + - Pip-installing the SDK from a sibling LAN repo at runtime is + brittle; this wrapper is ~50 lines, matches the wire shape exactly, + and lives next to its consumer. + + Endpoints used: + - ``POST /sessions`` → create + - ``POST /sessions/{id}/turn`` → one turn + - ``DELETE /sessions/{id}`` → close (idempotent) + """ + + def __init__(self, base_url: str, token: str, *, timeout_secs: int = 600): + self.base_url = base_url.rstrip("/") + self.token = token + self.timeout_secs = timeout_secs + + @property + def _headers(self) -> dict[str, str]: + return { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + } + + async def create_session( + self, + *, + agent: str = "claude", + meta: dict[str, Any] | None = None, + ) -> dict[str, Any]: + body: dict[str, Any] = {"agent": agent} + if meta is not None: + body["meta"] = meta + async with httpx.AsyncClient(timeout=self.timeout_secs) as ac: + r = await ac.post( + f"{self.base_url}/sessions", json=body, headers=self._headers + ) + r.raise_for_status() + return r.json() + + async def turn( + self, + session_id: str, + prompt: str, + *, + timeout_secs: int | None = None, + ) -> dict[str, Any]: + body: dict[str, Any] = {"prompt": prompt} + if timeout_secs is not None: + body["timeout_secs"] = int(timeout_secs) + async with httpx.AsyncClient(timeout=self.timeout_secs) as ac: + r = await ac.post( + f"{self.base_url}/sessions/{session_id}/turn", + json=body, + headers=self._headers, + ) + r.raise_for_status() + return r.json() + + async def close_session(self, session_id: str) -> None: + # Idempotent server-side; we still swallow 404 to keep teardown + # noise-free if the session was already GC'd. + async with httpx.AsyncClient(timeout=30) as ac: + try: + r = await ac.delete( + f"{self.base_url}/sessions/{session_id}", + headers=self._headers, + ) + if r.status_code not in (200, 204, 404, 410): + r.raise_for_status() + except httpx.HTTPError as e: + log.warning("clawdforge session %s close failed: %s", session_id, e) + + +def turn_text(payload: dict) -> str: + """Extract concatenated 'text' events from a /sessions/turn response. + + Matches the SDK's ``TurnResult.text`` semantics. Falls back to an + empty string when the events list is missing or contains no text + events. + """ + events = payload.get("events") or [] + parts: list[str] = [] + for ev in events: + if not isinstance(ev, dict): + continue + if ev.get("type") == "text": + content = ev.get("content") + if isinstance(content, str): + parts.append(content) + return "".join(parts) + + +_JSON_BLOCK_RE = re.compile(r"\{(?:[^{}]|\{[^{}]*\})*\}", re.DOTALL) + + +def extract_diff_json(text: str) -> dict[str, Any] | None: + """Pull the ``{"diff": ..., "explanation": ..., "confidence": ...}`` + object out of the model's text reply. + + The prompt asks for "JSON ONLY", but real-world models leak the + occasional fence (```json ... ```) or trailing prose. We strip code + fences first, then try the whole string, then walk substrings for + the first JSON object that has a ``diff`` field. + """ + cleaned = text.strip() + # Strip a leading ``` fence (with optional language tag) and trailing ``` + if cleaned.startswith("```"): + # Drop the first line (fence) and find the closing fence. + lines = cleaned.splitlines() + if len(lines) >= 2: + # First line is fence; the closing ``` may be on its own line. + body_lines = [] + for ln in lines[1:]: + if ln.strip() == "```": + break + body_lines.append(ln) + cleaned = "\n".join(body_lines).strip() + + # Fast path + try: + obj = json.loads(cleaned) + if isinstance(obj, dict) and "diff" in obj: + return obj + except (ValueError, TypeError): + pass + + # Fallback: scan for a balanced JSON object that contains "diff" + for m in _JSON_BLOCK_RE.finditer(cleaned): + chunk = m.group(0) + try: + obj = json.loads(chunk) + except (ValueError, TypeError): + continue + if isinstance(obj, dict) and "diff" in obj: + return obj + return None + + +# --- Gitea wire wrapper ----------------------------------------------------- + + +class GiteaClient: + """Tiny async httpx wrapper around Gitea's PR + repo API. + + Just enough surface to: + - POST /repos/{owner}/{repo}/pulls → open a PR + - GET /repos/{owner}/{repo}/pulls/{n} → check open/closed state for + digest follow-up counting + """ + + def __init__(self, base_url: str, token: str, *, timeout_secs: int = 30): + self.base_url = base_url.rstrip("/") + self.token = token + self.timeout_secs = timeout_secs + + @property + def _headers(self) -> dict[str, str]: + return { + "Authorization": f"token {self.token}", + "Content-Type": "application/json", + "Accept": "application/json", + } + + @staticmethod + def parse_repo(git_url: str) -> tuple[str, str] | None: + """Extract (owner, repo) from a Gitea http(s) URL. + + Strips any embedded credentials (``http://user:pass@host/...``) and + a trailing ``.git`` suffix. Returns ``None`` if the URL doesn't + look like a Gitea-style ``//`` path. + """ + try: + u = urlparse(git_url) + except Exception: + return None + path = u.path.strip("/") + if path.endswith(".git"): + path = path[:-4] + parts = path.split("/") + if len(parts) < 2: + return None + return parts[0], parts[1] + + async def open_pr( + self, + *, + owner: str, + repo: str, + title: str, + body: str, + head: str, + base: str, + ) -> dict[str, Any]: + url = f"{self.base_url}/api/v1/repos/{owner}/{repo}/pulls" + payload = { + "title": title, + "body": body, + "head": head, + "base": base, + } + async with httpx.AsyncClient(timeout=self.timeout_secs) as ac: + r = await ac.post(url, json=payload, headers=self._headers) + r.raise_for_status() + return r.json() + + async def get_pr_state( + self, *, owner: str, repo: str, number: int + ) -> str | None: + """Return ``"open" | "closed"``. ``None`` if the PR couldn't be + fetched (auth failure, network blip) — caller treats that as + "assume open" for the digest follow-up count. + """ + url = f"{self.base_url}/api/v1/repos/{owner}/{repo}/pulls/{number}" + async with httpx.AsyncClient(timeout=self.timeout_secs) as ac: + try: + r = await ac.get(url, headers=self._headers) + if r.status_code == 404: + return None + r.raise_for_status() + payload = r.json() + state = payload.get("state") + if isinstance(state, str): + return state + return None + except httpx.HTTPError as e: + log.warning( + "gitea PR state fetch failed for %s/%s#%d: %s", + owner, + repo, + number, + e, + ) + return None + + +# --- prompt building -------------------------------------------------------- + + +_PROMPT_TEMPLATE = """\ +You are a code-fixing assistant. A finding was reported by tool X. + +FINDING: + kind: {kind} + severity: {severity} + code: {code} + message: {message} + file: {file} + line: {line} + +SOURCE CONTEXT (file, ±20 lines around the finding): +```{language} +{source} +``` + +PROJECT CONTEXT: + git_url: {git_url} + branch: {branch} + subproject: {subproject} + +Output a unified diff (git format-patch style) that fixes the finding. +Output JSON ONLY: {{"diff": "", "explanation": "", "confidence": "high|medium|low"}} +No prose outside the JSON. +""" + + +def _build_prompt( + *, + finding: dict, + source_excerpt: str, + language: str, + git_url: str, + branch: str, + subproject: str, +) -> str: + return _PROMPT_TEMPLATE.format( + kind=finding.get("kind", ""), + severity=finding.get("severity", ""), + code=finding.get("code") or "(unknown)", + message=(finding.get("message") or "")[:400], + file=finding.get("file") or "(unknown)", + line=finding.get("line") or 0, + language=language or "", + source=source_excerpt, + git_url=git_url, + branch=branch, + subproject=subproject or ".", + ) + + +def _read_source_context(repo_root: Path, file_rel: str, line: int, *, radius: int = 20) -> str: + """Read ±radius lines around `line` of `file_rel` (1-indexed) from + repo_root. Returns an empty string if the file can't be read or the + line is out of range — patch loop continues with no context, the model + just gets less to chew on.""" + try: + path = (repo_root / file_rel).resolve() + # Defensive: ensure path stays under repo_root. + if not str(path).startswith(str(repo_root.resolve())): + return "" + text = path.read_text(encoding="utf-8", errors="replace") + except (OSError, UnicodeError): + return "" + lines = text.splitlines() + if not lines: + return "" + n = max(1, int(line)) + start = max(0, n - 1 - radius) + end = min(len(lines), n + radius) + out = [] + for i in range(start, end): + prefix = ">>>" if (i + 1) == n else " " + out.append(f"{prefix} {i + 1}: {lines[i]}") + return "\n".join(out) + + +# --- the patcher itself ---------------------------------------------------- + + +class Patcher: + """Owns the autonomous patch lifecycle. + + A single ``Patcher`` instance is constructed at server startup and + bound to: + - the same ``DB`` the runner writes to, + - the same ``WorkspaceManager`` that materializes per-job worktrees, + - a ``Runner`` reference (for the verify step — re-running a recipe + uses the runner's own primitives so we don't reimplement subprocess + lifecycle here), + - a ``PatcherConfig`` with clawdforge + Gitea creds. + + The runner's hook (see ``server.py`` lifespan) calls + :meth:`maybe_draft_for_job`. Tests can call :meth:`maybe_draft` + directly with a finding_id for fine-grained assertions. + """ + + def __init__( + self, + *, + db: DB, + workspace: WorkspaceManager, + config: PatcherConfig, + runner: Any | None = None, + clawdforge: ClawdforgeClient | None = None, + gitea: GiteaClient | None = None, + ): + self.db = db + self.workspace = workspace + self.config = config + self.runner = runner + self.clawdforge = clawdforge or ClawdforgeClient( + base_url=config.clawdforge_base_url, + token=config.clawdforge_token, + ) + self.gitea = gitea or GiteaClient( + base_url=config.gitea_base_url, + token=config.gitea_token, + ) + + # ---------- public API -------------------------------------------------- + + async def maybe_draft( + self, job_id: str, finding_id: int | None = None + ) -> PatchAttempt | None: + """Attempt one patch on `job_id`. + + If `finding_id` is None, picks the highest-severity unresolved + finding from this job. Returns None if there's nothing actionable + on the job at all. + """ + job = await self.db.arun(self.db.get_job, job_id) + if job is None: + log.warning("patcher: job %s not found", job_id) + return None + + if finding_id is None: + chosen = await self._pick_finding(job_id) + if chosen is None: + log.info("patcher: no actionable finding on job %s", job_id) + return None + finding_id = int(chosen["id"]) + + return await self._draft_one(job=job, finding_id=int(finding_id)) + + async def maybe_draft_for_job(self, job: dict) -> list[PatchAttempt]: + """Iterate over actionable findings on a job and draft up to + max_attempts_per_finding patches each. + + Called from the runner's post-job hook when + ``project.notify.auto_patch=true``. Failures inside one finding's + loop don't stop the others — we want to try every actionable + finding on a noisy nightly run. + """ + attempts: list[PatchAttempt] = [] + findings = await self.db.arun(self.db.list_findings, job["id"]) + if not findings_were_actionable(findings): + return attempts + + # Highest-severity-first ordering. Severity ranking matches what the + # parsers emit: critical > high > error > warn > info. + ranked = sorted(findings, key=_severity_rank, reverse=True) + for f in ranked: + if not _finding_is_fixable(f): + continue + attempt = await self._draft_one(job=job, finding_id=int(f["id"])) + if attempt is not None: + attempts.append(attempt) + return attempts + + # ---------- core -------------------------------------------------------- + + async def _pick_finding(self, job_id: str) -> dict | None: + findings = await self.db.arun(self.db.list_findings, job_id) + ranked = sorted(findings, key=_severity_rank, reverse=True) + for f in ranked: + if _finding_is_fixable(f): + return f + return None + + async def _draft_one(self, *, job: dict, finding_id: int) -> PatchAttempt | None: + """Run the full draft → apply → verify → push → PR pipeline for one + finding. Persists a row in patch_attempts on every terminal state. + """ + finding = await self.db.arun(self.db.get_finding, finding_id) + if finding is None: + log.warning("patcher: finding %s not found", finding_id) + return None + + prior = await self.db.arun(self.db.count_patch_attempts, finding_id) + attempt_number = prior + 1 + if prior >= self.config.max_attempts_per_finding: + row_id = await self.db.arun( + self.db.insert_patch_attempt, + finding_id=finding_id, + job_id=job["id"], + project_name=job["project_name"], + attempt_number=attempt_number, + status="max_attempts_exceeded", + error=f"already had {prior} prior attempts (cap {self.config.max_attempts_per_finding})", + ) + return PatchAttempt( + id=row_id, + finding_id=finding_id, + job_id=job["id"], + project_name=job["project_name"], + attempt_number=attempt_number, + status="max_attempts_exceeded", + error=f"already had {prior} prior attempts", + ) + + # Pull project + recipe context. + project = await self.db.arun(self.db.get_project, job["project_name"]) + if project is None: + return None + snapshot = json.loads(job["recipe_snapshot_json"]) + sub = _find_subproject(snapshot, job["subproject_path"]) + language = (sub.get("language") if sub else "") or "" + + # Build attempt scaffolding. + branch_name = ( + f"{self.config.auto_patch_branch_prefix}{job['id']}-{finding_id}" + ) + attempt = PatchAttempt( + finding_id=finding_id, + job_id=job["id"], + project_name=job["project_name"], + attempt_number=attempt_number, + status="failed", + branch_name=branch_name, + ) + + session_id: str | None = None + try: + session_payload = await self.clawdforge.create_session( + agent="claude", + meta={ + "crafting_table_job_id": job["id"], + "finding_id": finding_id, + "project_name": job["project_name"], + "subproject": job["subproject_path"], + }, + ) + session_id = session_payload.get("session_id") + attempt.session_id = session_id + + # Materialize a worktree to read source context AND host the + # patch. We re-use WorkspaceManager.materialize() with a + # synthetic job_id keyed on attempt so the bare clone gets + # reused but the worktree is unique per attempt. + patch_job_id = f"patch-{job['id']}-{finding_id}-{attempt_number}" + paths = await self._materialize_worktree( + project=job["project_name"], + git_url=project["git_url"], + branch=job["branch"], + patch_job_id=patch_job_id, + ) + try: + # 1. Build prompt with source context + source_excerpt = _read_source_context( + paths.worktree_dir / (sub.get("path") if sub else "."), + finding.get("file") or "", + finding.get("line") or 1, + ) + prompt = _build_prompt( + finding=finding, + source_excerpt=source_excerpt, + language=language, + git_url=project["git_url"], + branch=job["branch"], + subproject=job["subproject_path"], + ) + + # 2. Send turn + turn_payload = await self.clawdforge.turn(session_id, prompt) + model_text = turn_text(turn_payload) + parsed = extract_diff_json(model_text) + if parsed is None or not isinstance(parsed.get("diff"), str): + attempt.status = "drafted" + attempt.error = "malformed_response" + return await self._persist(attempt) + diff_text = parsed["diff"] + attempt.diff_excerpt = "\n".join(diff_text.splitlines()[:30]) + + # 3. Apply + applied = self._apply_diff_to_worktree(paths.worktree_dir, diff_text) + if not applied: + attempt.status = "apply_failed" + attempt.error = "git apply rejected the diff" + return await self._persist(attempt) + + # 4. Verify by re-running the same recipe. + verify_ok = await self._verify( + job=job, + snapshot=snapshot, + paths=paths, + finding=finding, + ) + if not verify_ok: + attempt.status = "verify_failed" + attempt.error = ( + "recipe still failed after patch (or new findings appeared)" + ) + return await self._persist(attempt) + + # 5. Commit + push to a branch on origin. + pushed_branch = self._commit_and_push( + worktree_dir=paths.worktree_dir, + branch_name=branch_name, + finding=finding, + explanation=parsed.get("explanation") or "auto-patch", + ) + if not pushed_branch: + attempt.status = "verify_failed" + attempt.error = "git push failed" + return await self._persist(attempt) + attempt.status = "pushed" + + # 6. Open Gitea PR. + pr_url = await self._open_pr( + project=project, + branch_name=branch_name, + base_branch=job["branch"], + finding=finding, + explanation=parsed.get("explanation") or "auto-patch", + confidence=parsed.get("confidence") or "medium", + diff_excerpt=attempt.diff_excerpt or "", + ) + if pr_url: + attempt.pr_url = pr_url + attempt.status = "pr_opened" + return await self._persist(attempt) + finally: + # Always clean up worktree to avoid /workspace bloat. + try: + await self.workspace.cleanup(paths) + except Exception as e: # pragma: no cover - defensive + log.warning("patcher: worktree cleanup failed: %s", e) + except httpx.HTTPError as e: + attempt.status = "failed" + attempt.error = f"http: {e!s}"[:400] + return await self._persist(attempt) + except Exception as e: + log.exception("patcher: unexpected error on finding %s", finding_id) + attempt.status = "failed" + attempt.error = f"{type(e).__name__}: {e!s}"[:400] + return await self._persist(attempt) + finally: + if session_id is not None: + try: + await self.clawdforge.close_session(session_id) + except Exception as e: # pragma: no cover - defensive + log.warning("patcher: clawdforge close failed: %s", e) + + # ---------- internals --------------------------------------------------- + + async def _materialize_worktree( + self, + *, + project: str, + git_url: str, + branch: str, + patch_job_id: str, + ): + """Materialize a fresh worktree for the patch attempt. Uses a + scratch in-memory log buffer because the patch attempt is its own + thing — separate from the originating job's recipe log.""" + from io import StringIO + log_fh = StringIO() + return await self.workspace.materialize( + project=project, + job_id=patch_job_id, + git_url=git_url, + branch=branch, + log_fh=log_fh, + ) + + def _apply_diff_to_worktree(self, worktree_dir: Path, diff_text: str) -> bool: + """Run ``git apply --check`` then ``git apply`` against the diff. + + Returns True on success. We use --whitespace=nowarn because + clippy / mypy / ruff suggested fixes occasionally have trailing + whitespace that would otherwise reject in strict mode. + """ + diff_path = worktree_dir / ".crafting-patch.diff" + try: + diff_path.write_text(diff_text, encoding="utf-8") + except OSError as e: + log.warning("patcher: could not write diff: %s", e) + return False + try: + check = subprocess.run( + ["git", "apply", "--check", "--whitespace=nowarn", str(diff_path)], + cwd=str(worktree_dir), + capture_output=True, + text=True, + timeout=60, + ) + if check.returncode != 0: + log.info("patcher: git apply --check failed: %s", check.stderr.strip()) + return False + applied = subprocess.run( + ["git", "apply", "--whitespace=nowarn", str(diff_path)], + cwd=str(worktree_dir), + capture_output=True, + text=True, + timeout=60, + ) + return applied.returncode == 0 + except (subprocess.TimeoutExpired, OSError) as e: + log.warning("patcher: git apply error: %s", e) + return False + finally: + try: + diff_path.unlink() + except OSError: + pass + + async def _verify( + self, + *, + job: dict, + snapshot: dict, + paths, + finding: dict, + ) -> bool: + """Re-run the originating recipe against the patched worktree. + + Strategy: invoke the runner's own subprocess primitive + (``_exec_recipe``) so we get the same pump / timeout / process + group semantics as the original job. Fall back to a plain + subprocess call if the runner is None (test contexts). + + We ALSO check that the original kind/code finding is gone post- + patch — useful for lint where the recipe might still exit nonzero + for unrelated reasons. + """ + sub = _find_subproject(snapshot, job["subproject_path"]) + if sub is None: + return False + recipe_kind = job["recipe"] + cmd = sub.get(recipe_kind) + if not cmd: + return False + sub_path = sub.get("path", ".") + cwd = paths.worktree_dir / sub_path + timeout = ( + self.config.verify_timeout_secs + or int(sub.get("timeout_secs") or 1800) + ) + + from io import StringIO + log_fh = StringIO() + + if self.runner is not None and hasattr(self.runner, "_exec_recipe"): + try: + exit_code, timed_out = await self.runner._exec_recipe( + cmd=cmd, cwd=str(cwd), log_fh=log_fh, timeout=timeout + ) + except Exception as e: + log.warning("patcher: verify run failed via runner: %s", e) + return False + if timed_out or exit_code != 0: + # For lint findings, the original code/file/line being gone + # is a stronger signal than exit_code=0 — but if the recipe + # exits nonzero with our specific finding's code still in + # the output, that's a clear failure. + output = log_fh.getvalue() + fcode = (finding.get("code") or "").strip() + if fcode and fcode in output: + return False + # exit nonzero w/ finding's code GONE may still be a fail + # (other lints fired); be conservative and return False. + return False + return True + + # Fallback path for test environments that hand us a stub runner. + proc = await asyncio.create_subprocess_shell( + cmd, + cwd=str(cwd), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + ) + try: + await asyncio.wait_for(proc.wait(), timeout=timeout) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + return False + return proc.returncode == 0 + + def _commit_and_push( + self, + *, + worktree_dir: Path, + branch_name: str, + finding: dict, + explanation: str, + ) -> bool: + """Commit the worktree changes to a new branch and push to origin. + + Author is forced to ``Kayos ``. We pass through + --no-gpg-sign because crafting-table containers don't have signing + keys; commit messages reference the finding id so the PR review + can navigate back to the finding row in the API. + """ + env = { + "GIT_AUTHOR_NAME": "Kayos", + "GIT_AUTHOR_EMAIL": "kayos@sulkta.com", + "GIT_COMMITTER_NAME": "Kayos", + "GIT_COMMITTER_EMAIL": "kayos@sulkta.com", + "PATH": "/usr/local/bin:/usr/bin:/bin", + } + msg = ( + f"crafting-table auto-patch: {finding.get('code') or finding.get('kind')}\n" + f"\n" + f"finding #{finding.get('id')}: {(finding.get('message') or '').splitlines()[0][:120]}\n" + f"\n" + f"{explanation}\n" + ) + try: + subprocess.run( + ["git", "checkout", "-b", branch_name], + cwd=str(worktree_dir), + check=True, + capture_output=True, + timeout=30, + ) + subprocess.run( + ["git", "add", "-A"], + cwd=str(worktree_dir), + check=True, + capture_output=True, + timeout=30, + ) + subprocess.run( + ["git", "commit", "-m", msg, "--no-gpg-sign"], + cwd=str(worktree_dir), + env=env, + check=True, + capture_output=True, + timeout=30, + ) + subprocess.run( + ["git", "push", "origin", branch_name], + cwd=str(worktree_dir), + check=True, + capture_output=True, + timeout=120, + ) + return True + except subprocess.CalledProcessError as e: + log.warning( + "patcher: git step failed: %s\nstdout=%s\nstderr=%s", + e, + (e.stdout or b"").decode("utf-8", "replace")[:400], + (e.stderr or b"").decode("utf-8", "replace")[:400], + ) + return False + except (subprocess.TimeoutExpired, OSError) as e: + log.warning("patcher: git push timed out / errored: %s", e) + return False + + async def _open_pr( + self, + *, + project: dict, + branch_name: str, + base_branch: str, + finding: dict, + explanation: str, + confidence: str, + diff_excerpt: str, + ) -> str | None: + """Open a Gitea PR for the pushed branch. Returns the html_url on + success, None on auth/network failure.""" + owner_repo = GiteaClient.parse_repo(project["git_url"]) + if owner_repo is None: + log.warning( + "patcher: could not parse owner/repo from %s", project["git_url"] + ) + return None + owner, repo = owner_repo + title = f"[crafting-table] auto-patch {finding.get('code') or finding.get('kind') or ''}".strip() + body = ( + f"Automated patch drafted by crafting-table for finding " + f"#{finding.get('id')} ({finding.get('kind')} / " + f"{finding.get('code') or 'no-code'}).\n\n" + f"**Severity**: {finding.get('severity')}\n" + f"**File**: `{finding.get('file') or 'unknown'}` " + f"line {finding.get('line') or '?'}\n" + f"**Message**: {finding.get('message') or ''}\n\n" + f"**Explanation**: {explanation}\n" + f"**Confidence**: {confidence}\n\n" + f"### Diff (first 30 lines)\n```diff\n{diff_excerpt}\n```\n\n" + f"_Verify recipe re-ran cleanly on the patched worktree before " + f"this PR was opened._" + ) + try: + payload = await self.gitea.open_pr( + owner=owner, + repo=repo, + title=title, + body=body, + head=branch_name, + base=base_branch, + ) + except httpx.HTTPError as e: + log.warning("patcher: gitea PR open failed: %s", e) + return None + url = payload.get("html_url") or payload.get("url") + return str(url) if url else None + + async def _persist(self, attempt: PatchAttempt) -> PatchAttempt: + row_id = await self.db.arun( + self.db.insert_patch_attempt, + finding_id=attempt.finding_id, + job_id=attempt.job_id, + project_name=attempt.project_name, + attempt_number=attempt.attempt_number, + status=attempt.status, + branch_name=attempt.branch_name, + pr_url=attempt.pr_url, + diff_excerpt=attempt.diff_excerpt, + session_id=attempt.session_id, + error=attempt.error, + ) + attempt.id = row_id + return attempt + + +# --- helpers ---------------------------------------------------------------- + + +_SEVERITY_RANK = { + "critical": 5, + "high": 4, + "error": 3, + "warn": 2, + "warning": 2, + "medium": 2, + "info": 1, + "low": 1, +} + + +def _severity_rank(finding: dict) -> int: + return _SEVERITY_RANK.get((finding.get("severity") or "").lower(), 0) + + +def _finding_is_fixable(f: dict) -> bool: + if not isinstance(f, dict): + return False + kind = f.get("kind") + if kind == "lint": + return bool(f.get("file") and f.get("line")) + if kind == "cve": + return bool(f.get("suggested_fix") or f.get("code")) + return False + + +def _find_subproject(snapshot: dict, path: str) -> dict | None: + for s in snapshot.get("subprojects", []): + if s.get("path") == path: + return s + return None + + +__all__ = [ + "PatcherConfig", + "Patcher", + "PatchAttempt", + "PatchStatus", + "ClawdforgeClient", + "GiteaClient", + "findings_were_actionable", + "extract_diff_json", + "turn_text", +] diff --git a/crafting_table/server.py b/crafting_table/server.py index 81115f2..f93e8c1 100644 --- a/crafting_table/server.py +++ b/crafting_table/server.py @@ -48,6 +48,7 @@ from .models import ( Project, TokenCreateRequest, ) +from .patcher import Patcher, PatcherConfig from .runner import Runner from .workspace import WorkspaceManager @@ -77,6 +78,45 @@ runner: Runner = Runner( _smtp_cfg: SmtpConfig | None = SmtpConfig.from_env() digest_scheduler: DigestScheduler = DigestScheduler(db=db, smtp=_smtp_cfg) +# Patcher (wave 3): clawdforge + Gitea creds env-driven; if any required env +# var is missing, the patcher stays None and the runner hook short-circuits. +_patcher_cfg: PatcherConfig | None = PatcherConfig.from_env() +patcher: Patcher | None = ( + Patcher(db=db, workspace=workspace, config=_patcher_cfg, runner=runner) + if _patcher_cfg is not None + else None +) + + +# Wire the patcher into the runner's post-job hook. The runner already runs +# the parser pipeline before this hook fires, so by the time we land here +# the findings rows for `job_id` are committed and pickable. +async def _maybe_auto_patch_hook(event: dict) -> None: + if patcher is None: + return + if event.get("findings_count", 0) <= 0: + return + project_row = await db.arun(db.get_project, event["project_name"]) + if project_row is None: + return + try: + recipe = json.loads(project_row.get("recipe_json") or "{}") + except json.JSONDecodeError: + return + notify = recipe.get("notify") or {} + if not bool(notify.get("auto_patch")): + return + job = await db.arun(db.get_job, event["job_id"]) + if job is None: + return + try: + await patcher.maybe_draft_for_job(job) + except Exception as e: + log.warning("patcher hook failed for job %s: %s", event["job_id"], e) + + +runner.add_hook(_maybe_auto_patch_hook) + # ---------- lifespan -------------------------------------------------------- @@ -473,6 +513,103 @@ async def get_job_findings( return {"ok": True, "findings": findings} +# ---- /patches -------------------------------------------------------------- + + +@app.post("/jobs/{id}/patches") +async def trigger_patch( + id: str, + request: Request, + authorization: Annotated[str | None, Header()] = None, + body: dict | None = None, +): + """Manually trigger a patch attempt against a job. + + body: {"finding_id": int | null}. If finding_id is null/absent we pick + the highest-severity actionable finding on the job. + + Returns the resulting PatchAttempt as a dict. 503 if the patcher is + not configured (CRAFTING_CLAWDFORGE_URL/TOKEN/GITEA_URL/TOKEN missing). + """ + tok = auth.require_app(request, authorization) + job_row = await db.arun(db.get_job, id) + _job_visible(job_row, tok) + + if patcher is None: + raise HTTPException(503, "patcher not configured") + + body = body or {} + finding_id = body.get("finding_id") + if finding_id is not None and not isinstance(finding_id, int): + raise HTTPException(400, "finding_id must be an integer or null") + + try: + attempt = await patcher.maybe_draft(id, finding_id=finding_id) + except Exception as e: + log.exception("patch trigger failed: %s", e) + raise HTTPException(500, f"patch attempt errored: {type(e).__name__}") + + if attempt is None: + return {"ok": True, "attempt": None, "reason": "no_actionable_finding"} + return {"ok": True, "attempt": _patch_attempt_to_api(attempt)} + + +@app.get("/patches") +async def list_patches( + request: Request, + authorization: Annotated[str | None, Header()] = None, + project: str | None = None, + status: str | None = None, + limit: int = 100, +): + tok = auth.require_app(request, authorization) + owner = None if tok.is_admin else tok.name + rows = await db.arun( + db.list_patch_attempts, + project_name=project, + status=status, + owner_token=owner, + limit=max(1, min(limit, 500)), + ) + return {"ok": True, "patches": rows} + + +@app.get("/patches/{id}") +async def get_patch( + id: int, + request: Request, + authorization: Annotated[str | None, Header()] = None, +): + tok = auth.require_app(request, authorization) + row = await db.arun(db.get_patch_attempt, int(id)) + if row is None: + raise HTTPException(404, "patch attempt not found") + # Visibility-gate via the underlying project. + project_row = await db.arun(db.get_project, row["project_name"]) + if project_row is None: + raise HTTPException(404, "patch attempt not found") + if not tok.is_admin and project_row["owner_token"] != tok.name: + raise HTTPException(404, "patch attempt not found") + return {"ok": True, "patch": row} + + +def _patch_attempt_to_api(attempt) -> dict: + """Serialize a PatchAttempt dataclass to the wire shape.""" + return { + "id": attempt.id, + "finding_id": attempt.finding_id, + "job_id": attempt.job_id, + "project_name": attempt.project_name, + "attempt_number": attempt.attempt_number, + "status": attempt.status, + "branch_name": attempt.branch_name, + "pr_url": attempt.pr_url, + "diff_excerpt": attempt.diff_excerpt, + "session_id": attempt.session_id, + "error": attempt.error, + } + + # ---- /digests -------------------------------------------------------------- diff --git a/examples/recipes/cauldron.json b/examples/recipes/cauldron.json new file mode 100644 index 0000000..022c42b --- /dev/null +++ b/examples/recipes/cauldron.json @@ -0,0 +1,19 @@ +{ + "name": "cauldron", + "git_url": "http://kayos:REPLACE_WITH_GITEA_TOKEN@192.168.0.5:3001/Sulkta-Coop/cauldron.git", + "default_branch": "main", + "languages": ["python"], + "subprojects": [ + { + "path": ".", + "language": "python", + "build": "pip install -e .[test]", + "test": "pytest tests/", + "lint": "ruff check .", + "audit": "pip-audit", + "timeout_secs": 600 + } + ], + "schedule": {"audit": "0 2 * * *", "test": "0 */6 * * *"}, + "notify": {"email": ["cobb@sulkta.com"], "on": ["audit_fail", "test_fail", "cve_found", "patch_drafted"], "auto_patch": true} +} diff --git a/examples/recipes/clawdforge.json b/examples/recipes/clawdforge.json new file mode 100644 index 0000000..b480ceb --- /dev/null +++ b/examples/recipes/clawdforge.json @@ -0,0 +1,24 @@ +{ + "name": "clawdforge", + "git_url": "http://kayos:REPLACE_WITH_GITEA_TOKEN@192.168.0.5:3001/Sulkta-Coop/clawdforge.git", + "default_branch": "main", + "languages": ["python", "rust", "go", "ruby", "php", "java", "csharp", "swift", "kotlin", "c", "cpp", "bash", "typescript", "mcp"], + "subprojects": [ + {"path": "clients/python", "language": "python", "build": "pip install -e .[test]", "test": "pytest tests/", "lint": "ruff check . && mypy --strict src/", "audit": "pip-audit", "timeout_secs": 600}, + {"path": "clients/rust", "language": "rust", "build": "cargo build --release", "test": "cargo test --all", "lint": "cargo clippy --all-targets -- -D warnings && cargo fmt --check", "audit": "cargo audit", "timeout_secs": 1200}, + {"path": "clients/go", "language": "go", "build": "go build ./...", "test": "go test ./...", "lint": "go vet ./...", "audit": "govulncheck ./...", "timeout_secs": 600}, + {"path": "clients/typescript", "language": "typescript", "build": "npm install --no-audit", "test": "node --test --import tsx tests/*.test.ts", "lint": "npx tsc --noEmit", "audit": "npm audit", "timeout_secs": 600}, + {"path": "clients/ruby", "language": "ruby", "build": "bundle install", "test": "bundle exec rake test", "lint": null, "audit": "bundler-audit", "timeout_secs": 600}, + {"path": "clients/php", "language": "php", "build": "composer install", "test": "vendor/bin/phpunit", "lint": null, "audit": "composer audit", "timeout_secs": 600}, + {"path": "clients/java", "language": "java", "build": "mvn package -DskipTests", "test": "mvn test", "lint": "mvn javadoc:javadoc -Dquiet=false", "audit": null, "timeout_secs": 1200}, + {"path": "clients/csharp", "language": "csharp", "build": "dotnet build -c Release", "test": "dotnet test -c Release", "lint": null, "audit": "dotnet list package --vulnerable --include-transitive", "timeout_secs": 900}, + {"path": "clients/c", "language": "c", "build": "cmake -S . -B build && cmake --build build", "test": "ctest --test-dir build --output-on-failure", "lint": null, "audit": null, "timeout_secs": 900}, + {"path": "clients/cpp", "language": "cpp", "build": "cmake -S . -B build && cmake --build build", "test": "ctest --test-dir build --output-on-failure", "lint": null, "audit": null, "timeout_secs": 900}, + {"path": "clients/kotlin", "language": "kotlin", "build": "./gradlew --no-daemon build", "test": "./gradlew --no-daemon test", "lint": null, "audit": null, "timeout_secs": 1800}, + {"path": "clients/bash", "language": "bash", "build": null, "test": "bash test/run.sh", "lint": "shellcheck cf", "audit": null, "timeout_secs": 300}, + {"path": "clients/mcp", "language": "python", "build": "pip install -e .", "test": "pytest tests/", "lint": null, "audit": null, "timeout_secs": 300}, + {"path": ".", "language": "python", "build": "pip install -e .", "test": "pytest tests/", "lint": null, "audit": null, "timeout_secs": 600} + ], + "schedule": {"audit": "0 2 * * *", "test": "0 8 * * *"}, + "notify": {"email": ["cobb@sulkta.com"], "on": ["audit_fail", "test_fail", "cve_found", "patch_drafted"], "auto_patch": true} +} diff --git a/examples/recipes/tradecraft.json b/examples/recipes/tradecraft.json new file mode 100644 index 0000000..0b61c5c --- /dev/null +++ b/examples/recipes/tradecraft.json @@ -0,0 +1,19 @@ +{ + "name": "tradecraft", + "git_url": "http://kayos:REPLACE_WITH_GITEA_TOKEN@192.168.0.5:3001/TradeCraft/tradecraft.git", + "default_branch": "main", + "languages": ["python"], + "subprojects": [ + { + "path": ".", + "language": "python", + "build": "pip install -e .", + "test": "pytest tests/", + "lint": "ruff check .", + "audit": "pip-audit", + "timeout_secs": 900 + } + ], + "schedule": {"audit": "0 2 * * *"}, + "notify": {"email": ["cobb@sulkta.com"], "on": ["audit_fail", "cve_found"], "auto_patch": false} +} diff --git a/examples/register-all.sh b/examples/register-all.sh new file mode 100755 index 0000000..1d4d267 --- /dev/null +++ b/examples/register-all.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# Register all example recipes against a running crafting-table instance. +# +# Reads the bearer token from $CRAFTING_TABLE_TOKEN, falling back to +# /data/admin-bearer.txt (the path inside the container) if unset. The +# admin bearer file is also bind-mounted at +# /mnt/user/appdata/crafting-table/data/admin-bearer.txt on the Lucy host +# — that's the recommended source on the host side. +# +# IMPORTANT: the recipe JSON files in recipes/ ship with a placeholder +# git_url containing "REPLACE_WITH_GITEA_TOKEN". This script substitutes +# $GITEA_TOKEN into each recipe before posting; commit-time the real +# token never lives on disk. +set -euo pipefail + +BASE_URL=${CRAFTING_TABLE_URL:-http://192.168.0.5:8810} +TOKEN=${CRAFTING_TABLE_TOKEN:-$(cat /data/admin-bearer.txt 2>/dev/null || echo "")} +GITEA_TOKEN=${GITEA_TOKEN:-} + +if [ -z "$TOKEN" ]; then + echo "no crafting-table token (set CRAFTING_TABLE_TOKEN or ensure /data/admin-bearer.txt exists)" >&2 + exit 1 +fi +if [ -z "$GITEA_TOKEN" ]; then + echo "no Gitea token (set GITEA_TOKEN to substitute into recipe git_url)" >&2 + exit 1 +fi + +DIR="$(dirname "$0")/recipes" +for recipe in "$DIR"/*.json; do + name="$(basename "$recipe" .json)" + echo "registering $name from $recipe..." + body="$(sed "s|REPLACE_WITH_GITEA_TOKEN|$GITEA_TOKEN|g" "$recipe")" + code=$(printf '%s' "$body" | curl -s -o /tmp/register-resp.json \ + -w "%{http_code}" \ + -X POST \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + --data-binary @- \ + "$BASE_URL/projects" || true) + if [ "$code" = "200" ]; then + echo " ok" + elif [ "$code" = "409" ]; then + echo " already exists — use PUT /projects/$name to update" + else + echo " FAILED ($code): $(cat /tmp/register-resp.json 2>/dev/null || echo no-body)" + fi +done diff --git a/mcp/src/crafting_table_mcp/client.py b/mcp/src/crafting_table_mcp/client.py index b59b03c..1d2ac6a 100644 --- a/mcp/src/crafting_table_mcp/client.py +++ b/mcp/src/crafting_table_mcp/client.py @@ -374,3 +374,30 @@ class CraftingTableClient: raise ValueError("job_id must be non-empty") slug = quote(job_id, safe="") return self._get_text(f"/jobs/{slug}/log") + + def trigger_patch( + self, job_id: str, finding_id: int | None = None + ) -> dict: + """POST /jobs/{id}/patches — autonomous patch loop trigger. + + Returns the wire shape ``{"ok": bool, "attempt": }`` + from the server. ``attempt`` may be ``None`` when the job has no + actionable findings. + """ + if not job_id: + raise ValueError("job_id must be non-empty") + if finding_id is not None and not isinstance(finding_id, int): + raise ValueError("finding_id must be an integer or None") + slug = quote(job_id, safe="") + body: dict[str, Any] = {} + if finding_id is not None: + body["finding_id"] = int(finding_id) + payload = self._request( + "POST", f"/jobs/{slug}/patches", json_body=body + ) + if not isinstance(payload, dict): + raise CraftingTableError( + f"unexpected POST /jobs/{{id}}/patches response type: " + f"{type(payload).__name__}" + ) + return payload diff --git a/mcp/src/crafting_table_mcp/server.py b/mcp/src/crafting_table_mcp/server.py index 37256b4..4fe5309 100644 --- a/mcp/src/crafting_table_mcp/server.py +++ b/mcp/src/crafting_table_mcp/server.py @@ -9,8 +9,9 @@ Eight tools are exposed (per spec ``memory/spec-crafting-table.md``): - ``crafting_table_run_test`` — kick off a ``test`` recipe job. - ``crafting_table_get_job`` — fetch job state + log tail. - ``crafting_table_get_findings`` — fetch structured findings. -- ``crafting_table_draft_patch`` — wave-3 stub; returns "not yet - implemented" so the tool surface is stable but no work happens. +- ``crafting_table_draft_patch`` — autonomous patch loop trigger + (wave 3); calls ``POST /jobs/{id}/patches`` and returns the resulting + ``PatchAttempt``. Admin endpoints (``/admin/tokens``) are intentionally NOT exposed. Token minting is a human-gated operation; an LLM client has no business poking at @@ -279,14 +280,17 @@ def _tool_definitions() -> list[types.Tool]: types.Tool( name=TOOL_DRAFT_PATCH, description=( - "Draft a patch (unified diff) addressing one or more " - "findings on a job. WAVE 2B STUB — full implementation " - "lands in wave 3 / step 9 of the v0.1 plan. Today this tool " - "is callable but only returns a 'not yet implemented' " - "message; the surface exists so tool catalogues stay stable " - "across waves. Once shipped, the patch will be drafted via " - "clawdforge and applied to a worktree, with a Gitea PR " - "opened on the configured branch." + "Draft a patch (unified diff) addressing one finding on a " + "job. The server opens a clawdforge session, asks the model " + "for a unified diff, applies it to a fresh worktree, " + "re-runs the failing recipe to verify, and on success " + "pushes a branch and opens a Gitea PR. No auto-merge — " + "review and merge manually. Returns a PatchAttempt with " + "{status, branch_name, pr_url, error}; status ranges over " + "drafted/apply_failed/verify_failed/pushed/pr_opened/" + "max_attempts_exceeded. 503 if the patcher isn't " + "configured. v0.1 supports lint and cve findings; " + "test_fail is v0.2." ), inputSchema={ "type": "object", @@ -301,8 +305,8 @@ def _tool_definitions() -> list[types.Tool]: "minimum": 1, "description": ( "Optional specific finding id. If omitted, " - "drafts patches for all open findings on the " - "job." + "the server picks the highest-severity " + "actionable finding on the job." ), }, }, @@ -555,9 +559,6 @@ async def _dispatch( ) if name == TOOL_DRAFT_PATCH: - # Wave-2B stub: validate args lightly, return a stable message. - # Once wave-3 lands this whole branch becomes a real call to a - # /jobs/{id}/patch endpoint that drafts via clawdforge. job_id = args.get("job_id") if not isinstance(job_id, str) or not job_id: return _err_content("missing or empty 'job_id' argument"), True @@ -567,23 +568,35 @@ async def _dispatch( and not isinstance(finding_id, bool) ): return _err_content("'finding_id' must be an integer"), True - return ( - _ok_content( - { - "ok": False, - "pending": True, - "message": ( - "draft patch — not yet implemented (lands in " - "wave 3 / step 9). The tool surface is stable; " - "callers can keep referencing it. Today no " - "patch is drafted." - ), - "job_id": job_id, - "finding_id": finding_id, - } - ), - False, - ) + try: + payload = await asyncio.to_thread( + ct.trigger_patch, job_id, finding_id + ) + except ValueError as ve: + return _err_content(str(ve)), True + attempt = payload.get("attempt") if isinstance(payload, dict) else None + if attempt is None: + prose = ( + f"no actionable finding on job {job_id} — patcher " + f"declined to draft. Check " + f"crafting_table_get_findings to confirm or pass an " + f"explicit finding_id." + ) + return _two_block_content(prose, payload), False + status = attempt.get("status", "?") + branch = attempt.get("branch_name") or "(no branch)" + pr_url = attempt.get("pr_url") or "(no PR)" + err = attempt.get("error") or "" + prose_parts = [ + f"patch attempt #{attempt.get('attempt_number')} for finding " + f"{attempt.get('finding_id')} on job {job_id}: status={status}", + f"branch={branch}", + f"pr={pr_url}", + ] + if err: + prose_parts.append(f"error: {err}") + prose = "\n".join(prose_parts) + return _two_block_content(prose, payload), False return _err_content(f"unknown tool: {name}"), True diff --git a/mcp/tests/test_tools.py b/mcp/tests/test_tools.py index ad6fdb9..6059b29 100644 --- a/mcp/tests/test_tools.py +++ b/mcp/tests/test_tools.py @@ -572,43 +572,99 @@ class TestGetFindings(unittest.TestCase): self.assertIn("not found", content[0].text) -class TestDraftPatchStub(unittest.TestCase): - """Wave 2B stub: tool surface present, but returns a 'pending' message.""" +class TestDraftPatch(unittest.TestCase): + """Wave 3: real call to POST /jobs/{id}/patches; two-block return.""" - def test_returns_pending_message(self) -> None: + @responses.activate + def test_pr_opened_two_block_return(self) -> None: + """Server returns a pr_opened attempt → MCP returns prose + JSON.""" + responses.add( + responses.POST, + f"{BASE_URL}/jobs/j-1/patches", + json={ + "ok": True, + "attempt": { + "id": 7, + "finding_id": 42, + "job_id": "j-1", + "project_name": "demo", + "attempt_number": 1, + "status": "pr_opened", + "branch_name": "crafting-table/auto/j-1-42", + "pr_url": "http://192.168.0.5:3001/X/Y/pulls/9", + "diff_excerpt": "--- a/x\n+++ b/x", + "session_id": "s-1", + "error": None, + }, + }, + status=200, + ) + c = _client() + try: + content, is_error = _run( + _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"}) + ) + finally: + c.close() + self.assertFalse(is_error) + # Two-content-block return: prose + JSON. + self.assertEqual(len(content), 2) + prose = content[0].text + self.assertIn("pr_opened", prose) + self.assertIn("crafting-table/auto/j-1-42", prose) + self.assertIn("/pulls/9", prose) + body = json.loads(content[1].text) + self.assertTrue(body["ok"]) + self.assertEqual(body["attempt"]["status"], "pr_opened") + + @responses.activate + def test_no_actionable_finding(self) -> None: + responses.add( + responses.POST, + f"{BASE_URL}/jobs/j-1/patches", + json={"ok": True, "attempt": None, "reason": "no_actionable_finding"}, + status=200, + ) + c = _client() + try: + content, is_error = _run( + _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"}) + ) + finally: + c.close() + self.assertFalse(is_error) + self.assertIn("no actionable finding", content[0].text) + + @responses.activate + def test_with_finding_id_passes_through(self) -> None: + responses.add( + responses.POST, + f"{BASE_URL}/jobs/j-1/patches", + json={ + "ok": True, + "attempt": { + "id": 1, "finding_id": 42, "job_id": "j-1", + "project_name": "demo", "attempt_number": 1, + "status": "drafted", "branch_name": None, "pr_url": None, + "diff_excerpt": None, "session_id": None, + "error": "malformed_response", + }, + }, + status=200, + ) c = _client() try: content, is_error = _run( _dispatch( - c, - TOOL_DRAFT_PATCH, - {"job_id": "j-1"}, + c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": 42} ) ) finally: c.close() self.assertFalse(is_error) - body = json.loads(content[0].text) - self.assertFalse(body["ok"]) - self.assertTrue(body["pending"]) - self.assertIn("not yet implemented", body["message"]) - self.assertIn("wave 3", body["message"]) - - def test_with_finding_id(self) -> None: - c = _client() - try: - content, is_error = _run( - _dispatch( - c, - TOOL_DRAFT_PATCH, - {"job_id": "j-1", "finding_id": 42}, - ) - ) - finally: - c.close() - self.assertFalse(is_error) - body = json.loads(content[0].text) - self.assertEqual(body["finding_id"], 42) + body = json.loads(content[1].text) + self.assertEqual(body["attempt"]["finding_id"], 42) + self.assertEqual(body["attempt"]["status"], "drafted") def test_rejects_bool_finding_id(self) -> None: # bool is a subclass of int — defense-in-depth. @@ -616,9 +672,7 @@ class TestDraftPatchStub(unittest.TestCase): try: content, is_error = _run( _dispatch( - c, - TOOL_DRAFT_PATCH, - {"job_id": "j-1", "finding_id": True}, + c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": True} ) ) finally: @@ -635,6 +689,24 @@ class TestDraftPatchStub(unittest.TestCase): self.assertTrue(is_error) self.assertIn("job_id", content[0].text) + @responses.activate + def test_503_when_patcher_disabled(self) -> None: + responses.add( + responses.POST, + f"{BASE_URL}/jobs/j-1/patches", + json={"detail": "patcher not configured"}, + status=503, + ) + c = _client() + try: + content, is_error = _run( + _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"}) + ) + finally: + c.close() + self.assertTrue(is_error) + self.assertIn("503", content[0].text) + class TestUnknownTool(unittest.TestCase): def test_unknown_tool_returns_error(self) -> None: diff --git a/pyproject.toml b/pyproject.toml index 457d1e2..4359f0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "fastapi>=0.115,<1.0", "uvicorn[standard]>=0.30,<1.0", "pydantic>=2.7,<3.0", + "httpx>=0.27,<1.0", ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index f71f250..0776250 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ fastapi==0.115.5 uvicorn[standard]==0.32.1 pydantic==2.9.2 +httpx>=0.27,<1.0 diff --git a/tests/test_patcher.py b/tests/test_patcher.py new file mode 100644 index 0000000..351af4d --- /dev/null +++ b/tests/test_patcher.py @@ -0,0 +1,545 @@ +"""Patcher unit tests — drafted/apply_failed/verify_failed/pushed/pr_opened +status transitions plus the runner hook integration. + +We mock the clawdforge + Gitea wires (no real network calls) and stub the +runner._exec_recipe so the verify step is deterministic. Diff applying +uses real git in a temp worktree — this catches the wire-up issues that +pure unit tests miss. +""" +from __future__ import annotations + +import asyncio +import json +import shutil +import subprocess +import time +from pathlib import Path +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from crafting_table.db import DB +from crafting_table.patcher import ( + ClawdforgeClient, + GiteaClient, + Patcher, + PatcherConfig, + extract_diff_json, + findings_were_actionable, + turn_text, +) +from crafting_table.workspace import WorkspaceManager + + +# ---------- helpers --------------------------------------------------------- + + +def _make_origin_repo(root: Path, *, file_text: str = "hello\nworld\n") -> str: + """Create a bare-cloneable origin repo with a tracked file the patch + will rewrite.""" + if shutil.which("git") is None: + pytest.skip("git binary not present") + origin = root / "origin.git" + work = root / "origin-work" + work.mkdir() + subprocess.run(["git", "init", "-q", "-b", "main"], cwd=work, check=True) + subprocess.run(["git", "config", "user.email", "test@example"], cwd=work, check=True) + subprocess.run(["git", "config", "user.name", "test"], cwd=work, check=True) + subprocess.run(["git", "config", "commit.gpgsign", "false"], cwd=work, check=True) + (work / "src").mkdir() + (work / "src" / "app.py").write_text(file_text) + subprocess.run(["git", "add", "."], cwd=work, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=work, check=True) + # Bare clone so push works. + subprocess.run( + ["git", "clone", "--bare", str(work), str(origin)], + check=True, + capture_output=True, + ) + # Re-point work's origin at the bare so subsequent fetches in tests work. + subprocess.run( + ["git", "remote", "add", "bare", str(origin)], + cwd=work, check=True, capture_output=True, + ) + return str(origin) + + +def _seed_project_and_job( + db: DB, + *, + project_name: str, + git_url: str, + findings: list[dict] | None = None, + auto_patch: bool = True, +) -> tuple[str, int | None]: + """Insert a project + a job + (optionally) one finding. Returns + (job_id, finding_id_or_None).""" + # Project + db.insert_token(name="alpha", bearer="ct_alpha", is_admin=False, ip_cidrs=None) + recipe = { + "languages": ["python"], + "subprojects": [ + { + "path": ".", + "language": "python", + "lint": "echo 'lint ok'", + "timeout_secs": 30, + } + ], + "schedule": {}, + "notify": {"email": ["x@y"], "on": [], "auto_patch": auto_patch}, + } + db.upsert_project( + name=project_name, + git_url=git_url, + default_branch="main", + recipe_json=json.dumps(recipe), + owner_token="alpha", + ) + # Job + snapshot = { + "git_url": git_url, + "default_branch": "main", + "languages": ["python"], + "subprojects": recipe["subprojects"], + } + job_id = "job-1" + db.insert_job( + job_id=job_id, + project_name=project_name, + subproject_path=".", + recipe="lint", + branch="main", + log_path="/tmp/_x.log", + recipe_snapshot_json=json.dumps(snapshot), + ) + db.mark_job_finished(job_id=job_id, status="failed", exit_code=1) + + finding_id = None + for f in findings or []: + finding_id = db.insert_finding( + job_id=job_id, + kind=f.get("kind", "lint"), + severity=f.get("severity", "warn"), + message=f.get("message", "msg"), + fingerprint=f.get("fingerprint", "abcdef0123456789"), + file=f.get("file"), + line=f.get("line"), + code=f.get("code"), + suggested_fix=f.get("suggested_fix"), + raw_json=None, + ) + return job_id, finding_id + + +def _patcher_with_mocks(db: DB, workspace: WorkspaceManager, *, runner=None): + """Build a Patcher with mocked clawdforge + Gitea clients. Returns + (patcher, claw_mock, gitea_mock) so tests can assert on call counts. + """ + cfg = PatcherConfig( + clawdforge_base_url="http://cf.local", + clawdforge_token="cf_x", + gitea_base_url="http://gitea.local", + gitea_token="gt_x", + max_attempts_per_finding=3, + ) + claw = MagicMock(spec=ClawdforgeClient) + claw.create_session = AsyncMock(return_value={"session_id": "s-1"}) + claw.turn = AsyncMock() + claw.close_session = AsyncMock() + gitea = MagicMock(spec=GiteaClient) + gitea.open_pr = AsyncMock( + return_value={"html_url": "http://192.168.0.5:3001/X/Y/pulls/1"} + ) + p = Patcher( + db=db, + workspace=workspace, + config=cfg, + runner=runner, + clawdforge=claw, + gitea=gitea, + ) + return p, claw, gitea + + +def _diff_for(file_rel: str, *, old: str, new: str) -> str: + """Build a unified diff that real git apply will accept against a + file containing exactly `old`. Format matches `git diff` output.""" + return ( + f"diff --git a/{file_rel} b/{file_rel}\n" + f"--- a/{file_rel}\n" + f"+++ b/{file_rel}\n" + f"@@ -1,{len(old.splitlines())} +1,{len(new.splitlines())} @@\n" + + "\n".join(f"-{l}" for l in old.splitlines()) + "\n" + + "\n".join(f"+{l}" for l in new.splitlines()) + "\n" + ) + + +# ---------- helper-fn unit tests ------------------------------------------ + + +def test_findings_were_actionable_lint_with_locator(): + assert findings_were_actionable([ + {"kind": "lint", "file": "x.py", "line": 1} + ]) + + +def test_findings_were_actionable_lint_without_locator(): + assert not findings_were_actionable([ + {"kind": "lint", "file": None, "line": None} + ]) + + +def test_findings_were_actionable_test_fail_skipped(): + # test_fail is NOT actionable in v0.1 + assert not findings_were_actionable([ + {"kind": "test_fail", "file": "x.py", "line": 1} + ]) + + +def test_findings_were_actionable_cve(): + assert findings_were_actionable([ + {"kind": "cve", "code": "RUSTSEC-1", "suggested_fix": "bump"} + ]) + + +def test_extract_diff_json_plain(): + obj = extract_diff_json('{"diff": "x", "explanation": "y"}') + assert obj == {"diff": "x", "explanation": "y"} + + +def test_extract_diff_json_fenced(): + obj = extract_diff_json('```json\n{"diff": "x", "explanation": "y"}\n```') + assert obj is not None + assert obj["diff"] == "x" + + +def test_extract_diff_json_returns_none_on_garbage(): + assert extract_diff_json("not even json") is None + + +def test_turn_text_concatenates_text_events(): + assert turn_text({"events": [ + {"type": "text", "content": "hello "}, + {"type": "tool_call"}, + {"type": "text", "content": "world"}, + ]}) == "hello world" + + +# ---------- patcher pipeline tests ----------------------------------------- + + +@pytest.mark.asyncio +async def test_drafts_via_clawdforge_session(db_only, tmp_path): + """First-light test: malformed JSON from the model leaves the attempt + in status=drafted with error=malformed_response.""" + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, + project_name="demo", + git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "bad", + }], + ) + + p, claw, gitea = _patcher_with_mocks(db_only, workspace) + # Model returns prose without JSON. + claw.turn.return_value = { + "events": [{"type": "text", "content": "I cannot help with that"}] + } + + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None + assert attempt.status == "drafted" + assert attempt.error == "malformed_response" + assert claw.create_session.await_count == 1 + assert claw.close_session.await_count == 1 + + +@pytest.mark.asyncio +async def test_apply_failed_when_diff_rejects(db_only, tmp_path): + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, project_name="demo", git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "x", + }], + ) + p, claw, gitea = _patcher_with_mocks(db_only, workspace) + # Diff with wrong line numbers (the file is 2 lines, this hits line 999). + bad_diff = ( + "diff --git a/src/app.py b/src/app.py\n" + "--- a/src/app.py\n" + "+++ b/src/app.py\n" + "@@ -999,1 +999,1 @@\n" + "-nonexistent\n" + "+something else\n" + ) + claw.turn.return_value = { + "events": [{"type": "text", "content": json.dumps({ + "diff": bad_diff, "explanation": "x", "confidence": "high" + })}] + } + + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None + assert attempt.status == "apply_failed" + assert claw.close_session.await_count == 1 + + +@pytest.mark.asyncio +async def test_verify_failed_when_recipe_still_fails(db_only, tmp_path): + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, project_name="demo", git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "x", + }], + ) + # Stub runner that fails verify. + fake_runner = MagicMock() + fake_runner._exec_recipe = AsyncMock(return_value=(1, False)) + p, claw, gitea = _patcher_with_mocks(db_only, workspace, runner=fake_runner) + # Valid diff that DOES apply (replace 'hello' with 'goodbye') + good_diff = _diff_for("src/app.py", old="hello\nworld", new="goodbye\nworld") + claw.turn.return_value = { + "events": [{"type": "text", "content": json.dumps({ + "diff": good_diff, "explanation": "x", "confidence": "high" + })}] + } + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None + assert attempt.status == "verify_failed" + assert fake_runner._exec_recipe.await_count == 1 + + +@pytest.mark.asyncio +async def test_pushed_and_pr_opened_on_success(db_only, tmp_path): + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, project_name="demo", git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "x", + }], + ) + fake_runner = MagicMock() + fake_runner._exec_recipe = AsyncMock(return_value=(0, False)) + p, claw, gitea = _patcher_with_mocks(db_only, workspace, runner=fake_runner) + good_diff = _diff_for("src/app.py", old="hello\nworld", new="goodbye\nworld") + claw.turn.return_value = { + "events": [{"type": "text", "content": json.dumps({ + "diff": good_diff, "explanation": "tiny fix", "confidence": "high" + })}] + } + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None, "expected a PatchAttempt" + assert attempt.status == "pr_opened", f"unexpected: {attempt.status} / {attempt.error}" + assert attempt.pr_url == "http://192.168.0.5:3001/X/Y/pulls/1" + assert attempt.branch_name and "crafting-table/auto/" in attempt.branch_name + assert gitea.open_pr.await_count == 1 + + +@pytest.mark.asyncio +async def test_max_attempts_per_finding(db_only, tmp_path): + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, project_name="demo", git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "x", + }], + ) + # Pre-seed three failed attempts so the 4th early-exits. + for i in range(1, 4): + db_only.insert_patch_attempt( + finding_id=finding_id, job_id=job_id, project_name="demo", + attempt_number=i, status="apply_failed", + ) + + p, claw, gitea = _patcher_with_mocks(db_only, workspace) + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None + assert attempt.status == "max_attempts_exceeded" + assert claw.create_session.await_count == 0 + + +@pytest.mark.asyncio +async def test_clawdforge_session_always_closes_on_exception(db_only, tmp_path): + git_url = _make_origin_repo(tmp_path) + workspace = WorkspaceManager(tmp_path / "ws") + job_id, finding_id = _seed_project_and_job( + db_only, project_name="demo", git_url=git_url, + findings=[{ + "kind": "lint", "severity": "warn", "code": "F401", + "file": "src/app.py", "line": 1, "message": "x", + }], + ) + p, claw, gitea = _patcher_with_mocks(db_only, workspace) + claw.turn.side_effect = RuntimeError("simulated network blip") + + attempt = await p.maybe_draft(job_id, finding_id=finding_id) + assert attempt is not None + assert attempt.status == "failed" + # Session was created and then closed even though turn raised. + assert claw.create_session.await_count == 1 + assert claw.close_session.await_count == 1 + + +@pytest.mark.asyncio +async def test_runner_invokes_patcher_when_auto_patch_true(client, tmp_path): + """Integration: the runner's post-job hook calls patcher.maybe_draft_for_job + when project.notify.auto_patch=true and there are actionable findings. + """ + tc, ctx = client + server = ctx["server"] + + # Build + inject a stub patcher BEFORE we kick the job. The real + # _maybe_auto_patch_hook closes over server.patcher at call time. + stub_patcher = MagicMock() + stub_patcher.maybe_draft_for_job = AsyncMock(return_value=[]) + server.patcher = stub_patcher + + # Make a tiny git repo so the runner can clone+worktree. + if shutil.which("git") is None: + pytest.skip("git not available") + repo = tmp_path / "fixture-repo" + repo.mkdir() + subprocess.run(["git", "init", "-q", "-b", "main"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.email", "t@e"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True) + subprocess.run(["git", "config", "commit.gpgsign", "false"], cwd=repo, check=True) + (repo / "README.md").write_text("hi\n") + subprocess.run(["git", "add", "."], cwd=repo, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=repo, check=True) + git_url = str(repo) + + # Register a project with notify.auto_patch=true and a lint that emits + # ruff-shaped JSON so the parser picks up an actionable finding. + ruff_stub = json.dumps([{ + "code": "F401", + "message": "'os' imported", + "filename": "src/app.py", + "location": {"row": 3, "column": 1}, + }]) + payload = { + "name": "ct-autopatch-on", + "git_url": git_url, + "default_branch": "main", + "languages": ["python"], + "subprojects": [{ + "path": ".", + "language": "python", + "lint": f"echo '{ruff_stub}'; exit 1", + "timeout_secs": 20, + }], + "schedule": {}, + "notify": {"email": ["x@y"], "on": [], "auto_patch": True}, + } + r = tc.post( + "/projects", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json=payload, + ) + assert r.status_code == 200, r.text + + r2 = tc.post( + "/projects/ct-autopatch-on/jobs", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={"recipe": "lint"}, + ) + assert r2.status_code == 200, r2.text + job_id = r2.json()["job_id"] + + # Wait for terminal. + deadline = time.monotonic() + 30 + while time.monotonic() < deadline: + rr = tc.get( + f"/jobs/{job_id}", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + ) + if rr.json()["job"]["status"] in ("succeeded", "failed", "timed_out", "cancelled"): + break + time.sleep(0.1) + + # Hook fan-out is fire-and-forget; let the loop turn once more. + time.sleep(0.2) + + # Patcher.maybe_draft_for_job should have been called at least once. + assert stub_patcher.maybe_draft_for_job.await_count >= 1 + + +@pytest.mark.asyncio +async def test_runner_skips_patcher_when_auto_patch_false(client, tmp_path): + tc, ctx = client + server = ctx["server"] + stub_patcher = MagicMock() + stub_patcher.maybe_draft_for_job = AsyncMock(return_value=[]) + server.patcher = stub_patcher + + if shutil.which("git") is None: + pytest.skip("git not available") + repo = tmp_path / "fixture-repo-off" + repo.mkdir() + subprocess.run(["git", "init", "-q", "-b", "main"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.email", "t@e"], cwd=repo, check=True) + subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True) + subprocess.run(["git", "config", "commit.gpgsign", "false"], cwd=repo, check=True) + (repo / "README.md").write_text("hi\n") + subprocess.run(["git", "add", "."], cwd=repo, check=True) + subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=repo, check=True) + git_url = str(repo) + + ruff_stub = json.dumps([{ + "code": "F401", "message": "x", + "filename": "src/app.py", "location": {"row": 3, "column": 1}, + }]) + payload = { + "name": "ct-autopatch-off", + "git_url": git_url, + "default_branch": "main", + "languages": ["python"], + "subprojects": [{ + "path": ".", + "language": "python", + "lint": f"echo '{ruff_stub}'; exit 1", + "timeout_secs": 20, + }], + "schedule": {}, + "notify": {"email": ["x@y"], "on": [], "auto_patch": False}, + } + r = tc.post( + "/projects", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json=payload, + ) + assert r.status_code == 200, r.text + r2 = tc.post( + "/projects/ct-autopatch-off/jobs", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={"recipe": "lint"}, + ) + assert r2.status_code == 200, r2.text + job_id = r2.json()["job_id"] + + deadline = time.monotonic() + 30 + while time.monotonic() < deadline: + rr = tc.get( + f"/jobs/{job_id}", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + ) + if rr.json()["job"]["status"] in ("succeeded", "failed", "timed_out", "cancelled"): + break + time.sleep(0.1) + time.sleep(0.2) + + assert stub_patcher.maybe_draft_for_job.await_count == 0 diff --git a/tests/test_patches_api.py b/tests/test_patches_api.py new file mode 100644 index 0000000..cbd9630 --- /dev/null +++ b/tests/test_patches_api.py @@ -0,0 +1,289 @@ +"""HTTP API tests for the wave-3 patches surface. + +Covers POST /jobs/{id}/patches (manual trigger), GET /patches (list with +filters), GET /patches/{id} (detail with cross-token guards). The patcher +itself is stubbed so we don't make real clawdforge / Gitea calls. +""" +from __future__ import annotations + +import json +import time +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from crafting_table.patcher import PatchAttempt +from tests.conftest import sample_project_payload + + +def _install_stub_patcher(server, *, attempt: PatchAttempt | None = None): + """Replace server.patcher with a stub that returns the given attempt. + + ``attempt=None`` simulates the "no actionable finding" code path. + Returns the stub for assertion-side access. + """ + stub = MagicMock() + stub.maybe_draft = AsyncMock(return_value=attempt) + server.patcher = stub + return stub + + +def _register_demo_project(tc, bearer: str, *, name: str = "demo") -> None: + payload = sample_project_payload(name=name) + r = tc.post( + "/projects", + headers={"Authorization": f"Bearer {bearer}"}, + json=payload, + ) + assert r.status_code == 200, r.text + + +def _seed_job_row(server, *, project_name: str = "demo", job_id: str = "j-1") -> None: + snapshot = { + "git_url": "/dev/null", + "default_branch": "main", + "subprojects": [{ + "path": ".", "language": "python", "lint": "echo x" + }], + "languages": ["python"], + } + server.db.insert_job( + job_id=job_id, + project_name=project_name, + subproject_path=".", + recipe="lint", + branch="main", + log_path="/tmp/_x.log", + recipe_snapshot_json=json.dumps(snapshot), + ) + server.db.mark_job_finished(job_id=job_id, status="failed", exit_code=1) + + +def test_post_patches_with_finding_id(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + + fake = PatchAttempt( + finding_id=42, + job_id="j-1", + project_name="demo", + attempt_number=1, + status="pr_opened", + branch_name="crafting-table/auto/j-1-42", + pr_url="http://192.168.0.5:3001/X/Y/pulls/9", + diff_excerpt="--- a/x\n+++ b/x", + session_id="s-1", + ) + fake.id = 7 + stub = _install_stub_patcher(server, attempt=fake) + + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={"finding_id": 42}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["ok"] is True + assert body["attempt"]["status"] == "pr_opened" + assert body["attempt"]["pr_url"].endswith("/9") + # Patcher was called with the explicit finding_id. + assert stub.maybe_draft.await_count == 1 + args, kwargs = stub.maybe_draft.call_args + assert kwargs.get("finding_id") == 42 or (len(args) > 1 and args[1] == 42) + + +def test_post_patches_without_finding_id_auto_picks(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + + fake = PatchAttempt( + finding_id=99, job_id="j-1", project_name="demo", + attempt_number=1, status="drafted", + ) + fake.id = 1 + _install_stub_patcher(server, attempt=fake) + + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={}, + ) + assert r.status_code == 200, r.text + assert r.json()["attempt"]["finding_id"] == 99 + + +def test_post_patches_no_actionable_returns_attempt_none(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + _install_stub_patcher(server, attempt=None) + + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={}, + ) + assert r.status_code == 200, r.text + body = r.json() + assert body["ok"] is True + assert body["attempt"] is None + assert body.get("reason") == "no_actionable_finding" + + +def test_post_patches_503_when_patcher_disabled(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + server.patcher = None + + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={}, + ) + assert r.status_code == 503 + + +def test_post_patches_cross_token_returns_404(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + _install_stub_patcher(server, attempt=None) + + # bravo cannot see alpha's job → 404 (existence-leak guard). + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['bravo_bearer']}"}, + json={}, + ) + assert r.status_code == 404 + + +def test_post_patches_rejects_non_int_finding_id(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + _install_stub_patcher(server, attempt=None) + + r = tc.post( + "/jobs/j-1/patches", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + json={"finding_id": "not-an-int"}, + ) + assert r.status_code == 400 + + +def test_get_patches_filtered_by_project_and_status(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + + # Insert two attempts with different statuses, plus one for a different + # (synthetic) project so the filter actually has to exclude. + fid = server.db.insert_finding( + job_id="j-1", kind="lint", severity="warn", message="m", + fingerprint="f", file="x.py", line=1, code="X", + ) + server.db.insert_patch_attempt( + finding_id=fid, job_id="j-1", project_name="demo", + attempt_number=1, status="pr_opened", + pr_url="http://gitea/X/Y/pulls/1", + ) + server.db.insert_patch_attempt( + finding_id=fid, job_id="j-1", project_name="demo", + attempt_number=2, status="apply_failed", + ) + + r = tc.get( + "/patches?project=demo&status=pr_opened", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + ) + assert r.status_code == 200, r.text + rows = r.json()["patches"] + assert len(rows) == 1 + assert rows[0]["status"] == "pr_opened" + + +def test_get_patches_owner_scoped(client): + """Bravo's /patches list never includes Alpha's attempts.""" + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + fid = server.db.insert_finding( + job_id="j-1", kind="lint", severity="warn", message="m", + fingerprint="f", file="x.py", line=1, code="X", + ) + server.db.insert_patch_attempt( + finding_id=fid, job_id="j-1", project_name="demo", + attempt_number=1, status="pr_opened", + ) + + r = tc.get( + "/patches", + headers={"Authorization": f"Bearer {ctx['bravo_bearer']}"}, + ) + assert r.status_code == 200 + assert r.json()["patches"] == [] + + +def test_get_patches_detail_owner(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + fid = server.db.insert_finding( + job_id="j-1", kind="lint", severity="warn", message="m", + fingerprint="f", file="x.py", line=1, code="X", + ) + pid = server.db.insert_patch_attempt( + finding_id=fid, job_id="j-1", project_name="demo", + attempt_number=1, status="pr_opened", + ) + + r = tc.get( + f"/patches/{pid}", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + ) + assert r.status_code == 200, r.text + assert r.json()["patch"]["id"] == pid + + +def test_get_patches_detail_other_token_404(client): + tc, ctx = client + server = ctx["server"] + _register_demo_project(tc, ctx["alpha_bearer"]) + _seed_job_row(server) + fid = server.db.insert_finding( + job_id="j-1", kind="lint", severity="warn", message="m", + fingerprint="f", file="x.py", line=1, code="X", + ) + pid = server.db.insert_patch_attempt( + finding_id=fid, job_id="j-1", project_name="demo", + attempt_number=1, status="pr_opened", + ) + + r = tc.get( + f"/patches/{pid}", + headers={"Authorization": f"Bearer {ctx['bravo_bearer']}"}, + ) + assert r.status_code == 404 + + +def test_get_patches_detail_missing(client): + tc, ctx = client + r = tc.get( + "/patches/999999", + headers={"Authorization": f"Bearer {ctx['alpha_bearer']}"}, + ) + assert r.status_code == 404