v0.1 wave 3 (steps 9+10): autonomous patch loop + production recipes

Step 9 — autonomous patch loop:
- patcher.py: clawdforge session → unified diff → worktree apply → verify recipe → push branch → open Gitea PR
- migration 007: patch_attempts (UNIQUE per finding+attempt, max 3 attempts)
- runner.py: post-parse hook fires patcher.maybe_draft_for_job when notify.auto_patch=true
- server.py: POST /jobs/{id}/patches, GET /patches, GET /patches/{id}
- digest.py: patch-drafted lines + open-follow-up count via Gitea PR state check
- mcp: crafting_table_draft_patch stub replaced with real implementation
- tests/test_patcher.py + tests/test_patches_api.py: 27 new tests

No auto-merge — patches stop at PR-open. Cobb merges.

Step 10 — production recipes:
- examples/recipes/clawdforge.json: 14 subprojects across all SDKs, audit nightly
- examples/recipes/cauldron.json: single Flask subproject, audit nightly
- examples/recipes/tradecraft.json: nightly audit, auto_patch=false (manual review)
- examples/register-all.sh: bulk-register helper with GITEA_TOKEN substitution
- README "Autonomous patch loop" + "First production recipes" sections

Tests: server 116→143, mcp 65→67. All green.

Spec: memory/spec-crafting-table.md
2026-04-29 09:04:48 -07:00 · 2026-04-29 09:04:48 -07:00 · 4eab869df0
commit 4eab869df0
parent ecb9d76e6d
17 changed files with 2752 additions and 78 deletions
--- a/mcp/src/crafting_table_mcp/client.py
+++ b/mcp/src/crafting_table_mcp/client.py
@@ -374,3 +374,30 @@ class CraftingTableClient:
            raise ValueError("job_id must be non-empty")
        slug = quote(job_id, safe="")
        return self._get_text(f"/jobs/{slug}/log")
+
+    def trigger_patch(
+        self, job_id: str, finding_id: int | None = None
+    ) -> dict:
+        """POST /jobs/{id}/patches — autonomous patch loop trigger.
+
+        Returns the wire shape ``{"ok": bool, "attempt": <PatchAttempt>}``
+        from the server. ``attempt`` may be ``None`` when the job has no
+        actionable findings.
+        """
+        if not job_id:
+            raise ValueError("job_id must be non-empty")
+        if finding_id is not None and not isinstance(finding_id, int):
+            raise ValueError("finding_id must be an integer or None")
+        slug = quote(job_id, safe="")
+        body: dict[str, Any] = {}
+        if finding_id is not None:
+            body["finding_id"] = int(finding_id)
+        payload = self._request(
+            "POST", f"/jobs/{slug}/patches", json_body=body
+        )
+        if not isinstance(payload, dict):
+            raise CraftingTableError(
+                f"unexpected POST /jobs/{{id}}/patches response type: "
+                f"{type(payload).__name__}"
+            )
+        return payload
--- a/mcp/src/crafting_table_mcp/server.py
+++ b/mcp/src/crafting_table_mcp/server.py
@@ -9,8 +9,9 @@ Eight tools are exposed (per spec ``memory/spec-crafting-table.md``):
 - ``crafting_table_run_test``           — kick off a ``test`` recipe job.
 - ``crafting_table_get_job``            — fetch job state + log tail.
 - ``crafting_table_get_findings``       — fetch structured findings.
- ``crafting_table_draft_patch``        — wave-3 stub; returns "not yet
-  implemented" so the tool surface is stable but no work happens.
+- ``crafting_table_draft_patch``        — autonomous patch loop trigger
+  (wave 3); calls ``POST /jobs/{id}/patches`` and returns the resulting
+  ``PatchAttempt``.

 Admin endpoints (``/admin/tokens``) are intentionally NOT exposed. Token
 minting is a human-gated operation; an LLM client has no business poking at
@@ -279,14 +280,17 @@ def _tool_definitions() -> list[types.Tool]:
        types.Tool(
            name=TOOL_DRAFT_PATCH,
            description=(
-                "Draft a patch (unified diff) addressing one or more "
-                "findings on a job. WAVE 2B STUB — full implementation "
-                "lands in wave 3 / step 9 of the v0.1 plan. Today this tool "
-                "is callable but only returns a 'not yet implemented' "
-                "message; the surface exists so tool catalogues stay stable "
-                "across waves. Once shipped, the patch will be drafted via "
-                "clawdforge and applied to a worktree, with a Gitea PR "
-                "opened on the configured branch."
+                "Draft a patch (unified diff) addressing one finding on a "
+                "job. The server opens a clawdforge session, asks the model "
+                "for a unified diff, applies it to a fresh worktree, "
+                "re-runs the failing recipe to verify, and on success "
+                "pushes a branch and opens a Gitea PR. No auto-merge — "
+                "review and merge manually. Returns a PatchAttempt with "
+                "{status, branch_name, pr_url, error}; status ranges over "
+                "drafted/apply_failed/verify_failed/pushed/pr_opened/"
+                "max_attempts_exceeded. 503 if the patcher isn't "
+                "configured. v0.1 supports lint and cve findings; "
+                "test_fail is v0.2."
            ),
            inputSchema={
                "type": "object",
@@ -301,8 +305,8 @@ def _tool_definitions() -> list[types.Tool]:
                        "minimum": 1,
                        "description": (
                            "Optional specific finding id. If omitted, "
-                            "drafts patches for all open findings on the "
-                            "job."
+                            "the server picks the highest-severity "
+                            "actionable finding on the job."
                        ),
                    },
                },
@@ -555,9 +559,6 @@ async def _dispatch(
            )

        if name == TOOL_DRAFT_PATCH:
-            # Wave-2B stub: validate args lightly, return a stable message.
-            # Once wave-3 lands this whole branch becomes a real call to a
-            # /jobs/{id}/patch endpoint that drafts via clawdforge.
            job_id = args.get("job_id")
            if not isinstance(job_id, str) or not job_id:
                return _err_content("missing or empty 'job_id' argument"), True
@@ -567,23 +568,35 @@ async def _dispatch(
                and not isinstance(finding_id, bool)
            ):
                return _err_content("'finding_id' must be an integer"), True
-            return (
-                _ok_content(
-                    {
-                        "ok": False,
-                        "pending": True,
-                        "message": (
-                            "draft patch — not yet implemented (lands in "
-                            "wave 3 / step 9). The tool surface is stable; "
-                            "callers can keep referencing it. Today no "
-                            "patch is drafted."
-                        ),
-                        "job_id": job_id,
-                        "finding_id": finding_id,
-                    }
-                ),
-                False,
-            )
+            try:
+                payload = await asyncio.to_thread(
+                    ct.trigger_patch, job_id, finding_id
+                )
+            except ValueError as ve:
+                return _err_content(str(ve)), True
+            attempt = payload.get("attempt") if isinstance(payload, dict) else None
+            if attempt is None:
+                prose = (
+                    f"no actionable finding on job {job_id} — patcher "
+                    f"declined to draft. Check "
+                    f"crafting_table_get_findings to confirm or pass an "
+                    f"explicit finding_id."
+                )
+                return _two_block_content(prose, payload), False
+            status = attempt.get("status", "?")
+            branch = attempt.get("branch_name") or "(no branch)"
+            pr_url = attempt.get("pr_url") or "(no PR)"
+            err = attempt.get("error") or ""
+            prose_parts = [
+                f"patch attempt #{attempt.get('attempt_number')} for finding "
+                f"{attempt.get('finding_id')} on job {job_id}: status={status}",
+                f"branch={branch}",
+                f"pr={pr_url}",
+            ]
+            if err:
+                prose_parts.append(f"error: {err}")
+            prose = "\n".join(prose_parts)
+            return _two_block_content(prose, payload), False

        return _err_content(f"unknown tool: {name}"), True

--- a/mcp/tests/test_tools.py
+++ b/mcp/tests/test_tools.py
@@ -572,43 +572,99 @@ class TestGetFindings(unittest.TestCase):
        self.assertIn("not found", content[0].text)


-class TestDraftPatchStub(unittest.TestCase):
-    """Wave 2B stub: tool surface present, but returns a 'pending' message."""
+class TestDraftPatch(unittest.TestCase):
+    """Wave 3: real call to POST /jobs/{id}/patches; two-block return."""

-    def test_returns_pending_message(self) -> None:
+    @responses.activate
+    def test_pr_opened_two_block_return(self) -> None:
+        """Server returns a pr_opened attempt → MCP returns prose + JSON."""
+        responses.add(
+            responses.POST,
+            f"{BASE_URL}/jobs/j-1/patches",
+            json={
+                "ok": True,
+                "attempt": {
+                    "id": 7,
+                    "finding_id": 42,
+                    "job_id": "j-1",
+                    "project_name": "demo",
+                    "attempt_number": 1,
+                    "status": "pr_opened",
+                    "branch_name": "crafting-table/auto/j-1-42",
+                    "pr_url": "http://192.168.0.5:3001/X/Y/pulls/9",
+                    "diff_excerpt": "--- a/x\n+++ b/x",
+                    "session_id": "s-1",
+                    "error": None,
+                },
+            },
+            status=200,
+        )
+        c = _client()
+        try:
+            content, is_error = _run(
+                _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
+            )
+        finally:
+            c.close()
+        self.assertFalse(is_error)
+        # Two-content-block return: prose + JSON.
+        self.assertEqual(len(content), 2)
+        prose = content[0].text
+        self.assertIn("pr_opened", prose)
+        self.assertIn("crafting-table/auto/j-1-42", prose)
+        self.assertIn("/pulls/9", prose)
+        body = json.loads(content[1].text)
+        self.assertTrue(body["ok"])
+        self.assertEqual(body["attempt"]["status"], "pr_opened")
+
+    @responses.activate
+    def test_no_actionable_finding(self) -> None:
+        responses.add(
+            responses.POST,
+            f"{BASE_URL}/jobs/j-1/patches",
+            json={"ok": True, "attempt": None, "reason": "no_actionable_finding"},
+            status=200,
+        )
+        c = _client()
+        try:
+            content, is_error = _run(
+                _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
+            )
+        finally:
+            c.close()
+        self.assertFalse(is_error)
+        self.assertIn("no actionable finding", content[0].text)
+
+    @responses.activate
+    def test_with_finding_id_passes_through(self) -> None:
+        responses.add(
+            responses.POST,
+            f"{BASE_URL}/jobs/j-1/patches",
+            json={
+                "ok": True,
+                "attempt": {
+                    "id": 1, "finding_id": 42, "job_id": "j-1",
+                    "project_name": "demo", "attempt_number": 1,
+                    "status": "drafted", "branch_name": None, "pr_url": None,
+                    "diff_excerpt": None, "session_id": None,
+                    "error": "malformed_response",
+                },
+            },
+            status=200,
+        )
        c = _client()
        try:
            content, is_error = _run(
                _dispatch(
-                    c,
-                    TOOL_DRAFT_PATCH,
-                    {"job_id": "j-1"},
+                    c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": 42}
                )
            )
        finally:
            c.close()
        self.assertFalse(is_error)
-        body = json.loads(content[0].text)
-        self.assertFalse(body["ok"])
-        self.assertTrue(body["pending"])
-        self.assertIn("not yet implemented", body["message"])
-        self.assertIn("wave 3", body["message"])
-
-    def test_with_finding_id(self) -> None:
-        c = _client()
-        try:
-            content, is_error = _run(
-                _dispatch(
-                    c,
-                    TOOL_DRAFT_PATCH,
-                    {"job_id": "j-1", "finding_id": 42},
-                )
-            )
-        finally:
-            c.close()
-        self.assertFalse(is_error)
-        body = json.loads(content[0].text)
-        self.assertEqual(body["finding_id"], 42)
+        body = json.loads(content[1].text)
+        self.assertEqual(body["attempt"]["finding_id"], 42)
+        self.assertEqual(body["attempt"]["status"], "drafted")

    def test_rejects_bool_finding_id(self) -> None:
        # bool is a subclass of int — defense-in-depth.
@@ -616,9 +672,7 @@ class TestDraftPatchStub(unittest.TestCase):
        try:
            content, is_error = _run(
                _dispatch(
-                    c,
-                    TOOL_DRAFT_PATCH,
-                    {"job_id": "j-1", "finding_id": True},
+                    c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": True}
                )
            )
        finally:
@@ -635,6 +689,24 @@ class TestDraftPatchStub(unittest.TestCase):
        self.assertTrue(is_error)
        self.assertIn("job_id", content[0].text)

+    @responses.activate
+    def test_503_when_patcher_disabled(self) -> None:
+        responses.add(
+            responses.POST,
+            f"{BASE_URL}/jobs/j-1/patches",
+            json={"detail": "patcher not configured"},
+            status=503,
+        )
+        c = _client()
+        try:
+            content, is_error = _run(
+                _dispatch(c, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
+            )
+        finally:
+            c.close()
+        self.assertTrue(is_error)
+        self.assertIn("503", content[0].text)
+

 class TestUnknownTool(unittest.TestCase):
    def test_unknown_tool_returns_error(self) -> None: