v0.1 wave 3 (steps 9+10): autonomous patch loop + production recipes

Step 9 — autonomous patch loop:
- patcher.py: clawdforge session → unified diff → worktree apply → verify recipe → push branch → open Gitea PR
- migration 007: patch_attempts (UNIQUE per finding+attempt, max 3 attempts)
- runner.py: post-parse hook fires patcher.maybe_draft_for_job when notify.auto_patch=true
- server.py: POST /jobs/{id}/patches, GET /patches, GET /patches/{id}
- digest.py: patch-drafted lines + open-follow-up count via Gitea PR state check
- mcp: crafting_table_draft_patch stub replaced with real implementation
- tests/test_patcher.py + tests/test_patches_api.py: 27 new tests

No auto-merge — patches stop at PR-open. Cobb merges.

Step 10 — production recipes:
- examples/recipes/clawdforge.json: 14 subprojects across all SDKs, audit nightly
- examples/recipes/cauldron.json: single Flask subproject, audit nightly
- examples/recipes/tradecraft.json: nightly audit, auto_patch=false (manual review)
- examples/register-all.sh: bulk-register helper with GITEA_TOKEN substitution
- README "Autonomous patch loop" + "First production recipes" sections

Tests: server 116→143, mcp 65→67. All green.

Spec: memory/spec-crafting-table.md
This commit is contained in:
Kayos 2026-04-29 09:04:48 -07:00
parent ecb9d76e6d
commit 4eab869df0
17 changed files with 2752 additions and 78 deletions

View file

@ -374,3 +374,30 @@ class CraftingTableClient:
raise ValueError("job_id must be non-empty")
slug = quote(job_id, safe="")
return self._get_text(f"/jobs/{slug}/log")
def trigger_patch(
self, job_id: str, finding_id: int | None = None
) -> dict:
"""POST /jobs/{id}/patches — autonomous patch loop trigger.
Returns the wire shape ``{"ok": bool, "attempt": <PatchAttempt>}``
from the server. ``attempt`` may be ``None`` when the job has no
actionable findings.
"""
if not job_id:
raise ValueError("job_id must be non-empty")
if finding_id is not None and not isinstance(finding_id, int):
raise ValueError("finding_id must be an integer or None")
slug = quote(job_id, safe="")
body: dict[str, Any] = {}
if finding_id is not None:
body["finding_id"] = int(finding_id)
payload = self._request(
"POST", f"/jobs/{slug}/patches", json_body=body
)
if not isinstance(payload, dict):
raise CraftingTableError(
f"unexpected POST /jobs/{{id}}/patches response type: "
f"{type(payload).__name__}"
)
return payload

View file

@ -9,8 +9,9 @@ Eight tools are exposed (per spec ``memory/spec-crafting-table.md``):
- ``crafting_table_run_test`` kick off a ``test`` recipe job.
- ``crafting_table_get_job`` fetch job state + log tail.
- ``crafting_table_get_findings`` fetch structured findings.
- ``crafting_table_draft_patch`` wave-3 stub; returns "not yet
implemented" so the tool surface is stable but no work happens.
- ``crafting_table_draft_patch`` autonomous patch loop trigger
(wave 3); calls ``POST /jobs/{id}/patches`` and returns the resulting
``PatchAttempt``.
Admin endpoints (``/admin/tokens``) are intentionally NOT exposed. Token
minting is a human-gated operation; an LLM client has no business poking at
@ -279,14 +280,17 @@ def _tool_definitions() -> list[types.Tool]:
types.Tool(
name=TOOL_DRAFT_PATCH,
description=(
"Draft a patch (unified diff) addressing one or more "
"findings on a job. WAVE 2B STUB — full implementation "
"lands in wave 3 / step 9 of the v0.1 plan. Today this tool "
"is callable but only returns a 'not yet implemented' "
"message; the surface exists so tool catalogues stay stable "
"across waves. Once shipped, the patch will be drafted via "
"clawdforge and applied to a worktree, with a Gitea PR "
"opened on the configured branch."
"Draft a patch (unified diff) addressing one finding on a "
"job. The server opens a clawdforge session, asks the model "
"for a unified diff, applies it to a fresh worktree, "
"re-runs the failing recipe to verify, and on success "
"pushes a branch and opens a Gitea PR. No auto-merge — "
"review and merge manually. Returns a PatchAttempt with "
"{status, branch_name, pr_url, error}; status ranges over "
"drafted/apply_failed/verify_failed/pushed/pr_opened/"
"max_attempts_exceeded. 503 if the patcher isn't "
"configured. v0.1 supports lint and cve findings; "
"test_fail is v0.2."
),
inputSchema={
"type": "object",
@ -301,8 +305,8 @@ def _tool_definitions() -> list[types.Tool]:
"minimum": 1,
"description": (
"Optional specific finding id. If omitted, "
"drafts patches for all open findings on the "
"job."
"the server picks the highest-severity "
"actionable finding on the job."
),
},
},
@ -555,9 +559,6 @@ async def _dispatch(
)
if name == TOOL_DRAFT_PATCH:
# Wave-2B stub: validate args lightly, return a stable message.
# Once wave-3 lands this whole branch becomes a real call to a
# /jobs/{id}/patch endpoint that drafts via clawdforge.
job_id = args.get("job_id")
if not isinstance(job_id, str) or not job_id:
return _err_content("missing or empty 'job_id' argument"), True
@ -567,23 +568,35 @@ async def _dispatch(
and not isinstance(finding_id, bool)
):
return _err_content("'finding_id' must be an integer"), True
return (
_ok_content(
{
"ok": False,
"pending": True,
"message": (
"draft patch — not yet implemented (lands in "
"wave 3 / step 9). The tool surface is stable; "
"callers can keep referencing it. Today no "
"patch is drafted."
),
"job_id": job_id,
"finding_id": finding_id,
}
),
False,
)
try:
payload = await asyncio.to_thread(
ct.trigger_patch, job_id, finding_id
)
except ValueError as ve:
return _err_content(str(ve)), True
attempt = payload.get("attempt") if isinstance(payload, dict) else None
if attempt is None:
prose = (
f"no actionable finding on job {job_id} — patcher "
f"declined to draft. Check "
f"crafting_table_get_findings to confirm or pass an "
f"explicit finding_id."
)
return _two_block_content(prose, payload), False
status = attempt.get("status", "?")
branch = attempt.get("branch_name") or "(no branch)"
pr_url = attempt.get("pr_url") or "(no PR)"
err = attempt.get("error") or ""
prose_parts = [
f"patch attempt #{attempt.get('attempt_number')} for finding "
f"{attempt.get('finding_id')} on job {job_id}: status={status}",
f"branch={branch}",
f"pr={pr_url}",
]
if err:
prose_parts.append(f"error: {err}")
prose = "\n".join(prose_parts)
return _two_block_content(prose, payload), False
return _err_content(f"unknown tool: {name}"), True

View file

@ -572,43 +572,99 @@ class TestGetFindings(unittest.TestCase):
self.assertIn("not found", content[0].text)
class TestDraftPatchStub(unittest.TestCase):
"""Wave 2B stub: tool surface present, but returns a 'pending' message."""
class TestDraftPatch(unittest.TestCase):
"""Wave 3: real call to POST /jobs/{id}/patches; two-block return."""
def test_returns_pending_message(self) -> None:
@responses.activate
def test_pr_opened_two_block_return(self) -> None:
    """A ``pr_opened`` attempt comes back as a prose block plus a JSON block."""
    attempt = {
        "id": 7,
        "finding_id": 42,
        "job_id": "j-1",
        "project_name": "demo",
        "attempt_number": 1,
        "status": "pr_opened",
        "branch_name": "crafting-table/auto/j-1-42",
        "pr_url": "http://192.168.0.5:3001/X/Y/pulls/9",
        "diff_excerpt": "--- a/x\n+++ b/x",
        "session_id": "s-1",
        "error": None,
    }
    responses.add(
        responses.POST,
        f"{BASE_URL}/jobs/j-1/patches",
        json={"ok": True, "attempt": attempt},
        status=200,
    )
    client = _client()
    try:
        blocks, failed = _run(
            _dispatch(client, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
        )
    finally:
        client.close()
    self.assertFalse(failed)
    # Exactly two content blocks are expected: human-readable summary
    # first, raw server payload second.
    self.assertEqual(len(blocks), 2)
    summary = blocks[0].text
    for fragment in ("pr_opened", "crafting-table/auto/j-1-42", "/pulls/9"):
        self.assertIn(fragment, summary)
    decoded = json.loads(blocks[1].text)
    self.assertTrue(decoded["ok"])
    self.assertEqual(decoded["attempt"]["status"], "pr_opened")
@responses.activate
def test_no_actionable_finding(self) -> None:
    """A null ``attempt`` is reported as a non-error 'no actionable finding'."""
    server_reply = {
        "ok": True,
        "attempt": None,
        "reason": "no_actionable_finding",
    }
    responses.add(
        responses.POST,
        f"{BASE_URL}/jobs/j-1/patches",
        json=server_reply,
        status=200,
    )
    client = _client()
    try:
        blocks, failed = _run(
            _dispatch(client, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
        )
    finally:
        client.close()
    self.assertFalse(failed)
    # The first block carries the prose explanation.
    self.assertIn("no actionable finding", blocks[0].text)
@responses.activate
def test_with_finding_id_passes_through(self) -> None:
responses.add(
responses.POST,
f"{BASE_URL}/jobs/j-1/patches",
json={
"ok": True,
"attempt": {
"id": 1, "finding_id": 42, "job_id": "j-1",
"project_name": "demo", "attempt_number": 1,
"status": "drafted", "branch_name": None, "pr_url": None,
"diff_excerpt": None, "session_id": None,
"error": "malformed_response",
},
},
status=200,
)
c = _client()
try:
content, is_error = _run(
_dispatch(
c,
TOOL_DRAFT_PATCH,
{"job_id": "j-1"},
c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": 42}
)
)
finally:
c.close()
self.assertFalse(is_error)
body = json.loads(content[0].text)
self.assertFalse(body["ok"])
self.assertTrue(body["pending"])
self.assertIn("not yet implemented", body["message"])
self.assertIn("wave 3", body["message"])
def test_with_finding_id(self) -> None:
c = _client()
try:
content, is_error = _run(
_dispatch(
c,
TOOL_DRAFT_PATCH,
{"job_id": "j-1", "finding_id": 42},
)
)
finally:
c.close()
self.assertFalse(is_error)
body = json.loads(content[0].text)
self.assertEqual(body["finding_id"], 42)
body = json.loads(content[1].text)
self.assertEqual(body["attempt"]["finding_id"], 42)
self.assertEqual(body["attempt"]["status"], "drafted")
def test_rejects_bool_finding_id(self) -> None:
# bool is a subclass of int — defense-in-depth.
@ -616,9 +672,7 @@ class TestDraftPatchStub(unittest.TestCase):
try:
content, is_error = _run(
_dispatch(
c,
TOOL_DRAFT_PATCH,
{"job_id": "j-1", "finding_id": True},
c, TOOL_DRAFT_PATCH, {"job_id": "j-1", "finding_id": True}
)
)
finally:
@ -635,6 +689,24 @@ class TestDraftPatchStub(unittest.TestCase):
self.assertTrue(is_error)
self.assertIn("job_id", content[0].text)
@responses.activate
def test_503_when_patcher_disabled(self) -> None:
    """A 503 from the patches endpoint surfaces as a tool error naming 503."""
    endpoint = f"{BASE_URL}/jobs/j-1/patches"
    responses.add(
        responses.POST,
        endpoint,
        json={"detail": "patcher not configured"},
        status=503,
    )
    client = _client()
    try:
        blocks, failed = _run(
            _dispatch(client, TOOL_DRAFT_PATCH, {"job_id": "j-1"})
        )
    finally:
        client.close()
    self.assertTrue(failed)
    # The status code must be visible in the error text shown to the caller.
    self.assertIn("503", blocks[0].text)
class TestUnknownTool(unittest.TestCase):
def test_unknown_tool_returns_error(self) -> None: