crafting-table/tests/test_patcher.py
Kayos 4eab869df0 v0.1 wave 3 (steps 9+10): autonomous patch loop + production recipes
Step 9 — autonomous patch loop:
- patcher.py: clawdforge session → unified diff → worktree apply → verify recipe → push branch → open Gitea PR
- migration 007: patch_attempts (UNIQUE per finding+attempt, max 3 attempts)
- runner.py: post-parse hook fires patcher.maybe_draft_for_job when notify.auto_patch=true
- server.py: POST /jobs/{id}/patches, GET /patches, GET /patches/{id}
- digest.py: patch-drafted lines + open-follow-up count via Gitea PR state check
- mcp: crafting_table_draft_patch stub replaced with real implementation
- tests/test_patcher.py + tests/test_patches_api.py: 27 new tests

No auto-merge — patches stop at PR-open. Cobb merges.

Step 10 — production recipes:
- examples/recipes/clawdforge.json: 14 subprojects across all SDKs, audit nightly
- examples/recipes/cauldron.json: single Flask subproject, audit nightly
- examples/recipes/tradecraft.json: nightly audit, auto_patch=false (manual review)
- examples/register-all.sh: bulk-register helper with GITEA_TOKEN substitution
- README "Autonomous patch loop" + "First production recipes" sections

Tests: server 116→143, mcp 65→67. All green.

Spec: memory/spec-crafting-table.md
2026-04-29 09:04:48 -07:00

545 lines
19 KiB
Python

"""Patcher unit tests — drafted/apply_failed/verify_failed/pushed/pr_opened
status transitions plus the runner hook integration.
We mock the clawdforge + Gitea wires (no real network calls) and stub the
runner._exec_recipe so the verify step is deterministic. Diff applying
uses real git in a temp worktree — this catches the wire-up issues that
pure unit tests miss.
"""
from __future__ import annotations
import asyncio
import json
import shutil
import subprocess
import time
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock
import pytest
from crafting_table.db import DB
from crafting_table.patcher import (
ClawdforgeClient,
GiteaClient,
Patcher,
PatcherConfig,
extract_diff_json,
findings_were_actionable,
turn_text,
)
from crafting_table.workspace import WorkspaceManager
# ---------- helpers ---------------------------------------------------------
def _make_origin_repo(root: Path, *, file_text: str = "hello\nworld\n") -> str:
    """Build a throwaway git origin under ``root`` and return its path.

    A working repo (``origin-work``) is initialised on branch ``main`` with a
    single tracked file, ``src/app.py``, holding ``file_text``. It is then
    bare-cloned to ``origin.git``; the returned string is the path of that
    bare clone. The calling test is skipped when no ``git`` binary is found.
    """
    if shutil.which("git") is None:
        pytest.skip("git binary not present")

    bare_dir = root / "origin.git"
    work_dir = root / "origin-work"
    work_dir.mkdir()

    def in_work(*argv: str, quiet: bool = False) -> None:
        # Every command run inside the working repo must succeed.
        subprocess.run(
            ["git", *argv], cwd=work_dir, check=True, capture_output=quiet
        )

    in_work("init", "-q", "-b", "main")
    identity = (
        ("user.email", "test@example"),
        ("user.name", "test"),
        ("commit.gpgsign", "false"),
    )
    for key, value in identity:
        in_work("config", key, value)

    src_dir = work_dir / "src"
    src_dir.mkdir()
    (src_dir / "app.py").write_text(file_text)
    in_work("add", ".")
    in_work("commit", "-q", "-m", "init")

    # Pushing needs a bare repository on the receiving side.
    subprocess.run(
        ["git", "clone", "--bare", str(work_dir), str(bare_dir)],
        check=True,
        capture_output=True,
    )
    # Register the bare clone in the working repo as a remote named "bare".
    in_work("remote", "add", "bare", str(bare_dir), quiet=True)
    return str(bare_dir)
def _seed_project_and_job(
    db: DB,
    *,
    project_name: str,
    git_url: str,
    findings: list[dict] | None = None,
    auto_patch: bool = True,
) -> tuple[str, int | None]:
    """Seed a token, a project, one finished job and any requested findings.

    The job is always ``job-1``: a ``lint`` run on ``main`` that is marked
    ``failed`` with exit code 1. Each dict in ``findings`` becomes one finding
    row, with missing keys replaced by the defaults used below.

    Returns ``(job_id, finding_id)`` where ``finding_id`` is whatever the last
    ``insert_finding`` call returned, or ``None`` when no findings were given.
    """
    job_id = "job-1"
    subprojects = [
        {
            "path": ".",
            "language": "python",
            "lint": "echo 'lint ok'",
            "timeout_secs": 30,
        }
    ]
    recipe = {
        "languages": ["python"],
        "subprojects": subprojects,
        "schedule": {},
        "notify": {"email": ["x@y"], "on": [], "auto_patch": auto_patch},
    }
    snapshot = {
        "git_url": git_url,
        "default_branch": "main",
        "languages": ["python"],
        "subprojects": subprojects,
    }

    # Token first, then the project that names it as owner.
    db.insert_token(name="alpha", bearer="ct_alpha", is_admin=False, ip_cidrs=None)
    db.upsert_project(
        name=project_name,
        git_url=git_url,
        default_branch="main",
        recipe_json=json.dumps(recipe),
        owner_token="alpha",
    )

    # One job, immediately moved to its failed end state.
    db.insert_job(
        job_id=job_id,
        project_name=project_name,
        subproject_path=".",
        recipe="lint",
        branch="main",
        log_path="/tmp/_x.log",
        recipe_snapshot_json=json.dumps(snapshot),
    )
    db.mark_job_finished(job_id=job_id, status="failed", exit_code=1)

    last_finding_id = None
    for spec in findings or []:
        last_finding_id = db.insert_finding(
            job_id=job_id,
            kind=spec.get("kind", "lint"),
            severity=spec.get("severity", "warn"),
            message=spec.get("message", "msg"),
            fingerprint=spec.get("fingerprint", "abcdef0123456789"),
            file=spec.get("file"),
            line=spec.get("line"),
            code=spec.get("code"),
            suggested_fix=spec.get("suggested_fix"),
            raw_json=None,
        )
    return job_id, last_finding_id
def _patcher_with_mocks(db: DB, workspace: WorkspaceManager, *, runner=None):
    """Assemble a Patcher whose clawdforge and Gitea clients are mocks.

    Returns ``(patcher, claw_mock, gitea_mock)``; the mocks are handed back so
    a test can script their return values and check await counts.
    """
    claw_mock = MagicMock(spec=ClawdforgeClient)
    claw_mock.create_session = AsyncMock(return_value={"session_id": "s-1"})
    claw_mock.turn = AsyncMock()
    claw_mock.close_session = AsyncMock()

    gitea_mock = MagicMock(spec=GiteaClient)
    gitea_mock.open_pr = AsyncMock(
        return_value={"html_url": "http://192.168.0.5:3001/X/Y/pulls/1"}
    )

    patcher = Patcher(
        db=db,
        workspace=workspace,
        config=PatcherConfig(
            clawdforge_base_url="http://cf.local",
            clawdforge_token="cf_x",
            gitea_base_url="http://gitea.local",
            gitea_token="gt_x",
            max_attempts_per_finding=3,
        ),
        runner=runner,
        clawdforge=claw_mock,
        gitea=gitea_mock,
    )
    return patcher, claw_mock, gitea_mock
def _diff_for(file_rel: str, *, old: str, new: str) -> str:
"""Build a unified diff that real git apply will accept against a
file containing exactly `old`. Format matches `git diff` output."""
return (
f"diff --git a/{file_rel} b/{file_rel}\n"
f"--- a/{file_rel}\n"
f"+++ b/{file_rel}\n"
f"@@ -1,{len(old.splitlines())} +1,{len(new.splitlines())} @@\n"
+ "\n".join(f"-{l}" for l in old.splitlines()) + "\n"
+ "\n".join(f"+{l}" for l in new.splitlines()) + "\n"
)
# ---------- helper-fn unit tests ------------------------------------------
def test_findings_were_actionable_lint_with_locator():
    """A lint finding with both a file and a line counts as actionable."""
    located_lint = {"kind": "lint", "file": "x.py", "line": 1}
    assert findings_were_actionable([located_lint])
def test_findings_were_actionable_lint_without_locator():
    """A lint finding whose file and line are both None is not actionable."""
    unlocated_lint = {"kind": "lint", "file": None, "line": None}
    assert not findings_were_actionable([unlocated_lint])
def test_findings_were_actionable_test_fail_skipped():
    """test_fail findings are rejected even when they carry a locator."""
    # test_fail is NOT actionable in v0.1
    located_test_fail = {"kind": "test_fail", "file": "x.py", "line": 1}
    assert not findings_were_actionable([located_test_fail])
def test_findings_were_actionable_cve():
    """A cve finding with a code and a suggested fix counts as actionable."""
    advisory = {"kind": "cve", "code": "RUSTSEC-1", "suggested_fix": "bump"}
    assert findings_were_actionable([advisory])
def test_extract_diff_json_plain():
    """A bare JSON object string is parsed into the matching dict."""
    expected = {"diff": "x", "explanation": "y"}
    parsed = extract_diff_json('{"diff": "x", "explanation": "y"}')
    assert parsed == expected
def test_extract_diff_json_fenced():
    """JSON wrapped in a ```json fence is still extracted."""
    fenced_reply = '```json\n{"diff": "x", "explanation": "y"}\n```'
    parsed = extract_diff_json(fenced_reply)
    assert parsed is not None
    assert parsed["diff"] == "x"
def test_extract_diff_json_returns_none_on_garbage():
    """Text that contains no JSON at all yields None."""
    parsed = extract_diff_json("not even json")
    assert parsed is None
def test_turn_text_concatenates_text_events():
    """Text events are joined in order; the tool_call event adds nothing."""
    turn = {
        "events": [
            {"type": "text", "content": "hello "},
            {"type": "tool_call"},
            {"type": "text", "content": "world"},
        ]
    }
    assert turn_text(turn) == "hello world"
# ---------- patcher pipeline tests -----------------------------------------
@pytest.mark.asyncio
async def test_drafts_via_clawdforge_session(db_only, tmp_path):
    """A model reply without JSON leaves the attempt at status=drafted.

    The attempt must record error=malformed_response, and the clawdforge
    session must have been opened and closed exactly once.
    """
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "bad",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only,
        project_name="demo",
        git_url=origin_url,
        findings=[lint_finding],
    )
    patcher, claw, _gitea = _patcher_with_mocks(db_only, ws)

    # The model answers with plain prose instead of the expected JSON payload.
    prose_event = {"type": "text", "content": "I cannot help with that"}
    claw.turn.return_value = {"events": [prose_event]}

    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None
    assert result.status == "drafted"
    assert result.error == "malformed_response"
    assert claw.create_session.await_count == 1
    assert claw.close_session.await_count == 1
@pytest.mark.asyncio
async def test_apply_failed_when_diff_rejects(db_only, tmp_path):
    """A diff that cannot be applied ends the attempt as apply_failed."""
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "x",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only, project_name="demo", git_url=origin_url,
        findings=[lint_finding],
    )
    patcher, claw, _gitea = _patcher_with_mocks(db_only, ws)

    # The hunk targets line 999 of a two-line file, so it cannot apply.
    rejected_diff = "".join(
        f"{row}\n"
        for row in (
            "diff --git a/src/app.py b/src/app.py",
            "--- a/src/app.py",
            "+++ b/src/app.py",
            "@@ -999,1 +999,1 @@",
            "-nonexistent",
            "+something else",
        )
    )
    model_reply = json.dumps(
        {"diff": rejected_diff, "explanation": "x", "confidence": "high"}
    )
    claw.turn.return_value = {
        "events": [{"type": "text", "content": model_reply}]
    }

    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None
    assert result.status == "apply_failed"
    assert claw.close_session.await_count == 1
@pytest.mark.asyncio
async def test_verify_failed_when_recipe_still_fails(db_only, tmp_path):
    """A diff that applies but whose verify run fails ends as verify_failed."""
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "x",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only, project_name="demo", git_url=origin_url,
        findings=[lint_finding],
    )

    # Runner stand-in whose recipe execution returns (1, False).
    runner_stub = MagicMock()
    runner_stub._exec_recipe = AsyncMock(return_value=(1, False))
    patcher, claw, _gitea = _patcher_with_mocks(db_only, ws, runner=runner_stub)

    # This diff applies cleanly: it swaps 'hello' for 'goodbye'.
    applicable_diff = _diff_for(
        "src/app.py", old="hello\nworld", new="goodbye\nworld"
    )
    model_reply = json.dumps(
        {"diff": applicable_diff, "explanation": "x", "confidence": "high"}
    )
    claw.turn.return_value = {
        "events": [{"type": "text", "content": model_reply}]
    }

    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None
    assert result.status == "verify_failed"
    assert runner_stub._exec_recipe.await_count == 1
@pytest.mark.asyncio
async def test_pushed_and_pr_opened_on_success(db_only, tmp_path):
    """Happy path: the diff applies, verify passes and a PR is opened."""
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "x",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only, project_name="demo", git_url=origin_url,
        findings=[lint_finding],
    )

    # Runner stand-in whose recipe execution returns (0, False).
    runner_stub = MagicMock()
    runner_stub._exec_recipe = AsyncMock(return_value=(0, False))
    patcher, claw, gitea = _patcher_with_mocks(db_only, ws, runner=runner_stub)

    applicable_diff = _diff_for(
        "src/app.py", old="hello\nworld", new="goodbye\nworld"
    )
    model_reply = json.dumps(
        {"diff": applicable_diff, "explanation": "tiny fix", "confidence": "high"}
    )
    claw.turn.return_value = {
        "events": [{"type": "text", "content": model_reply}]
    }

    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None, "expected a PatchAttempt"
    assert result.status == "pr_opened", f"unexpected: {result.status} / {result.error}"
    # The PR URL is the html_url returned by the mocked Gitea client.
    assert result.pr_url == "http://192.168.0.5:3001/X/Y/pulls/1"
    assert result.branch_name and "crafting-table/auto/" in result.branch_name
    assert gitea.open_pr.await_count == 1
@pytest.mark.asyncio
async def test_max_attempts_per_finding(db_only, tmp_path):
    """With three attempts already on record, no new session is started."""
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "x",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only, project_name="demo", git_url=origin_url,
        findings=[lint_finding],
    )

    # Record attempts 1..3 as failed so the next call is the fourth.
    for attempt_number in (1, 2, 3):
        db_only.insert_patch_attempt(
            finding_id=finding_id, job_id=job_id, project_name="demo",
            attempt_number=attempt_number, status="apply_failed",
        )

    patcher, claw, _gitea = _patcher_with_mocks(db_only, ws)
    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None
    assert result.status == "max_attempts_exceeded"
    assert claw.create_session.await_count == 0
@pytest.mark.asyncio
async def test_clawdforge_session_always_closes_on_exception(db_only, tmp_path):
    """An exception raised by the turn still leaves the session closed."""
    lint_finding = {
        "kind": "lint", "severity": "warn", "code": "F401",
        "file": "src/app.py", "line": 1, "message": "x",
    }
    origin_url = _make_origin_repo(tmp_path)
    ws = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db_only, project_name="demo", git_url=origin_url,
        findings=[lint_finding],
    )
    patcher, claw, _gitea = _patcher_with_mocks(db_only, ws)
    claw.turn.side_effect = RuntimeError("simulated network blip")

    result = await patcher.maybe_draft(job_id, finding_id=finding_id)

    assert result is not None
    assert result.status == "failed"
    # One session was opened, and it was closed although the turn raised.
    assert claw.create_session.await_count == 1
    assert claw.close_session.await_count == 1
@pytest.mark.asyncio
async def test_runner_invokes_patcher_when_auto_patch_true(client, tmp_path):
    """Integration: the runner's post-job hook calls patcher.maybe_draft_for_job
    when project.notify.auto_patch=true and there are actionable findings.

    The test registers a project whose lint recipe prints ruff-shaped JSON and
    exits 1, starts a lint job, polls until the job is terminal, and then
    checks that the stub patcher was awaited at least once.
    """
    import shlex  # local import: only this test needs shell quoting

    # Skip before touching the server so a skipped run never swaps its patcher.
    if shutil.which("git") is None:
        pytest.skip("git not available")

    tc, ctx = client
    server = ctx["server"]
    auth = {"Authorization": f"Bearer {ctx['alpha_bearer']}"}

    # Build + inject a stub patcher BEFORE we kick the job. The real
    # _maybe_auto_patch_hook closes over server.patcher at call time.
    stub_patcher = MagicMock()
    stub_patcher.maybe_draft_for_job = AsyncMock(return_value=[])
    server.patcher = stub_patcher

    # Make a tiny git repo so the runner can clone+worktree.
    repo = tmp_path / "fixture-repo"
    repo.mkdir()
    subprocess.run(["git", "init", "-q", "-b", "main"], cwd=repo, check=True)
    subprocess.run(["git", "config", "user.email", "t@e"], cwd=repo, check=True)
    subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True)
    subprocess.run(["git", "config", "commit.gpgsign", "false"], cwd=repo, check=True)
    (repo / "README.md").write_text("hi\n")
    subprocess.run(["git", "add", "."], cwd=repo, check=True)
    subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=repo, check=True)
    git_url = str(repo)

    # Register a project with notify.auto_patch=true and a lint that emits
    # ruff-shaped JSON so the parser picks up an actionable finding.
    ruff_stub = json.dumps([{
        "code": "F401",
        "message": "'os' imported",
        "filename": "src/app.py",
        "location": {"row": 3, "column": 1},
    }])
    payload = {
        "name": "ct-autopatch-on",
        "git_url": git_url,
        "default_branch": "main",
        "languages": ["python"],
        "subprojects": [{
            "path": ".",
            "language": "python",
            # The JSON itself contains single quotes, so hand-wrapping it in
            # '...' would let the shell strip them; shlex.quote keeps the
            # payload intact.
            "lint": f"echo {shlex.quote(ruff_stub)}; exit 1",
            "timeout_secs": 20,
        }],
        "schedule": {},
        "notify": {"email": ["x@y"], "on": [], "auto_patch": True},
    }
    r = tc.post("/projects", headers=auth, json=payload)
    assert r.status_code == 200, r.text
    r2 = tc.post(
        "/projects/ct-autopatch-on/jobs",
        headers=auth,
        json={"recipe": "lint"},
    )
    assert r2.status_code == 200, r2.text
    job_id = r2.json()["job_id"]

    # Wait for terminal, remembering the last status seen so that a timeout
    # is reported as such instead of as a confusing await-count failure.
    terminal_states = ("succeeded", "failed", "timed_out", "cancelled")
    status = None
    deadline = time.monotonic() + 30
    while time.monotonic() < deadline:
        rr = tc.get(f"/jobs/{job_id}", headers=auth)
        status = rr.json()["job"]["status"]
        if status in terminal_states:
            break
        # Non-blocking sleep: do not stall the test's own event loop.
        await asyncio.sleep(0.1)
    assert status in terminal_states, (
        f"job {job_id} did not reach a terminal state within 30s "
        f"(last status: {status!r})"
    )

    # Hook fan-out is fire-and-forget; let the loop turn once more.
    await asyncio.sleep(0.2)
    # Patcher.maybe_draft_for_job should have been called at least once.
    assert stub_patcher.maybe_draft_for_job.await_count >= 1
@pytest.mark.asyncio
async def test_runner_skips_patcher_when_auto_patch_false(client, tmp_path):
    """Integration: with notify.auto_patch=false the patcher is never awaited.

    The project, recipe and job mirror the auto_patch=true case. The job must
    be observed in a terminal state before the zero-await check, otherwise a
    job that never finished would make the check pass without proving anything.
    """
    # Skip before touching the server so a skipped run never swaps its patcher.
    if shutil.which("git") is None:
        pytest.skip("git not available")

    tc, ctx = client
    server = ctx["server"]
    auth = {"Authorization": f"Bearer {ctx['alpha_bearer']}"}

    # Stub patcher injected before the job starts so any call is recorded.
    stub_patcher = MagicMock()
    stub_patcher.maybe_draft_for_job = AsyncMock(return_value=[])
    server.patcher = stub_patcher

    # Tiny git repo with a single commit on main.
    repo = tmp_path / "fixture-repo-off"
    repo.mkdir()
    subprocess.run(["git", "init", "-q", "-b", "main"], cwd=repo, check=True)
    subprocess.run(["git", "config", "user.email", "t@e"], cwd=repo, check=True)
    subprocess.run(["git", "config", "user.name", "t"], cwd=repo, check=True)
    subprocess.run(["git", "config", "commit.gpgsign", "false"], cwd=repo, check=True)
    (repo / "README.md").write_text("hi\n")
    subprocess.run(["git", "add", "."], cwd=repo, check=True)
    subprocess.run(["git", "commit", "-q", "-m", "init"], cwd=repo, check=True)
    git_url = str(repo)

    # Same ruff-shaped lint output as the auto_patch=true test.
    ruff_stub = json.dumps([{
        "code": "F401", "message": "x",
        "filename": "src/app.py", "location": {"row": 3, "column": 1},
    }])
    payload = {
        "name": "ct-autopatch-off",
        "git_url": git_url,
        "default_branch": "main",
        "languages": ["python"],
        "subprojects": [{
            "path": ".",
            "language": "python",
            "lint": f"echo '{ruff_stub}'; exit 1",
            "timeout_secs": 20,
        }],
        "schedule": {},
        "notify": {"email": ["x@y"], "on": [], "auto_patch": False},
    }
    r = tc.post("/projects", headers=auth, json=payload)
    assert r.status_code == 200, r.text
    r2 = tc.post(
        "/projects/ct-autopatch-off/jobs",
        headers=auth,
        json={"recipe": "lint"},
    )
    assert r2.status_code == 200, r2.text
    job_id = r2.json()["job_id"]

    # Poll until terminal, keeping the last status for the failure message.
    terminal_states = ("succeeded", "failed", "timed_out", "cancelled")
    status = None
    deadline = time.monotonic() + 30
    while time.monotonic() < deadline:
        rr = tc.get(f"/jobs/{job_id}", headers=auth)
        status = rr.json()["job"]["status"]
        if status in terminal_states:
            break
        # Non-blocking sleep: do not stall the test's own event loop.
        await asyncio.sleep(0.1)
    # Without this, a timed-out poll would let the zero-await check below
    # pass vacuously.
    assert status in terminal_states, (
        f"job {job_id} did not reach a terminal state within 30s "
        f"(last status: {status!r})"
    )

    # Give a fire-and-forget hook the same grace period as the positive test.
    await asyncio.sleep(0.2)
    assert stub_patcher.maybe_draft_for_job.await_count == 0