"""Patcher unit tests — drafted/apply_failed/verify_failed/pushed/pr_opened
status transitions plus the runner hook integration.

We mock the clawdforge + Gitea wires (no real network calls) and stub the
runner._exec_recipe so the verify step is deterministic. Diff applying uses
real git in a temp worktree — this catches the wire-up issues that pure unit
tests miss.
"""

from __future__ import annotations

import asyncio
import json
import shutil
import subprocess
import time
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock

import pytest

from crafting_table.db import DB
from crafting_table.patcher import (
    ClawdforgeClient,
    GiteaClient,
    Patcher,
    PatcherConfig,
    extract_diff_json,
    findings_were_actionable,
    turn_text,
)
from crafting_table.workspace import WorkspaceManager

# Job statuses after which the runner will not touch the job again.
_TERMINAL_JOB_STATUSES = ("succeeded", "failed", "timed_out", "cancelled")


# ---------- helpers ---------------------------------------------------------


def _git(*args: str, cwd: Path) -> None:
    """Run ``git <args>`` inside *cwd*, raising CalledProcessError on failure."""
    subprocess.run(["git", *args], cwd=cwd, check=True)


def _init_committed_repo(
    repo: Path,
    files: dict[str, str],
    *,
    email: str,
    name: str,
) -> None:
    """Create *repo* as a git work tree on branch ``main`` with one commit.

    *files* maps repo-relative paths to their text content; parent
    directories are created as needed. GPG signing is disabled so the
    commit succeeds regardless of the developer's global git config.
    """
    repo.mkdir()
    _git("init", "-q", "-b", "main", cwd=repo)
    _git("config", "user.email", email, cwd=repo)
    _git("config", "user.name", name, cwd=repo)
    _git("config", "commit.gpgsign", "false", cwd=repo)
    for rel_path, text in files.items():
        target = repo / rel_path
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(text)
    _git("add", ".", cwd=repo)
    _git("commit", "-q", "-m", "init", cwd=repo)


def _make_origin_repo(root: Path, *, file_text: str = "hello\nworld\n") -> str:
    """Create a bare-cloneable origin repo with a tracked file the patch will rewrite.

    Returns the filesystem path of the bare repo as a string. Skips the
    calling test when no ``git`` binary is on PATH.
    """
    if shutil.which("git") is None:
        pytest.skip("git binary not present")
    origin = root / "origin.git"
    work = root / "origin-work"
    _init_committed_repo(
        work,
        {"src/app.py": file_text},
        email="test@example",
        name="test",
    )
    # Bare clone so push works.
    subprocess.run(
        ["git", "clone", "--bare", str(work), str(origin)],
        check=True,
        capture_output=True,
    )
    # Register the bare repo as an extra remote (named "bare") on the work
    # tree so tests can reach it from there as well.
    subprocess.run(
        ["git", "remote", "add", "bare", str(origin)],
        cwd=work,
        check=True,
        capture_output=True,
    )
    return str(origin)


def _seed_project_and_job(
    db: DB,
    *,
    project_name: str,
    git_url: str,
    findings: list[dict] | None = None,
    auto_patch: bool = True,
) -> tuple[str, int | None]:
    """Insert a token, a project, a finished (failed) lint job and its findings.

    One finding row is inserted per entry in *findings*; keys missing from
    an entry fall back to the defaults below.

    Returns ``(job_id, finding_id)`` where ``finding_id`` is the id of the
    LAST finding inserted, or ``None`` when *findings* is empty/None.
    """
    # Project
    db.insert_token(name="alpha", bearer="ct_alpha", is_admin=False, ip_cidrs=None)
    recipe = {
        "languages": ["python"],
        "subprojects": [
            {
                "path": ".",
                "language": "python",
                "lint": "echo 'lint ok'",
                "timeout_secs": 30,
            }
        ],
        "schedule": {},
        "notify": {"email": ["x@y"], "on": [], "auto_patch": auto_patch},
    }
    db.upsert_project(
        name=project_name,
        git_url=git_url,
        default_branch="main",
        recipe_json=json.dumps(recipe),
        owner_token="alpha",
    )
    # Job
    snapshot = {
        "git_url": git_url,
        "default_branch": "main",
        "languages": ["python"],
        "subprojects": recipe["subprojects"],
    }
    job_id = "job-1"
    db.insert_job(
        job_id=job_id,
        project_name=project_name,
        subproject_path=".",
        recipe="lint",
        branch="main",
        log_path="/tmp/_x.log",
        recipe_snapshot_json=json.dumps(snapshot),
    )
    db.mark_job_finished(job_id=job_id, status="failed", exit_code=1)
    finding_id = None
    for finding in findings or []:
        finding_id = db.insert_finding(
            job_id=job_id,
            kind=finding.get("kind", "lint"),
            severity=finding.get("severity", "warn"),
            message=finding.get("message", "msg"),
            fingerprint=finding.get("fingerprint", "abcdef0123456789"),
            file=finding.get("file"),
            line=finding.get("line"),
            code=finding.get("code"),
            suggested_fix=finding.get("suggested_fix"),
            raw_json=None,
        )
    return job_id, finding_id


def _patcher_with_mocks(db: DB, workspace: WorkspaceManager, *, runner=None):
    """Build a Patcher with mocked clawdforge + Gitea clients.

    Returns (patcher, claw_mock, gitea_mock) so tests can assert on call counts.
    """
    cfg = PatcherConfig(
        clawdforge_base_url="http://cf.local",
        clawdforge_token="cf_x",
        gitea_base_url="http://gitea.local",
        gitea_token="gt_x",
        max_attempts_per_finding=3,
    )
    claw = MagicMock(spec=ClawdforgeClient)
    claw.create_session = AsyncMock(return_value={"session_id": "s-1"})
    claw.turn = AsyncMock()
    claw.close_session = AsyncMock()
    gitea = MagicMock(spec=GiteaClient)
    gitea.open_pr = AsyncMock(
        return_value={"html_url": "http://git.example.com/X/Y/pulls/1"}
    )
    patcher = Patcher(
        db=db,
        workspace=workspace,
        config=cfg,
        runner=runner,
        clawdforge=claw,
        gitea=gitea,
    )
    return patcher, claw, gitea


def _diff_for(file_rel: str, *, old: str, new: str) -> str:
    """Build a unified diff that real git apply will accept against a file
    containing exactly `old`. Format matches `git diff` output.

    The diff is a single hunk starting at line 1 that removes every line of
    *old* and adds every line of *new* (no context lines).
    """
    old_lines = old.splitlines()
    new_lines = new.splitlines()
    removed = "\n".join(f"-{line}" for line in old_lines)
    added = "\n".join(f"+{line}" for line in new_lines)
    return (
        f"diff --git a/{file_rel} b/{file_rel}\n"
        f"--- a/{file_rel}\n"
        f"+++ b/{file_rel}\n"
        f"@@ -1,{len(old_lines)} +1,{len(new_lines)} @@\n"
        + removed
        + "\n"
        + added
        + "\n"
    )


def _lint_finding(message: str = "x") -> dict:
    """Return the actionable F401 lint finding used by the pipeline tests."""
    return {
        "kind": "lint",
        "severity": "warn",
        "code": "F401",
        "file": "src/app.py",
        "line": 1,
        "message": message,
    }


def _json_turn(diff: str, explanation: str) -> dict:
    """Return a mocked clawdforge turn whose single text event is a
    high-confidence JSON patch payload."""
    payload = {"diff": diff, "explanation": explanation, "confidence": "high"}
    return {"events": [{"type": "text", "content": json.dumps(payload)}]}


def _seed_demo_lint_job(db: DB, tmp_path: Path, *, message: str = "x"):
    """Create the origin repo, a workspace and a failed ``demo`` lint job
    carrying one actionable finding.

    Returns ``(workspace, job_id, finding_id)``.
    """
    git_url = _make_origin_repo(tmp_path)
    workspace = WorkspaceManager(tmp_path / "ws")
    job_id, finding_id = _seed_project_and_job(
        db,
        project_name="demo",
        git_url=git_url,
        findings=[_lint_finding(message)],
    )
    return workspace, job_id, finding_id


def _wait_for_terminal_job(
    tc,
    job_id: str,
    headers: dict[str, str],
    *,
    timeout_secs: float = 30.0,
    poll_secs: float = 0.1,
) -> str:
    """Poll ``GET /jobs/{job_id}`` until the job reaches a terminal status.

    Returns the terminal status. Fails the test explicitly on timeout:
    silently falling through would let "patcher was NOT called" assertions
    pass vacuously while the job is still running.
    """
    deadline = time.monotonic() + timeout_secs
    status = None
    while time.monotonic() < deadline:
        status = tc.get(f"/jobs/{job_id}", headers=headers).json()["job"]["status"]
        if status in _TERMINAL_JOB_STATUSES:
            return status
        time.sleep(poll_secs)
    pytest.fail(
        f"job {job_id} did not reach a terminal status within "
        f"{timeout_secs}s (last status: {status!r})"
    )


def _run_lint_job_with_stub_patcher(
    client,
    tmp_path: Path,
    *,
    project_name: str,
    repo_dirname: str,
    ruff_message: str,
    auto_patch: bool,
) -> MagicMock:
    """Register a project, run its failing lint recipe to completion and
    return the stub patcher that was injected into the server.

    The lint command echoes ruff-shaped JSON and exits 1 so the parser picks
    up an actionable finding.
    """
    tc, ctx = client
    server = ctx["server"]
    # Build + inject a stub patcher BEFORE we kick the job. The real
    # _maybe_auto_patch_hook closes over server.patcher at call time.
    stub_patcher = MagicMock()
    stub_patcher.maybe_draft_for_job = AsyncMock(return_value=[])
    server.patcher = stub_patcher

    # Make a tiny git repo so the runner can clone+worktree.
    if shutil.which("git") is None:
        pytest.skip("git not available")
    repo = tmp_path / repo_dirname
    _init_committed_repo(repo, {"README.md": "hi\n"}, email="t@e", name="t")
    git_url = str(repo)

    ruff_stub = json.dumps([{
        "code": "F401",
        "message": ruff_message,
        "filename": "src/app.py",
        "location": {"row": 3, "column": 1},
    }])
    payload = {
        "name": project_name,
        "git_url": git_url,
        "default_branch": "main",
        "languages": ["python"],
        "subprojects": [{
            "path": ".",
            "language": "python",
            "lint": f"echo '{ruff_stub}'; exit 1",
            "timeout_secs": 20,
        }],
        "schedule": {},
        "notify": {"email": ["x@y"], "on": [], "auto_patch": auto_patch},
    }
    auth = {"Authorization": f"Bearer {ctx['alpha_bearer']}"}

    r = tc.post("/projects", headers=auth, json=payload)
    assert r.status_code == 200, r.text
    r2 = tc.post(
        f"/projects/{project_name}/jobs",
        headers=auth,
        json={"recipe": "lint"},
    )
    assert r2.status_code == 200, r2.text
    job_id = r2.json()["job_id"]

    # Wait for terminal.
    _wait_for_terminal_job(tc, job_id, auth)
    # Hook fan-out is fire-and-forget; let the loop turn once more.
    time.sleep(0.2)
    return stub_patcher


# ---------- helper-fn unit tests ------------------------------------------


def test_findings_were_actionable_lint_with_locator():
    assert findings_were_actionable([
        {"kind": "lint", "file": "x.py", "line": 1}
    ])


def test_findings_were_actionable_lint_without_locator():
    assert not findings_were_actionable([
        {"kind": "lint", "file": None, "line": None}
    ])


def test_findings_were_actionable_test_fail_skipped():
    # test_fail is NOT actionable in v0.1
    assert not findings_were_actionable([
        {"kind": "test_fail", "file": "x.py", "line": 1}
    ])


def test_findings_were_actionable_cve():
    assert findings_were_actionable([
        {"kind": "cve", "code": "RUSTSEC-1", "suggested_fix": "bump"}
    ])


def test_extract_diff_json_plain():
    obj = extract_diff_json('{"diff": "x", "explanation": "y"}')
    # Parser normalizes — confidence defaults to "medium" when absent so
    # downstream code can rely on the field always being present.
    assert obj is not None
    assert obj["diff"] == "x"
    assert obj["explanation"] == "y"
    assert obj["confidence"] == "medium"


def test_extract_diff_json_fenced():
    obj = extract_diff_json('```json\n{"diff": "x", "explanation": "y"}\n```')
    assert obj is not None
    assert obj["diff"] == "x"


def test_extract_diff_json_returns_none_on_garbage():
    assert extract_diff_json("not even json") is None


def test_extract_diff_json_fenced_diff_block():
    """Real-world Opus shape: prose + a fenced ```diff block, no JSON wrapper."""
    text = (
        "Here is the fix:\n\n"
        "```diff\n"
        "--- a/src/lib.rs\n"
        "+++ b/src/lib.rs\n"
        "@@ -1 +1 @@\n"
        "-old\n"
        "+new\n"
        "```\n\n"
        "That should resolve the off-by-one."
    )
    obj = extract_diff_json(text)
    assert obj is not None
    assert "lib.rs" in obj["diff"]
    assert "off-by-one" in obj["explanation"]


def test_extract_diff_json_bare_unified_diff():
    """No fence, no JSON wrapper — just the diff body."""
    text = "--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new\n"
    obj = extract_diff_json(text)
    assert obj is not None
    # parser strips trailing whitespace; semantic equivalence
    assert obj["diff"].rstrip() == text.rstrip()
    # bare diff is low-confidence — no model commentary to weigh
    assert obj["confidence"] == "low"


def test_extract_diff_json_deeply_nested_braces_in_diff():
    """The old regex was capped at one level of brace nesting; real diffs
    contain struct literals etc. with arbitrary depth."""
    deep = (
        '{"diff": "--- a/x.rs\\n+++ b/x.rs\\n@@\\n'
        '-fn x() { Some(Foo { a: 1 }) }\\n'
        '+fn x() { Some(Foo { a: 2 }) }", '
        '"explanation": "depth-2 nesting", "confidence": "high"}'
    )
    obj = extract_diff_json(deep)
    assert obj is not None
    assert obj["explanation"] == "depth-2 nesting"


def test_extract_diff_json_alt_key():
    """Models sometimes use 'patch' instead of 'diff'."""
    obj = extract_diff_json(
        '{"patch": "--- a\\n+++ b\\n@@\\n-x\\n+y", "explanation": "via alt key"}'
    )
    assert obj is not None
    # Normalizer copies the alt key into the canonical 'diff' field
    assert obj["diff"].startswith("--- a")


def test_turn_text_concatenates_text_events():
    assert turn_text({"events": [
        {"type": "text", "content": "hello "},
        {"type": "tool_call"},
        {"type": "text", "content": "world"},
    ]}) == "hello world"


# ---------- patcher pipeline tests -----------------------------------------


@pytest.mark.asyncio
async def test_drafts_via_clawdforge_session(db_only, tmp_path):
    """First-light test: malformed JSON from the model leaves the attempt in
    status=drafted with error=malformed_response."""
    workspace, job_id, finding_id = _seed_demo_lint_job(
        db_only, tmp_path, message="bad"
    )
    p, claw, gitea = _patcher_with_mocks(db_only, workspace)
    # Model returns prose without JSON.
    claw.turn.return_value = {
        "events": [{"type": "text", "content": "I cannot help with that"}]
    }
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None
    assert attempt.status == "drafted"
    assert attempt.error == "malformed_response"
    assert claw.create_session.await_count == 1
    assert claw.close_session.await_count == 1


@pytest.mark.asyncio
async def test_apply_failed_when_diff_rejects(db_only, tmp_path):
    """A diff that git cannot apply leaves the attempt in apply_failed and
    still closes the clawdforge session."""
    workspace, job_id, finding_id = _seed_demo_lint_job(db_only, tmp_path)
    p, claw, gitea = _patcher_with_mocks(db_only, workspace)
    # Diff with wrong line numbers (the file is 2 lines, this hits line 999).
    bad_diff = (
        "diff --git a/src/app.py b/src/app.py\n"
        "--- a/src/app.py\n"
        "+++ b/src/app.py\n"
        "@@ -999,1 +999,1 @@\n"
        "-nonexistent\n"
        "+something else\n"
    )
    claw.turn.return_value = _json_turn(bad_diff, "x")
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None
    assert attempt.status == "apply_failed"
    assert claw.close_session.await_count == 1


@pytest.mark.asyncio
async def test_verify_failed_when_recipe_still_fails(db_only, tmp_path):
    """A diff that applies but whose verify recipe exits non-zero ends in
    verify_failed after exactly one recipe run."""
    workspace, job_id, finding_id = _seed_demo_lint_job(db_only, tmp_path)
    # Stub runner that fails verify.
    fake_runner = MagicMock()
    fake_runner._exec_recipe = AsyncMock(return_value=(1, False))
    p, claw, gitea = _patcher_with_mocks(db_only, workspace, runner=fake_runner)
    # Valid diff that DOES apply (replace 'hello' with 'goodbye')
    good_diff = _diff_for("src/app.py", old="hello\nworld", new="goodbye\nworld")
    claw.turn.return_value = _json_turn(good_diff, "x")
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None
    assert attempt.status == "verify_failed"
    assert fake_runner._exec_recipe.await_count == 1


@pytest.mark.asyncio
async def test_pushed_and_pr_opened_on_success(db_only, tmp_path):
    """Happy path: diff applies, verify passes, branch is pushed and a PR is
    opened through the (mocked) Gitea client."""
    workspace, job_id, finding_id = _seed_demo_lint_job(db_only, tmp_path)
    fake_runner = MagicMock()
    fake_runner._exec_recipe = AsyncMock(return_value=(0, False))
    p, claw, gitea = _patcher_with_mocks(db_only, workspace, runner=fake_runner)
    good_diff = _diff_for("src/app.py", old="hello\nworld", new="goodbye\nworld")
    claw.turn.return_value = _json_turn(good_diff, "tiny fix")
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None, "expected a PatchAttempt"
    assert attempt.status == "pr_opened", (
        f"unexpected: {attempt.status} / {attempt.error}"
    )
    assert attempt.pr_url == "http://git.example.com/X/Y/pulls/1"
    assert attempt.branch_name and "crafting-table/auto/" in attempt.branch_name
    assert gitea.open_pr.await_count == 1


@pytest.mark.asyncio
async def test_max_attempts_per_finding(db_only, tmp_path):
    """Once max_attempts_per_finding (3) attempts exist, the next call exits
    early without opening a clawdforge session."""
    workspace, job_id, finding_id = _seed_demo_lint_job(db_only, tmp_path)
    # Pre-seed three failed attempts so the 4th early-exits.
    for attempt_number in range(1, 4):
        db_only.insert_patch_attempt(
            finding_id=finding_id,
            job_id=job_id,
            project_name="demo",
            attempt_number=attempt_number,
            status="apply_failed",
        )
    p, claw, gitea = _patcher_with_mocks(db_only, workspace)
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None
    assert attempt.status == "max_attempts_exceeded"
    assert claw.create_session.await_count == 0


@pytest.mark.asyncio
async def test_clawdforge_session_always_closes_on_exception(db_only, tmp_path):
    """An exception raised by the model turn yields status=failed and the
    session is still closed."""
    workspace, job_id, finding_id = _seed_demo_lint_job(db_only, tmp_path)
    p, claw, gitea = _patcher_with_mocks(db_only, workspace)
    claw.turn.side_effect = RuntimeError("simulated network blip")
    attempt = await p.maybe_draft(job_id, finding_id=finding_id)
    assert attempt is not None
    assert attempt.status == "failed"
    # Session was created and then closed even though turn raised.
    assert claw.create_session.await_count == 1
    assert claw.close_session.await_count == 1


@pytest.mark.asyncio
async def test_runner_invokes_patcher_when_auto_patch_true(client, tmp_path):
    """Integration: the runner's post-job hook calls patcher.maybe_draft_for_job
    when project.notify.auto_patch=true and there are actionable findings.
    """
    stub_patcher = _run_lint_job_with_stub_patcher(
        client,
        tmp_path,
        project_name="ct-autopatch-on",
        repo_dirname="fixture-repo",
        ruff_message="'os' imported",
        auto_patch=True,
    )
    # Patcher.maybe_draft_for_job should have been called at least once.
    assert stub_patcher.maybe_draft_for_job.await_count >= 1


@pytest.mark.asyncio
async def test_runner_skips_patcher_when_auto_patch_false(client, tmp_path):
    """Integration: with project.notify.auto_patch=false the post-job hook
    must never reach the patcher, even though the job produced findings."""
    stub_patcher = _run_lint_job_with_stub_patcher(
        client,
        tmp_path,
        project_name="ct-autopatch-off",
        repo_dirname="fixture-repo-off",
        ruff_message="x",
        auto_patch=False,
    )
    assert stub_patcher.maybe_draft_for_job.await_count == 0