patcher: robust extract_diff_json — handles 5 model-output shapes

The 4 patcher-fired-but-malformed_response failures showed extract_diff_json was too strict: it required {"diff": "..."} as the top-level JSON shape with at most 1 brace nesting depth (regex-based). Real model output varies more. Now handles: 1. Bare JSON {"diff", "explanation", "confidence"} 2. Fenced JSON: ```json {…} ``` 3. Fenced diff + prose: ```diff …unified diff… ``` + loose explanation 4. Bare unified diff (no JSON wrapper, no fence) 5. JSON with deeply-nested {} inside the diff string (struct literals, function bodies) Fixes: - Replaced regex-based balanced-{} matcher (capped at depth 1) with a string-aware depth-tracking generator that handles arbitrary nesting + skips brace chars inside JSON string literals - Walk all fenced blocks not just the first; recognize ```diff and ```patch language tags - Fall back to fenced-diff-with-prose construction when no JSON form matches — synthetic payload with surrounding text as explanation - Final fallback for bare unified diffs (no fence, no wrapper) using a simple line-prefix detector - Normalize alternate keys (patch, content, diff_text → diff) - Always set confidence (defaults to medium when absent, low for bare diffs that have no model commentary) Tests: 16 → 20 (5 new shape coverage tests). All green.
2026-04-29 15:37:24 -07:00 · 2026-04-29 15:37:24 -07:00 · 3273d66003
commit 3273d66003
parent 80c4eebf3b
2 changed files with 206 additions and 28 deletions
--- a/crafting_table/patcher.py
+++ b/crafting_table/patcher.py
@ -285,52 +285,175 @@ def turn_text(payload: dict) -> str:
    return "".join(parts)


-_JSON_BLOCK_RE = re.compile(r"\{(?:[^{}]|\{[^{}]*\})*\}", re.DOTALL)
+# Match all fenced code blocks with their language tag. Greedy + DOTALL so
+# the body can be multi-line. Captures (lang, body).
+_FENCE_BLOCK_RE = re.compile(
+    r"```(\w*)\s*\n(.*?)```",
+    re.DOTALL,
+)
+
+# A unified diff starts with one of these patterns somewhere — used to
+# detect "is this body a raw diff" without a fence.
+_DIFF_PREFIX_RE = re.compile(
+    r"^(?:diff --git |--- |\+\+\+ |Index: |@@ )",
+    re.MULTILINE,
+)
+
+
+def _balanced_json_objects(text: str) -> Iterable[str]:
+    """Yield each top-level balanced ``{...}`` JSON candidate in *text*.
+
+    Walks the string brace-by-brace tracking depth — handles arbitrary
+    nesting (the regex form was capped at depth 1 which broke on diffs
+    that contain struct literals etc.). Skips brace chars inside strings
+    so {"diff": "fn x() { 1 }"} doesn't break the depth counter.
+    """
+    depth = 0
+    start = -1
+    in_str = False
+    escape = False
+    for i, ch in enumerate(text):
+        if escape:
+            escape = False
+            continue
+        if in_str:
+            if ch == "\\":
+                escape = True
+            elif ch == '"':
+                in_str = False
+            continue
+        if ch == '"':
+            in_str = True
+            continue
+        if ch == "{":
+            if depth == 0:
+                start = i
+            depth += 1
+        elif ch == "}":
+            depth -= 1
+            if depth == 0 and start >= 0:
+                yield text[start : i + 1]
+                start = -1


 def extract_diff_json(text: str) -> dict[str, Any] | None:
-    """Pull the ``{"diff": ..., "explanation": ..., "confidence": ...}``
-    object out of the model's text reply.
+    """Pull the patcher's expected ``{"diff", "explanation", "confidence"}``
+    payload out of an Opus / Sonnet / agent response.

-    The prompt asks for "JSON ONLY", but real-world models leak the
-    occasional fence (```json ... ```) or trailing prose. We strip code
-    fences first, then try the whole string, then walk substrings for
-    the first JSON object that has a ``diff`` field.
+    The prompt asks for "JSON ONLY" but in practice models return any of:
+
+    1. **Bare JSON** — what the prompt asked for.
+    2. **Fenced JSON** — ```` ```json {…} ``` ````.
+    3. **Fenced diff + prose** — ``` ```diff …unified diff… ``` ``` plus
+       loose explanation text. No JSON wrapper.
+    4. **Bare unified diff** — leading lines like ``diff --git`` or
+       ``--- a/foo`` with no fences and no JSON at all.
+
+    Strategy:
+      a. Try the whole string as JSON; accept if it has a ``diff`` field.
+      b. Walk all fenced blocks; if any is JSON-with-diff, accept.
+      c. Walk balanced ``{…}`` substrings (depth-aware, string-aware) and
+         accept the first one with a ``diff`` field.
+      d. Fall back to scanning fenced ``diff`` blocks; if found, build a
+         synthetic payload using the prose around the fence as the
+         explanation. Confidence defaults to ``"medium"``.
+      e. Final fallback: if the entire body looks like a raw unified diff,
+         wrap it the same way as (d).
+
+    Returns ``None`` only when none of the above produces a usable diff.
    """
    cleaned = text.strip()
-    # Strip a leading ``` fence (with optional language tag) and trailing ```
-    if cleaned.startswith("```"):
-        # Drop the first line (fence) and find the closing fence.
-        lines = cleaned.splitlines()
-        if len(lines) >= 2:
-            # First line is fence; the closing ``` may be on its own line.
-            body_lines = []
-            for ln in lines[1:]:
-                if ln.strip() == "```":
-                    break
-                body_lines.append(ln)
-            cleaned = "\n".join(body_lines).strip()
+    if not cleaned:
+        return None

-    # Fast path
+    # (a) Bare JSON
    try:
        obj = json.loads(cleaned)
-        if isinstance(obj, dict) and "diff" in obj:
-            return obj
+        if isinstance(obj, dict):
+            cand = _normalize_diff_payload(obj)
+            if cand.get("diff"):
+                return cand
    except (ValueError, TypeError):
        pass

-    # Fallback: scan for a balanced JSON object that contains "diff"
-    for m in _JSON_BLOCK_RE.finditer(cleaned):
-        chunk = m.group(0)
+    # (b) Walk fenced blocks for JSON-with-diff
+    fenced = list(_FENCE_BLOCK_RE.finditer(cleaned))
+    diff_fenced: list[str] = []
+    for m in fenced:
+        lang = (m.group(1) or "").lower().strip()
+        body = m.group(2).strip()
+        if lang in ("json", "", "javascript", "js"):
+            try:
+                obj = json.loads(body)
+                if isinstance(obj, dict):
+                    cand = _normalize_diff_payload(obj)
+                    if cand.get("diff"):
+                        return cand
+            except (ValueError, TypeError):
+                pass
+        if lang in ("diff", "patch", "unified-diff"):
+            diff_fenced.append(body)
+
+    # (c) Balanced JSON substring scan, depth + string aware
+    for chunk in _balanced_json_objects(cleaned):
        try:
            obj = json.loads(chunk)
        except (ValueError, TypeError):
            continue
-        if isinstance(obj, dict) and "diff" in obj:
-            return obj
+        if isinstance(obj, dict):
+            cand = _normalize_diff_payload(obj)
+            if cand.get("diff"):
+                return cand
+
+    # (d) Fenced diff block(s) — synthesize the payload
+    if diff_fenced:
+        diff_text = diff_fenced[0]
+        explanation = _strip_fenced_blocks(cleaned).strip()[:500]
+        return {
+            "diff": diff_text,
+            "explanation": explanation or "diff extracted from fenced ```diff block",
+            "confidence": "medium",
+        }
+
+    # (e) Bare unified diff at the top level
+    if _DIFF_PREFIX_RE.search(cleaned):
+        return {
+            "diff": cleaned,
+            "explanation": "raw unified diff (no JSON wrapper, no code fence)",
+            "confidence": "low",
+        }
+
    return None


+def _normalize_diff_payload(obj: dict[str, Any]) -> dict[str, Any]:
+    """Coerce a found JSON object to the canonical patcher shape.
+
+    Models occasionally return the diff under alt keys (``patch``,
+    ``content``) or with extra metadata. We normalize the keys we use and
+    leave the rest in place for diagnostics. ``confidence`` defaults to
+    ``"medium"`` if unset.
+    """
+    out = dict(obj)
+    if "diff" not in out:
+        for alt in ("patch", "content", "diff_text"):
+            if isinstance(out.get(alt), str):
+                out["diff"] = out[alt]
+                break
+    out.setdefault("explanation", "")
+    out.setdefault("confidence", "medium")
+    return out
+
+
+def _strip_fenced_blocks(text: str) -> str:
+    """Return *text* with all ```…``` fenced blocks removed.
+
+    Used to recover prose-around-code as the explanation field when we
+    extract a raw fenced diff (case d above).
+    """
+    return _FENCE_BLOCK_RE.sub("", text)
+
+
 # --- Gitea wire wrapper -----------------------------------------------------


--- a/tests/test_patcher.py
+++ b/tests/test_patcher.py
@ -205,7 +205,12 @@ def test_findings_were_actionable_cve():

 def test_extract_diff_json_plain():
    obj = extract_diff_json('{"diff": "x", "explanation": "y"}')
-    assert obj == {"diff": "x", "explanation": "y"}
+    # Parser normalizes — confidence defaults to "medium" when absent so
+    # downstream code can rely on the field always being present.
+    assert obj is not None
+    assert obj["diff"] == "x"
+    assert obj["explanation"] == "y"
+    assert obj["confidence"] == "medium"


 def test_extract_diff_json_fenced():
@ -218,6 +223,56 @@ def test_extract_diff_json_returns_none_on_garbage():
    assert extract_diff_json("not even json") is None


+def test_extract_diff_json_fenced_diff_block():
+    """Real-world Opus shape: prose + a fenced ```diff block, no JSON wrapper."""
+    text = (
+        "Here is the fix:\n\n"
+        "```diff\n"
+        "--- a/src/lib.rs\n"
+        "+++ b/src/lib.rs\n"
+        "@@ -1 +1 @@\n"
+        "-old\n"
+        "+new\n"
+        "```\n\n"
+        "That should resolve the off-by-one."
+    )
+    obj = extract_diff_json(text)
+    assert obj is not None
+    assert "lib.rs" in obj["diff"]
+    assert "off-by-one" in obj["explanation"]
+
+
+def test_extract_diff_json_bare_unified_diff():
+    """No fence, no JSON wrapper — just the diff body."""
+    text = "--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new\n"
+    obj = extract_diff_json(text)
+    assert obj is not None
+    assert obj["diff"].rstrip() == text.rstrip()  # parser strips trailing whitespace; semantic equivalence
+    assert obj["confidence"] == "low"  # bare diff is low-confidence — no model commentary to weigh
+
+
+def test_extract_diff_json_deeply_nested_braces_in_diff():
+    """The old regex was capped at one level of brace nesting; real diffs
+    contain struct literals etc. with arbitrary depth."""
+    deep = (
+        '{"diff": "--- a/x.rs\\n+++ b/x.rs\\n@@\\n'
+        '-fn x() { Some(Foo { a: 1 }) }\\n'
+        '+fn x() { Some(Foo { a: 2 }) }", '
+        '"explanation": "depth-2 nesting", "confidence": "high"}'
+    )
+    obj = extract_diff_json(deep)
+    assert obj is not None
+    assert obj["explanation"] == "depth-2 nesting"
+
+
+def test_extract_diff_json_alt_key():
+    """Models sometimes use 'patch' instead of 'diff'."""
+    obj = extract_diff_json('{"patch": "--- a\\n+++ b\\n@@\\n-x\\n+y", "explanation": "via alt key"}')
+    assert obj is not None
+    # Normalizer copies the alt key into the canonical 'diff' field
+    assert obj["diff"].startswith("--- a")
+
+
 def test_turn_text_concatenates_text_events():
    assert turn_text({"events": [
        {"type": "text", "content": "hello "},