patcher: robust extract_diff_json — handles 5 model-output shapes

The 4 patcher-fired-but-malformed_response failures showed extract_diff_json
was too strict: it required {"diff": "..."} as the top-level JSON shape
with at most 1 brace nesting depth (regex-based). Real model output
varies more.

Now handles:
1. Bare JSON {"diff", "explanation", "confidence"}
2. Fenced JSON: ```json {…} ```
3. Fenced diff + prose: ```diff …unified diff… ``` + loose explanation
4. Bare unified diff (no JSON wrapper, no fence)
5. JSON with deeply-nested {} inside the diff string (struct literals,
   function bodies)

Fixes:
- Replaced regex-based balanced-{} matcher (capped at depth 1) with a
  string-aware depth-tracking generator that handles arbitrary nesting
  + skips brace chars inside JSON string literals
- Walk all fenced blocks not just the first; recognize ```diff and
  ```patch language tags
- Fall back to fenced-diff-with-prose construction when no JSON form
  matches — synthetic payload with surrounding text as explanation
- Final fallback for bare unified diffs (no fence, no wrapper) using a
  simple line-prefix detector
- Normalize alternate keys (patch, content, diff_text → diff)
- Always set confidence (defaults to medium when absent, low for bare
  diffs that have no model commentary)

Tests: 16 → 20 (5 new shape coverage tests). All green.
This commit is contained in:
Kayos 2026-04-29 15:37:24 -07:00
parent 80c4eebf3b
commit 3273d66003
2 changed files with 206 additions and 28 deletions

View file

@ -285,52 +285,175 @@ def turn_text(payload: dict) -> str:
return "".join(parts)
_JSON_BLOCK_RE = re.compile(r"\{(?:[^{}]|\{[^{}]*\})*\}", re.DOTALL)
# Match all fenced code blocks with their language tag. Greedy + DOTALL so
# the body can be multi-line. Captures (lang, body).
_FENCE_BLOCK_RE = re.compile(
r"```(\w*)\s*\n(.*?)```",
re.DOTALL,
)
# A unified diff starts with one of these patterns somewhere — used to
# detect "is this body a raw diff" without a fence.
_DIFF_PREFIX_RE = re.compile(
r"^(?:diff --git |--- |\+\+\+ |Index: |@@ )",
re.MULTILINE,
)
def _balanced_json_objects(text: str) -> Iterable[str]:
"""Yield each top-level balanced ``{...}`` JSON candidate in *text*.
Walks the string brace-by-brace tracking depth handles arbitrary
nesting (the regex form was capped at depth 1 which broke on diffs
that contain struct literals etc.). Skips brace chars inside strings
so {"diff": "fn x() { 1 }"} doesn't break the depth counter.
"""
depth = 0
start = -1
in_str = False
escape = False
for i, ch in enumerate(text):
if escape:
escape = False
continue
if in_str:
if ch == "\\":
escape = True
elif ch == '"':
in_str = False
continue
if ch == '"':
in_str = True
continue
if ch == "{":
if depth == 0:
start = i
depth += 1
elif ch == "}":
depth -= 1
if depth == 0 and start >= 0:
yield text[start : i + 1]
start = -1
def extract_diff_json(text: str) -> dict[str, Any] | None:
"""Pull the ``{"diff": ..., "explanation": ..., "confidence": ...}``
object out of the model's text reply.
"""Pull the patcher's expected ``{"diff", "explanation", "confidence"}``
payload out of an Opus / Sonnet / agent response.
The prompt asks for "JSON ONLY", but real-world models leak the
occasional fence (```json ... ```) or trailing prose. We strip code
fences first, then try the whole string, then walk substrings for
the first JSON object that has a ``diff`` field.
The prompt asks for "JSON ONLY" but in practice models return any of:
1. **Bare JSON** what the prompt asked for.
2. **Fenced JSON** ```` ```json {} ``` ````.
3. **Fenced diff + prose** ``` ```diff unified diff ``` ``` plus
loose explanation text. No JSON wrapper.
4. **Bare unified diff** leading lines like ``diff --git`` or
``--- a/foo`` with no fences and no JSON at all.
Strategy:
a. Try the whole string as JSON; accept if it has a ``diff`` field.
b. Walk all fenced blocks; if any is JSON-with-diff, accept.
c. Walk balanced ``{}`` substrings (depth-aware, string-aware) and
accept the first one with a ``diff`` field.
d. Fall back to scanning fenced ``diff`` blocks; if found, build a
synthetic payload using the prose around the fence as the
explanation. Confidence defaults to ``"medium"``.
e. Final fallback: if the entire body looks like a raw unified diff,
wrap it the same way as (d).
Returns ``None`` only when none of the above produces a usable diff.
"""
cleaned = text.strip()
# Strip a leading ``` fence (with optional language tag) and trailing ```
if cleaned.startswith("```"):
# Drop the first line (fence) and find the closing fence.
lines = cleaned.splitlines()
if len(lines) >= 2:
# First line is fence; the closing ``` may be on its own line.
body_lines = []
for ln in lines[1:]:
if ln.strip() == "```":
break
body_lines.append(ln)
cleaned = "\n".join(body_lines).strip()
if not cleaned:
return None
# Fast path
# (a) Bare JSON
try:
obj = json.loads(cleaned)
if isinstance(obj, dict) and "diff" in obj:
return obj
if isinstance(obj, dict):
cand = _normalize_diff_payload(obj)
if cand.get("diff"):
return cand
except (ValueError, TypeError):
pass
# Fallback: scan for a balanced JSON object that contains "diff"
for m in _JSON_BLOCK_RE.finditer(cleaned):
chunk = m.group(0)
# (b) Walk fenced blocks for JSON-with-diff
fenced = list(_FENCE_BLOCK_RE.finditer(cleaned))
diff_fenced: list[str] = []
for m in fenced:
lang = (m.group(1) or "").lower().strip()
body = m.group(2).strip()
if lang in ("json", "", "javascript", "js"):
try:
obj = json.loads(body)
if isinstance(obj, dict):
cand = _normalize_diff_payload(obj)
if cand.get("diff"):
return cand
except (ValueError, TypeError):
pass
if lang in ("diff", "patch", "unified-diff"):
diff_fenced.append(body)
# (c) Balanced JSON substring scan, depth + string aware
for chunk in _balanced_json_objects(cleaned):
try:
obj = json.loads(chunk)
except (ValueError, TypeError):
continue
if isinstance(obj, dict) and "diff" in obj:
return obj
if isinstance(obj, dict):
cand = _normalize_diff_payload(obj)
if cand.get("diff"):
return cand
# (d) Fenced diff block(s) — synthesize the payload
if diff_fenced:
diff_text = diff_fenced[0]
explanation = _strip_fenced_blocks(cleaned).strip()[:500]
return {
"diff": diff_text,
"explanation": explanation or "diff extracted from fenced ```diff block",
"confidence": "medium",
}
# (e) Bare unified diff at the top level
if _DIFF_PREFIX_RE.search(cleaned):
return {
"diff": cleaned,
"explanation": "raw unified diff (no JSON wrapper, no code fence)",
"confidence": "low",
}
return None
def _normalize_diff_payload(obj: dict[str, Any]) -> dict[str, Any]:
"""Coerce a found JSON object to the canonical patcher shape.
Models occasionally return the diff under alt keys (``patch``,
``content``) or with extra metadata. We normalize the keys we use and
leave the rest in place for diagnostics. ``confidence`` defaults to
``"medium"`` if unset.
"""
out = dict(obj)
if "diff" not in out:
for alt in ("patch", "content", "diff_text"):
if isinstance(out.get(alt), str):
out["diff"] = out[alt]
break
out.setdefault("explanation", "")
out.setdefault("confidence", "medium")
return out
def _strip_fenced_blocks(text: str) -> str:
"""Return *text* with all ```…``` fenced blocks removed.
Used to recover prose-around-code as the explanation field when we
extract a raw fenced diff (case d above).
"""
return _FENCE_BLOCK_RE.sub("", text)
# --- Gitea wire wrapper -----------------------------------------------------

View file

@ -205,7 +205,12 @@ def test_findings_were_actionable_cve():
def test_extract_diff_json_plain():
obj = extract_diff_json('{"diff": "x", "explanation": "y"}')
assert obj == {"diff": "x", "explanation": "y"}
# Parser normalizes — confidence defaults to "medium" when absent so
# downstream code can rely on the field always being present.
assert obj is not None
assert obj["diff"] == "x"
assert obj["explanation"] == "y"
assert obj["confidence"] == "medium"
def test_extract_diff_json_fenced():
@ -218,6 +223,56 @@ def test_extract_diff_json_returns_none_on_garbage():
assert extract_diff_json("not even json") is None
def test_extract_diff_json_fenced_diff_block():
"""Real-world Opus shape: prose + a fenced ```diff block, no JSON wrapper."""
text = (
"Here is the fix:\n\n"
"```diff\n"
"--- a/src/lib.rs\n"
"+++ b/src/lib.rs\n"
"@@ -1 +1 @@\n"
"-old\n"
"+new\n"
"```\n\n"
"That should resolve the off-by-one."
)
obj = extract_diff_json(text)
assert obj is not None
assert "lib.rs" in obj["diff"]
assert "off-by-one" in obj["explanation"]
def test_extract_diff_json_bare_unified_diff():
"""No fence, no JSON wrapper — just the diff body."""
text = "--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new\n"
obj = extract_diff_json(text)
assert obj is not None
assert obj["diff"].rstrip() == text.rstrip() # parser strips trailing whitespace; semantic equivalence
assert obj["confidence"] == "low" # bare diff is low-confidence — no model commentary to weigh
def test_extract_diff_json_deeply_nested_braces_in_diff():
"""The old regex was capped at one level of brace nesting; real diffs
contain struct literals etc. with arbitrary depth."""
deep = (
'{"diff": "--- a/x.rs\\n+++ b/x.rs\\n@@\\n'
'-fn x() { Some(Foo { a: 1 }) }\\n'
'+fn x() { Some(Foo { a: 2 }) }", '
'"explanation": "depth-2 nesting", "confidence": "high"}'
)
obj = extract_diff_json(deep)
assert obj is not None
assert obj["explanation"] == "depth-2 nesting"
def test_extract_diff_json_alt_key():
"""Models sometimes use 'patch' instead of 'diff'."""
obj = extract_diff_json('{"patch": "--- a\\n+++ b\\n@@\\n-x\\n+y", "explanation": "via alt key"}')
assert obj is not None
# Normalizer copies the alt key into the canonical 'diff' field
assert obj["diff"].startswith("--- a")
def test_turn_text_concatenates_text_events():
assert turn_text({"events": [
{"type": "text", "content": "hello "},