patcher: robust extract_diff_json — handles 5 model-output shapes
The 4 patcher-fired-but-malformed_response failures showed extract_diff_json
was too strict: it required {"diff": "..."} as the top-level JSON shape
with at most 1 brace nesting depth (regex-based). Real model output
varies more.
Now handles:
1. Bare JSON {"diff", "explanation", "confidence"}
2. Fenced JSON: ```json {…} ```
3. Fenced diff + prose: ```diff …unified diff… ``` + loose explanation
4. Bare unified diff (no JSON wrapper, no fence)
5. JSON with deeply-nested {} inside the diff string (struct literals,
function bodies)
Fixes:
- Replaced regex-based balanced-{} matcher (capped at depth 1) with a
string-aware depth-tracking generator that handles arbitrary nesting
and skips brace characters inside JSON string literals
- Walk all fenced blocks, not just the first; recognize ```diff and
```patch language tags
- Fall back to fenced-diff-with-prose construction when no JSON form
matches — synthetic payload with surrounding text as explanation
- Final fallback for bare unified diffs (no fence, no wrapper) using a
simple line-prefix detector
- Normalize alternate keys (patch, content, diff_text → diff)
- Always set confidence (defaults to medium when absent, low for bare
diffs that have no model commentary)
Tests: 16 → 20 (4 new tests covering the added shapes and alt-key normalization). All green.
This commit is contained in:
parent
80c4eebf3b
commit
3273d66003
2 changed files with 206 additions and 28 deletions
|
|
@@ -205,7 +205,12 @@ def test_findings_were_actionable_cve():
|
|||
|
||||
def test_extract_diff_json_plain():
    """Bare JSON without a confidence key parses and gains the default confidence."""
    obj = extract_diff_json('{"diff": "x", "explanation": "y"}')
    # Parser normalizes — confidence defaults to "medium" when absent so
    # downstream code can rely on the field always being present. A whole-dict
    # equality check against the two-key input would contradict that, so each
    # field is asserted individually instead.
    assert obj is not None
    assert obj["diff"] == "x"
    assert obj["explanation"] == "y"
    assert obj["confidence"] == "medium"
|
||||
|
||||
|
||||
def test_extract_diff_json_fenced():
|
||||
|
|
@@ -218,6 +223,56 @@ def test_extract_diff_json_returns_none_on_garbage():
|
|||
assert extract_diff_json("not even json") is None
|
||||
|
||||
|
||||
def test_extract_diff_json_fenced_diff_block():
    """Prose surrounding a fenced ```diff block, with no JSON wrapper.

    Modeled on a shape observed in real-world Opus output.
    """
    response_parts = [
        "Here is the fix:\n\n",
        "```diff\n",
        "--- a/src/lib.rs\n",
        "+++ b/src/lib.rs\n",
        "@@ -1 +1 @@\n",
        "-old\n",
        "+new\n",
        "```\n\n",
        "That should resolve the off-by-one.",
    ]
    model_output = "".join(response_parts)

    payload = extract_diff_json(model_output)

    assert payload is not None
    # The fenced body should end up as the diff ...
    assert "lib.rs" in payload["diff"]
    # ... and the loose prose around the fence as the explanation.
    assert "off-by-one" in payload["explanation"]
|
||||
|
||||
|
||||
def test_extract_diff_json_bare_unified_diff():
    """A raw unified diff body: unfenced and without any JSON envelope."""
    raw_diff = "--- a/x\n+++ b/x\n@@ -1 +1 @@\n-old\n+new\n"

    parsed = extract_diff_json(raw_diff)

    assert parsed is not None
    # The parser strips trailing whitespace, so compare modulo that; the
    # diffs are semantically equivalent.
    assert parsed["diff"].rstrip() == raw_diff.rstrip()
    # A bare diff carries no model commentary to weigh, hence low confidence.
    assert parsed["confidence"] == "low"
|
||||
|
||||
|
||||
def test_extract_diff_json_deeply_nested_braces_in_diff():
    """Braces nested more than one level deep inside the diff string still parse.

    The previous regex-based matcher stopped at a single level of brace
    nesting, while real diffs carry struct literals and similar constructs
    of arbitrary depth.
    """
    json_pieces = (
        '{"diff": "--- a/x.rs\\n+++ b/x.rs\\n@@\\n',
        '-fn x() { Some(Foo { a: 1 }) }\\n',
        '+fn x() { Some(Foo { a: 2 }) }", ',
        '"explanation": "depth-2 nesting", "confidence": "high"}',
    )
    nested_payload = "".join(json_pieces)

    result = extract_diff_json(nested_payload)

    assert result is not None
    # The explanation key follows the brace-heavy diff string, so reading it
    # back shows the whole object was matched.
    assert result["explanation"] == "depth-2 nesting"
|
||||
|
||||
|
||||
def test_extract_diff_json_alt_key():
    """A payload keyed by 'patch' rather than 'diff' is still accepted."""
    alt_keyed = '{"patch": "--- a\\n+++ b\\n@@\\n-x\\n+y", "explanation": "via alt key"}'

    result = extract_diff_json(alt_keyed)

    assert result is not None
    # The normalizer is expected to copy the alternate key's value into the
    # canonical 'diff' field.
    assert result["diff"].startswith("--- a")
|
||||
|
||||
|
||||
def test_turn_text_concatenates_text_events():
|
||||
assert turn_text({"events": [
|
||||
{"type": "text", "content": "hello "},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue