From 37d7d60a8b4fde11cf6976f38d582ef890114fa6 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 30 Apr 2026 22:50:12 -0700 Subject: [PATCH] =?UTF-8?q?forge:=20tolerant=20JSON=20parsing=20=E2=80=94?= =?UTF-8?q?=20extract=20first=20object=20even=20if=20Sonnet=20appends=20ex?= =?UTF-8?q?tra=20prose?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Job 3 hit one recipe (healthy-chicken-stir-fry-with-vegetables) where Sonnet returned a valid JSON object + appended prose afterward, in violation of 'no prose' rule. json.loads choked with 'Extra data: line 54'. _parse_json_blob now falls back to JSONDecoder.raw_decode which extracts the first complete JSON value and ignores anything after — any trailing notes / fence remnants / inline commentary get silently dropped. Plain json.loads is still tried first (fastest path on clean output). The fallback only kicks in for malformed-but-recoverable cases. --- cauldron/forge.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/cauldron/forge.py b/cauldron/forge.py index d2e1afa..68d1292 100644 --- a/cauldron/forge.py +++ b/cauldron/forge.py @@ -1204,11 +1204,28 @@ def _extract_plan_payload(forge_result: dict) -> tuple[list, str]: def _parse_json_blob(s: str): + """Parse the FIRST balanced JSON value out of a string. Tolerates Sonnet + appending extra prose/notes after the JSON object (which violates the + 'no prose' rule but happens occasionally). Also strips ```json fences.""" s = s.strip() # Strip code fences if Sonnet wrapped its output s = re.sub(r"^```(?:json)?\s*", "", s) s = re.sub(r"\s*```$", "", s) try: + # Plain decode first — fastest path when output is clean return json.loads(s) + except Exception: + pass + # Fall back to raw_decode which extracts the first JSON value and + # tells us where it ended. Anything after gets ignored. Handles the + # "Extra data: line 54" failure mode where Sonnet appended notes. + try: + decoder = json.JSONDecoder() + # Skip any leading whitespace before scanning + idx = 0 + while idx < len(s) and s[idx] in " \t\n\r": + idx += 1 + obj, _end = decoder.raw_decode(s[idx:]) + return obj except Exception as e: raise ForgeError(f"could not parse model JSON: {e}; head={s[:200]!r}") from e