crafting-table/crafting_table/parsers/rust.py
Kayos d467b2f5be v0.1 wave 2A (steps 5+6): per-language parsers + findings extraction
- parsers/ package: rust / python / go / typescript / generic
- parser registry with language+recipe -> fallback resolution
- fingerprint hash (kind+file+line+code) for cross-run dedup
- runner.py post-exec hook: parse log, persist findings, count on job row
  (extraction runs before mark_job_finished so callers polling on terminal
  status see findings_count populated atomically)
- db.insert_finding / list_findings / increment_findings_count DAOs already
  shipped in wave 1; wired here
- GET /jobs/{id}/findings now returns real data (server route already
  shipped; was returning empty list because nothing populated the table)
- tests/test_parsers/: 6 modules + 11 fixtures (rust/python/go/typescript)
- tests/test_runner_findings.py: 3 integration tests
- README: tick steps 2-6, add Findings section

Suite: 108 passing (62 wave-1 + 46 new).
Spec: memory/spec-crafting-table.md
2026-04-29 08:36:16 -07:00

235 lines
8.4 KiB
Python

"""Rust parser — clippy / cargo audit / cargo test.
Recipes handled:
- ``audit`` → cargo audit --json envelope → list of CVE findings
- ``lint`` → cargo clippy --message-format=json (NDJSON) → lint findings
- ``test`` → cargo test human output (no good machine format) → failures
- ``build`` → falls through to the generic recipe_fail behavior because
build success/failure is captured by exit_code alone; structured build
errors come through clippy on the lint recipe.
Each branch degrades gracefully: malformed JSON → empty findings, not
crash. The runner logs the parse failure and still records the job as
finished.
"""
from __future__ import annotations
import json
import re
from .base import Finding, _iter_jsonl, _safe_json_loads
class RustParser:
    """Turn the raw log of a Rust recipe into a list of ``Finding`` records.

    Dispatch is by recipe name: ``audit`` (cargo-audit JSON envelope),
    ``lint`` (cargo clippy NDJSON), ``test`` (cargo test human output) and
    ``build`` (exit status only). Every branch is written to tolerate
    unexpected payload shapes: a value that is not the expected dict/list is
    skipped rather than raising, so a strange log yields fewer findings
    instead of an exception.
    """

    # Per-test failure line printed by the test runner, e.g.
    #   test tests::adds ... FAILED
    #   test src/lib.rs - add (line 7) ... FAILED
    # The name group is lazy ``.+?`` rather than ``\S+`` because doctest names
    # and mode annotations such as ``- should panic`` contain spaces; whatever
    # is printed between ``test`` and ``...`` is kept verbatim as the name.
    _TEST_FAIL_RE = re.compile(r"^\s*test\s+(.+?)\s+\.{3}\s+FAILED\s*$")
    # Header of the trailing ``failures:`` summary block. Not consulted by the
    # parsing below; kept available as a class attribute.
    _FAILURES_RE = re.compile(r"^\s*failures:\s*$")

    @classmethod
    def matches(cls, language: str, recipe: str) -> bool:
        """Return True when this parser handles ``language`` + ``recipe``."""
        return language == "rust" and recipe in {"audit", "lint", "test", "build"}

    @classmethod
    def parse(cls, raw_log: str, exit_code: int, recipe: str) -> list[Finding]:
        """Extract findings from ``raw_log`` for the given ``recipe``.

        ``audit``, ``lint`` and ``test`` go to their dedicated parsers. Any
        other recipe is treated like ``build``: a single ``recipe_fail``
        finding when ``exit_code`` is non-zero, otherwise nothing.
        """
        if recipe == "audit":
            return cls._parse_audit(raw_log)
        if recipe == "lint":
            return cls._parse_clippy(raw_log)
        if recipe == "test":
            return cls._parse_test(raw_log, exit_code)
        # build: defer to generic-style behaviour. We don't try to parse
        # cargo build output here; the lint recipe (clippy) is the structured
        # channel for compiler diagnostics.
        if exit_code != 0:
            return [
                Finding(
                    kind="recipe_fail",
                    severity="warn",
                    code=f"exit_{exit_code}",
                    message=f"cargo build exited with status {exit_code}",
                )
            ]
        return []

    # ---- shape helpers -----------------------------------------------------
    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return ``value`` if it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _dict_items(value: object) -> list[dict]:
        """Return the dict elements of ``value`` if it is a list, else ``[]``."""
        if not isinstance(value, list):
            return []
        return [item for item in value if isinstance(item, dict)]

    # ---- audit -------------------------------------------------------------
    @classmethod
    def _parse_audit(cls, raw_log: str) -> list[Finding]:
        """Parse ``cargo audit --json`` output into one ``cve`` finding per entry.

        Expected envelope shape::

            {"vulnerabilities": {"list": [{"package": {...}, "advisory": {...},
                                           "versions": {"patched": [...]}}, ...]}}

        The JSON object is located inside ``raw_log`` via
        ``_extract_json_object`` because the recipe may print other text
        around it. Returns ``[]`` when no object is found or when the
        envelope does not have the expected shape.
        """
        envelope = _extract_json_object(raw_log)
        if envelope is None:
            return []
        vulns = cls._dict_items(
            cls._as_dict(envelope.get("vulnerabilities")).get("list")
        )
        out: list[Finding] = []
        for vuln in vulns:
            package = cls._as_dict(vuln.get("package"))
            pkg = package.get("name") or "?"
            ver = package.get("version") or "?"
            adv = cls._as_dict(vuln.get("advisory"))
            adv_id = adv.get("id") or "RUSTSEC-?"
            title = adv.get("title") or adv.get("description") or "advisory"

            # Normalise ``patched`` to a list of strings so the join below
            # cannot fail on non-string entries and a bare string is treated
            # as a single version rather than joined character by character.
            raw_patched = cls._as_dict(vuln.get("versions")).get("patched")
            if isinstance(raw_patched, str):
                patched = [raw_patched] if raw_patched else []
            elif isinstance(raw_patched, list):
                patched = [str(p) for p in raw_patched]
            else:
                patched = []
            patched_str = ", ".join(patched) if patched else "no fix available"

            out.append(
                Finding(
                    kind="cve",
                    severity="high",
                    code=adv_id,
                    message=f"{pkg} {ver}: {title} — patched in {patched_str}",
                    suggested_fix=(
                        f"bump {pkg} to {patched[0]}" if patched else None
                    ),
                    raw_json=json.dumps(vuln),
                    extras={
                        "package": pkg,
                        "version": ver,
                        "fixed_in": patched,
                        "advisory": adv_id,
                    },
                )
            )
        return out

    # ---- clippy ------------------------------------------------------------
    @classmethod
    def _suggestion_from_children(cls, children: object) -> str | None:
        """Pick a human-readable fix hint from a diagnostic's ``children``.

        Preference order:

        1. the first child carrying a non-empty ``rendered`` text;
        2. otherwise the first ``help``-level child with a message, suffixed
           with the first non-empty ``suggested_replacement`` found on its
           spans (``"<message>: <replacement>"``).

        The second step exists because child diagnostics normally have
        ``rendered`` set to null, so relying on it alone yields no hint.
        Returns ``None`` when neither source produces text.
        """
        entries = cls._dict_items(children)
        for child in entries:
            rendered = child.get("rendered")
            if rendered:
                return rendered
        for child in entries:
            if child.get("level") != "help":
                continue
            text = child.get("message")
            if not text:
                continue
            for span in cls._dict_items(child.get("spans")):
                replacement = span.get("suggested_replacement")
                if replacement:
                    return f"{text}: {replacement}"
            return str(text)
        return None

    @classmethod
    def _parse_clippy(cls, raw_log: str) -> list[Finding]:
        """Parse ``cargo clippy --message-format=json`` NDJSON into lint findings.

        Each decoded line is a cargo build-message; only those with
        ``reason == "compiler-message"`` and ``message.level`` of ``warning``
        or ``error`` become findings. File and line come from the primary
        span, falling back to the first span when none is flagged primary.
        Lines are decoded by ``_iter_jsonl`` (presumably skipping lines that
        are not valid JSON — confirm against ``base``).
        """
        out: list[Finding] = []
        for obj in _iter_jsonl(raw_log):
            if not isinstance(obj, dict):
                continue
            if obj.get("reason") != "compiler-message":
                continue
            msg = obj.get("message")
            if not isinstance(msg, dict):
                continue
            level = msg.get("level")
            # Tuple membership compares with ``==`` only, so an unhashable
            # ``level`` value is rejected instead of raising TypeError.
            if level not in ("warning", "error"):
                continue

            code_obj = msg.get("code")
            code = code_obj.get("code") if isinstance(code_obj, dict) else None

            spans = cls._dict_items(msg.get("spans"))
            primary = next(
                (s for s in spans if s.get("is_primary")),
                spans[0] if spans else None,
            )
            file = primary.get("file_name") if primary else None
            line = primary.get("line_start") if primary else None

            out.append(
                Finding(
                    kind="lint",
                    severity="error" if level == "error" else "warn",
                    file=file,
                    line=line,
                    code=code,
                    message=msg.get("message") or "",
                    suggested_fix=cls._suggestion_from_children(
                        msg.get("children")
                    ),
                    raw_json=json.dumps(obj),
                )
            )
        return out

    # ---- test --------------------------------------------------------------
    @classmethod
    def _parse_test(cls, raw_log: str, exit_code: int) -> list[Finding]:
        """Parse ``cargo test`` human-readable output into ``test_fail`` findings.

        Only the per-test ``test <name> ... FAILED`` lines are used; the
        trailing ``failures:`` summary repeats the same names and is ignored.
        ``exit_code == 0`` short-circuits to no findings. A non-zero exit
        with no recognisable FAILED line yields one generic finding so the
        failure is still recorded.
        """
        if exit_code == 0:
            return []
        hits = (cls._TEST_FAIL_RE.match(row) for row in raw_log.splitlines())
        # dict.fromkeys de-duplicates while keeping first-seen order.
        unique = list(dict.fromkeys(m.group(1) for m in hits if m))
        if not unique:
            return [
                Finding(
                    kind="test_fail",
                    severity="error",
                    code=f"exit_{exit_code}",
                    message=(
                        f"cargo test exited {exit_code} but no FAILED lines "
                        f"detected; check log"
                    ),
                )
            ]
        return [
            Finding(
                kind="test_fail",
                severity="error",
                code=name,
                message=f"test {name} failed",
            )
            for name in unique
        ]
def _extract_json_object(text: str) -> dict | None:
"""Pull the first balanced ``{...}`` block out of ``text`` and json.loads it.
cargo-audit's --json output is a single object but the recipe shell
might echo a banner before/after. Scan for the first '{' and walk
braces (string-aware) to find its match. Falls back to None.
"""
start = text.find("{")
while start != -1:
depth = 0
in_str = False
esc = False
for i in range(start, len(text)):
c = text[i]
if in_str:
if esc:
esc = False
elif c == "\\":
esc = True
elif c == '"':
in_str = False
continue
if c == '"':
in_str = True
elif c == "{":
depth += 1
elif c == "}":
depth -= 1
if depth == 0:
candidate = text[start : i + 1]
parsed = _safe_json_loads(candidate)
if isinstance(parsed, dict):
return parsed
break
start = text.find("{", start + 1)
return None