- parsers/ package: rust / python / go / typescript / generic
- parser registry with language+recipe -> fallback resolution
- fingerprint hash (kind+file+line+code) for cross-run dedup
- runner.py post-exec hook: parse log, persist findings, count on job row
(extraction runs before mark_job_finished so callers polling on terminal
status see findings_count populated atomically)
- db.insert_finding / list_findings / increment_findings_count DAOs already
shipped in wave 1; wired here
- GET /jobs/{id}/findings now returns real data (server route already
shipped; was returning empty list because nothing populated the table)
- tests/test_parsers/: 6 modules + 11 fixtures (rust/python/go/typescript)
- tests/test_runner_findings.py: 3 integration tests
- README: tick steps 2-6, add Findings section
Suite: 108 passing (62 wave-1 + 46 new).
Spec: memory/spec-crafting-table.md
235 lines
8.4 KiB
Python
235 lines
8.4 KiB
Python
"""Rust parser — clippy / cargo audit / cargo test.
|
|
|
|
Recipes handled:
|
|
- ``audit`` → cargo audit --json envelope → list of CVE findings
|
|
- ``lint`` → cargo clippy --message-format=json (NDJSON) → lint findings
|
|
- ``test`` → cargo test human output (no good machine format) → failures
|
|
- ``build`` → falls through to the generic recipe_fail behavior because
|
|
build success/failure is captured by exit_code alone; structured build
|
|
errors come through clippy on the lint recipe.
|
|
|
|
Each branch degrades gracefully: malformed JSON → empty findings, not
|
|
crash. The runner logs the parse failure and still records the job as
|
|
finished.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import re
|
|
|
|
from .base import Finding, _iter_jsonl, _safe_json_loads
|
|
|
|
|
|
class RustParser:
    """Turn the raw log of a Rust recipe run into a list of ``Finding`` objects.

    Every entry point is a classmethod; the class carries no instance state.
    Each recipe-specific parser tolerates unexpected input shapes by
    skipping the offending record instead of raising, so one odd log line
    cannot abort extraction for the whole job.
    """

    @classmethod
    def matches(cls, language: str, recipe: str) -> bool:
        """Return True when ``language`` is rust and ``recipe`` is one we handle."""
        return language == "rust" and recipe in {"audit", "lint", "test", "build"}

    @classmethod
    def parse(cls, raw_log: str, exit_code: int, recipe: str) -> list[Finding]:
        """Dispatch ``raw_log`` to the parser for ``recipe``.

        Args:
            raw_log: Full captured output of the recipe.
            exit_code: Process exit status of the recipe.
            recipe: Recipe name; ``audit``, ``lint`` and ``test`` have
                dedicated parsers, anything else is treated like ``build``.

        Returns:
            The findings extracted from the log (possibly empty).
        """
        if recipe == "audit":
            return cls._parse_audit(raw_log)
        if recipe == "lint":
            return cls._parse_clippy(raw_log)
        if recipe == "test":
            return cls._parse_test(raw_log, exit_code)
        # build: defer to generic-style behaviour. We don't try to parse
        # cargo build output here; lint + clippy is the structured channel.
        if exit_code != 0:
            return [
                Finding(
                    kind="recipe_fail",
                    severity="warn",
                    code=f"exit_{exit_code}",
                    message=f"cargo build exited with status {exit_code}",
                )
            ]
        return []

    # ---- shared helpers ----------------------------------------------------

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return ``value`` when it is a dict, otherwise an empty dict.

        Lets callers chain ``.get`` on decoded JSON without crashing when a
        field that should be an object turns out to be null, a list, etc.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _suggestion_from_children(children: object) -> str | None:
        """Pick a human-readable fix hint out of a diagnostic's ``children``.

        A child's ``rendered`` text wins when present (the original
        behaviour). Otherwise the first child whose spans carry a
        ``suggested_replacement`` is used, formatted as
        ``"<child message>: `<replacement>`"`` (just the message when the
        replacement is empty, i.e. the suggestion is a deletion).

        Returns None when ``children`` is not a list or holds no suggestion.
        """
        if not isinstance(children, list):
            return None
        kids = [child for child in children if isinstance(child, dict)]

        for child in kids:
            rendered = child.get("rendered")
            if rendered:
                return rendered

        # Fall back to structured suggestions attached to the child spans.
        for child in kids:
            spans = child.get("spans")
            if not isinstance(spans, list):
                continue
            for span in spans:
                if not isinstance(span, dict):
                    continue
                replacement = span.get("suggested_replacement")
                if not isinstance(replacement, str):
                    continue
                text = str(child.get("message") or "").strip()
                if not text:
                    return f"`{replacement}`" if replacement else None
                return f"{text}: `{replacement}`" if replacement else text
        return None

    # ---- audit -------------------------------------------------------------

    @classmethod
    def _parse_audit(cls, raw_log: str) -> list[Finding]:
        """Extract one ``cve`` finding per vulnerability from cargo-audit JSON.

        cargo-audit emits a single JSON envelope on stdout when invoked
        with --json. Shape:
          {"vulnerabilities": {"list": [{"package": {...}, "advisory": {...},
                                          "versions": {"patched": [...]}}, ...]}}
        We extract the JSON object substring (the recipe usually echoes
        other text first) and pull each vulnerability out.

        Returns an empty list when no JSON object is found or when the
        envelope does not have the expected shape; individual entries that
        are not objects are skipped.
        """
        envelope = _extract_json_object(raw_log)
        if envelope is None:
            return []
        vulns = cls._as_dict(envelope.get("vulnerabilities")).get("list")
        if not isinstance(vulns, list):
            return []

        out: list[Finding] = []
        for v in vulns:
            if not isinstance(v, dict):
                continue
            package = cls._as_dict(v.get("package"))
            pkg = package.get("name") or "?"
            ver = package.get("version") or "?"
            adv = cls._as_dict(v.get("advisory"))
            adv_id = adv.get("id") or "RUSTSEC-?"
            title = adv.get("title") or adv.get("description") or "advisory"
            patched_raw = cls._as_dict(v.get("versions")).get("patched")
            # Coerce to strings so the join below cannot raise on odd entries.
            patched = (
                [str(p) for p in patched_raw]
                if isinstance(patched_raw, list)
                else []
            )
            patched_str = ", ".join(patched) if patched else "no fix available"
            out.append(
                Finding(
                    kind="cve",
                    severity="high",
                    code=adv_id,
                    message=f"{pkg} {ver}: {title} — patched in {patched_str}",
                    suggested_fix=(
                        f"bump {pkg} to {patched[0]}" if patched else None
                    ),
                    raw_json=json.dumps(v),
                    extras={
                        "package": pkg,
                        "version": ver,
                        "fixed_in": patched,
                        "advisory": adv_id,
                    },
                )
            )
        return out

    # ---- clippy ------------------------------------------------------------

    @classmethod
    def _parse_clippy(cls, raw_log: str) -> list[Finding]:
        """Extract ``lint`` findings from ``cargo clippy --message-format=json``.

        The output is NDJSON. Each line is a cargo build-message; the ones
        we care about have:
          reason == "compiler-message"
          message.level in {"warning", "error"}

        The location comes from the primary span (or the first span when
        none is flagged primary). Records whose ``message`` is not an
        object are skipped.
        """
        out: list[Finding] = []
        for obj in _iter_jsonl(raw_log):
            if not isinstance(obj, dict):
                continue
            if obj.get("reason") != "compiler-message":
                continue
            msg = obj.get("message")
            if not isinstance(msg, dict):
                continue
            level = msg.get("level")
            # Tuple membership uses ==, so an unhashable level cannot raise.
            if level not in ("warning", "error"):
                continue

            code_obj = msg.get("code")
            code = code_obj.get("code") if isinstance(code_obj, dict) else None

            raw_spans = msg.get("spans")
            spans = (
                [s for s in raw_spans if isinstance(s, dict)]
                if isinstance(raw_spans, list)
                else []
            )
            primary = next(
                (s for s in spans if s.get("is_primary")),
                spans[0] if spans else None,
            )
            file = primary.get("file_name") if primary else None
            line = primary.get("line_start") if primary else None

            out.append(
                Finding(
                    kind="lint",
                    severity="error" if level == "error" else "warn",
                    file=file,
                    line=line,
                    code=code,
                    message=msg.get("message") or "",
                    suggested_fix=cls._suggestion_from_children(
                        msg.get("children")
                    ),
                    raw_json=json.dumps(obj),
                )
            )
        return out

    # ---- test --------------------------------------------------------------

    _TEST_FAIL_RE = re.compile(r"^\s*test\s+(\S+)\s+\.{3}\s+FAILED\s*$")
    # Not referenced by any method of this class; kept in case other code
    # reads it.
    _FAILURES_RE = re.compile(r"^\s*failures:\s*$")

    @classmethod
    def _parse_test(cls, raw_log: str, exit_code: int) -> list[Finding]:
        """Extract ``test_fail`` findings from human-formatted cargo test output.

        cargo test prints human-formatted output by default. Two reliable
        signals:
          1. ``test foo::bar ... FAILED`` lines from the runner.
          2. ``failures:`` block listing the failed tests indented.

        We collect the FAILED line names since they appear once per failure
        and are the cleanest extraction. exit_code == 0 means no failures.

        Returns:
            ``[]`` when ``exit_code`` is 0; one finding per distinct failed
            test name (first-seen order) otherwise; or a single generic
            finding when the run failed but no FAILED line was recognised.
        """
        if exit_code == 0:
            return []

        matches = (cls._TEST_FAIL_RE.match(row) for row in raw_log.splitlines())
        # dict.fromkeys dedups while preserving first-seen order.
        unique = list(dict.fromkeys(m.group(1) for m in matches if m))

        if not unique:
            return [
                Finding(
                    kind="test_fail",
                    severity="error",
                    code=f"exit_{exit_code}",
                    message=(
                        f"cargo test exited {exit_code} but no FAILED lines "
                        f"detected; check log"
                    ),
                )
            ]
        return [
            Finding(
                kind="test_fail",
                severity="error",
                code=name,
                message=f"test {name} failed",
            )
            for name in unique
        ]
|
|
|
|
|
|
def _extract_json_object(text: str) -> dict | None:
    """Return the first ``{...}`` span of ``text`` that decodes to a dict.

    cargo-audit's --json output is a single object, but the recipe shell
    might echo a banner before or after it. Every ``{`` in ``text`` is
    tried in order as a potential start: braces are walked (skipping
    anything inside double-quoted strings, honouring backslash escapes)
    until the opening brace is balanced, and that slice is handed to
    ``_safe_json_loads``. The first slice that yields a dict wins.

    Returns None when no candidate decodes to a dict.
    """

    def closing_brace(opening: int) -> int:
        """Index of the ``}`` balancing ``text[opening]``, or -1 if none."""
        nesting = 0
        inside_string = False
        escaped = False
        pos = opening
        size = len(text)
        while pos < size:
            ch = text[pos]
            if inside_string:
                if escaped:
                    # Character after a backslash is consumed verbatim.
                    escaped = False
                elif ch == "\\":
                    escaped = True
                elif ch == '"':
                    inside_string = False
            elif ch == '"':
                inside_string = True
            elif ch == "{":
                nesting += 1
            elif ch == "}":
                nesting -= 1
                if nesting == 0:
                    return pos
            pos += 1
        return -1

    opening = text.find("{")
    while opening != -1:
        closing = closing_brace(opening)
        if closing != -1:
            decoded = _safe_json_loads(text[opening : closing + 1])
            if isinstance(decoded, dict):
                return decoded
        # Either unbalanced or not a JSON object: try the next '{'.
        opening = text.find("{", opening + 1)
    return None
|