crafting-table/crafting_table/parsers/rust.py

"""Rust parser — clippy / cargo audit / cargo test.

Recipes handled:
- ``audit`` → cargo audit --json envelope → list of CVE findings
- ``lint``  → cargo clippy --message-format=json (NDJSON) → lint findings
- ``test``  → cargo test human output (no good machine format) → failures
- ``build`` → falls through to the generic recipe_fail behavior because
  build success/failure is captured by exit_code alone; structured build
  errors come through clippy on the lint recipe.

Each branch degrades gracefully: malformed JSON → empty findings, not
crash. The runner logs the parse failure and still records the job as
finished.
"""
from __future__ import annotations

import json
import re

from .base import Finding, _iter_jsonl, _safe_json_loads


class RustParser:
    """Turn raw cargo recipe logs into ``Finding`` records.

    Dispatch is by recipe name: ``audit`` (cargo audit JSON envelope),
    ``lint`` (clippy NDJSON), ``test`` (cargo test human output). Any other
    recipe, in practice ``build``, is judged on the exit code alone.

    Every branch tolerates unexpected JSON shapes: a value that is not the
    expected dict/list is skipped instead of raising, so a malformed log
    produces fewer (or no) findings rather than a crash.
    """

    @classmethod
    def matches(cls, language: str, recipe: str) -> bool:
        """Return True when ``language`` is rust and ``recipe`` is handled here."""
        return language == "rust" and recipe in {"audit", "lint", "test", "build"}

    @classmethod
    def parse(cls, raw_log: str, exit_code: int, recipe: str) -> list[Finding]:
        """Parse ``raw_log`` for ``recipe`` and return the findings.

        ``exit_code`` is only consulted by the ``test`` branch and by the
        fall-through (build) branch.
        """
        if recipe == "audit":
            return cls._parse_audit(raw_log)
        if recipe == "lint":
            return cls._parse_clippy(raw_log)
        if recipe == "test":
            return cls._parse_test(raw_log, exit_code)
        # build: defer to generic-style behaviour. We don't try to parse
        # cargo build output here; the lint recipe (clippy) is the
        # structured channel.
        if exit_code != 0:
            return [
                Finding(
                    kind="recipe_fail",
                    severity="warn",
                    code=f"exit_{exit_code}",
                    message=f"cargo build exited with status {exit_code}",
                )
            ]
        return []

    # ---- shared helpers ----------------------------------------------------

    @staticmethod
    def _as_dict(value: object) -> dict:
        """Return ``value`` when it is a dict, otherwise an empty dict."""
        return value if isinstance(value, dict) else {}

    # ---- audit -------------------------------------------------------------

    @classmethod
    def _parse_audit(cls, raw_log: str) -> list[Finding]:
        """cargo-audit emits a single JSON envelope on stdout when invoked
        with --json. Shape:
          {"vulnerabilities": {"list": [{"package": {...}, "advisory": {...},
                                         "versions": {"patched": [...]}}, ...]}}
        We extract the JSON object substring (the recipe usually echoes
        other text first) and pull each vulnerability out. Entries that do
        not have the expected shape are skipped.
        """
        envelope = _extract_json_object(raw_log)
        if envelope is None:
            return []
        vulns = cls._as_dict(envelope.get("vulnerabilities")).get("list")
        if not isinstance(vulns, list):
            return []
        out: list[Finding] = []
        for v in vulns:
            if not isinstance(v, dict):
                continue
            package = cls._as_dict(v.get("package"))
            pkg = package.get("name") or "?"
            ver = package.get("version") or "?"
            adv = cls._as_dict(v.get("advisory"))
            adv_id = adv.get("id") or "RUSTSEC-?"
            title = adv.get("title") or adv.get("description") or "advisory"
            # Keep only string entries so the join below cannot raise.
            patched_raw = cls._as_dict(v.get("versions")).get("patched")
            patched = (
                [p for p in patched_raw if isinstance(p, str)]
                if isinstance(patched_raw, list)
                else []
            )
            patched_str = ", ".join(patched) if patched else "no fix available"
            out.append(
                Finding(
                    kind="cve",
                    severity="high",
                    code=adv_id,
                    message=f"{pkg} {ver}: {title} — patched in {patched_str}",
                    suggested_fix=(
                        f"bump {pkg} to {patched[0]}" if patched else None
                    ),
                    raw_json=json.dumps(v),
                    extras={
                        "package": pkg,
                        "version": ver,
                        "fixed_in": patched,
                        "advisory": adv_id,
                    },
                )
            )
        return out

    # ---- clippy ------------------------------------------------------------

    @staticmethod
    def _primary_span(spans: object) -> dict | None:
        """Return the span flagged ``is_primary``, else the first span.

        Non-dict entries are ignored; returns None when nothing usable remains.
        """
        if not isinstance(spans, list):
            return None
        usable = [s for s in spans if isinstance(s, dict)]
        for span in usable:
            if span.get("is_primary"):
                return span
        return usable[0] if usable else None

    @staticmethod
    def _suggestion_from_children(children: object) -> str | None:
        """Derive a suggested fix from a diagnostic's child diagnostics.

        A child carrying ``rendered`` text wins (the historical behaviour).
        rustc's JSON sets ``rendered`` to null on children, so otherwise the
        first child with a span holding a ``suggested_replacement`` string
        is used, formatted as ``"<child message>: <replacement>"``. An empty
        replacement (a deletion) yields the child message alone.
        """
        if not isinstance(children, list):
            return None
        kids = [c for c in children if isinstance(c, dict)]
        for child in kids:
            rendered = child.get("rendered")
            if rendered:
                return rendered
        for child in kids:
            spans = child.get("spans")
            if not isinstance(spans, list):
                continue
            hint = child.get("message")
            hint = hint if isinstance(hint, str) else ""
            for span in spans:
                if not isinstance(span, dict):
                    continue
                replacement = span.get("suggested_replacement")
                if not isinstance(replacement, str):
                    continue
                if hint and replacement:
                    return f"{hint}: {replacement}"
                if hint or replacement:
                    return hint or replacement
        return None

    @classmethod
    def _parse_clippy(cls, raw_log: str) -> list[Finding]:
        """cargo clippy --message-format=json emits NDJSON. Each line is a
        cargo build-message; the ones we care about have:
            reason == "compiler-message"
            message.level in {"warning", "error"}
        Lines whose ``message`` is not an object are skipped.
        """
        out: list[Finding] = []
        for obj in _iter_jsonl(raw_log):
            if not isinstance(obj, dict):
                continue
            if obj.get("reason") != "compiler-message":
                continue
            msg = obj.get("message")
            if not isinstance(msg, dict):
                continue
            level = msg.get("level")
            # Tuple membership: an unhashable ``level`` must not raise.
            if level not in ("warning", "error"):
                continue

            code_obj = msg.get("code")
            code = code_obj.get("code") if isinstance(code_obj, dict) else None
            primary = cls._primary_span(msg.get("spans"))
            file_name = primary.get("file_name") if primary else None
            line_start = primary.get("line_start") if primary else None

            out.append(
                Finding(
                    kind="lint",
                    severity="error" if level == "error" else "warn",
                    file=file_name,
                    line=line_start,
                    code=code,
                    message=msg.get("message") or "",
                    suggested_fix=cls._suggestion_from_children(
                        msg.get("children")
                    ),
                    raw_json=json.dumps(obj),
                )
            )
        return out

    # ---- test --------------------------------------------------------------

    # The name group is lazy "anything" rather than ``\S+`` because doctest
    # names ("src/lib.rs - foo (line 3)") and "- should panic" names contain
    # spaces. Names without spaces capture exactly as before.
    _TEST_FAIL_RE = re.compile(r"^\s*test\s+(.+?)\s+\.{3}\s+FAILED\s*$")
    # Not referenced by the methods of this class; kept as a class attribute.
    _FAILURES_RE = re.compile(r"^\s*failures:\s*$")

    @classmethod
    def _failed_test_names(cls, raw_log: str) -> list[str]:
        """Return the names on ``test <name> ... FAILED`` lines.

        Names are reported once each, in order of first appearance.
        """
        names = []
        for raw_line in raw_log.splitlines():
            m = cls._TEST_FAIL_RE.match(raw_line)
            if m:
                names.append(m.group(1))
        # dict preserves insertion order, giving an order-stable dedup.
        return list(dict.fromkeys(names))

    @classmethod
    def _parse_test(cls, raw_log: str, exit_code: int) -> list[Finding]:
        """cargo test prints human-formatted output by default. Two reliable
        signals:
          1. ``test foo::bar ... FAILED`` lines from the runner.
          2. ``failures:`` block listing the failed tests indented.

        We collect the FAILED line names since they appear once per failure
        and are the cleanest extraction. exit_code == 0 means no failures.
        A non-zero exit with no FAILED lines yields one generic finding
        that points at the log.
        """
        if exit_code == 0:
            return []
        unique = cls._failed_test_names(raw_log)
        if not unique:
            return [
                Finding(
                    kind="test_fail",
                    severity="error",
                    code=f"exit_{exit_code}",
                    message=(
                        f"cargo test exited {exit_code} but no FAILED lines "
                        f"detected; check log"
                    ),
                )
            ]
        return [
            Finding(
                kind="test_fail",
                severity="error",
                code=name,
                message=f"test {name} failed",
            )
            for name in unique
        ]


def _extract_json_object(text: str) -> dict | None:
    """Return the first JSON object embedded in ``text``, or ``None``.

    cargo-audit's --json output is a single object but the recipe shell
    might echo a banner before/after. Each ``{`` in ``text`` is tried in
    order as the start of a JSON value; the first one that decodes to a
    dict wins. Text after the object's closing brace is ignored.

    Decoding is delegated to ``json.JSONDecoder.raw_decode``, which stops
    at the end of the value it parsed. That replaces a hand-written
    string-aware brace walker and fails fast on a ``{`` that does not
    start valid JSON instead of scanning to the end of the text.
    """
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            parsed, _end = decoder.raw_decode(text, start)
        except (ValueError, RecursionError):
            # Not valid JSON from this brace (JSONDecodeError is a
            # ValueError) or nested too deeply; try the next candidate.
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        start = text.find("{", start + 1)
    return None