- db.py: migrations + DAOs for tokens / projects / jobs / findings (SQLite WAL)
- auth.py: SHA-256 bearer hashing + LAN-CIDR allowlist + admin/app token tiers
- models.py: Pydantic shapes (Project, Subproject, Schedule, Notify, Job, CreateJobRequest)
- server.py: FastAPI on port 8810; /healthz, /admin/tokens/*, /projects/*, /jobs, /jobs/{id}, /jobs/{id}/log, /jobs/{id}/findings
- runner.py: bounded asyncio pool, per-job timeout with process-group SIGTERM→SIGKILL escalation, orphaned-job recovery on boot
- workspace.py: bare-clone + worktree materialization, gc
- config.py: env-driven
- 62 tests across db / auth / projects / jobs / runner / e2e — all green
Cross-token project access returns 404 (not 403) — existence-leak guard.
Bearer tokens hashed at rest; admin token bootstrapped on first boot.
Recipe subprocess uses start_new_session=True so killpg targets the
whole process tree on timeout — child processes can't escape SIGKILL.
Pump task guarded with wait_for(2s) + cancel fallback against any
orphan that survives the group kill.
Wave 2 (parsers + findings extraction + MCP + email digest) pending.
Spec: memory/spec-crafting-table.md
111 lines
3.2 KiB
Python
111 lines
3.2 KiB
Python
"""Pydantic schemas for projects, recipes, jobs, findings.

All wire shapes — what HTTP request bodies look like and what the API returns.
The DB stores Project minus the name (which is the row PK) as recipe_json so
recipe drift is visible per-job (jobs snapshot their recipe at run-time).
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import Literal
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
# Slug pattern shared between project names and token names: lowercase
# alphanumerics plus hyphen and underscore; the first character must be
# alphanumeric (so a slug can never start with "-" or "_").
SLUG_PATTERN = r"^[a-z0-9][a-z0-9_-]*$"
|
|
|
|
|
|
class Subproject(BaseModel):
    """One language target inside a repo. A project has one or more."""

    # Location of this target; defaults to "." when omitted.
    # NOTE(review): presumably relative to the repository root — confirm
    # against the workspace/runner code.
    path: str = "."
    # Required; free-form string (no enum or pattern is enforced here).
    language: str
    # One optional string per recipe kind. The names mirror the `recipe`
    # literals accepted by CreateJobRequest; presumably each holds the command
    # to run for that recipe — verify against the runner. None = not set.
    build: str | None = None
    test: str | None = None
    lint: str | None = None
    audit: str | None = None
    # Timeout in seconds: default 1800 (30 min), validated to 1..86400 (24 h).
    timeout_secs: int = Field(default=1800, ge=1, le=86400)
|
|
|
|
|
|
class Schedule(BaseModel):
    """Cron-style schedules per recipe kind. 'manual' = caller-driven only.

    Wave 1 doesn't run the scheduler yet — these strings are persisted but the
    sweeper that consumes them lands in a later wave. Stored as-is.
    """

    # One optional schedule string per recipe kind. No format validation is
    # applied in this model; each value is accepted as any string or None.
    audit: str | None = None
    test: str | None = None
    build: str | None = None
    lint: str | None = None
|
|
|
|
|
|
class Notify(BaseModel):
    """Notification settings for a project."""

    # Recipient list; empty by default. Entries are plain strings — no
    # e-mail address validation happens in this model.
    email: list[str] = Field(default_factory=list)
    # Event names to notify on. A factory is used so every instance gets its
    # own list rather than sharing one mutable default.
    on: list[str] = Field(default_factory=lambda: ["audit_fail", "cve_found", "patch_drafted"])
    # Off by default.
    # NOTE(review): the consumer of this flag is not visible here — confirm
    # what enabling it triggers.
    auto_patch: bool = False
|
|
|
|
|
|
class Project(BaseModel):
    """Full project shape — what the API accepts on POST /projects.

    `created_at` and `updated_at` are server-stamped on insert/update; if the
    caller supplies them we ignore the values and use server time.
    """

    # Slug-validated identifier, 1..64 characters (see SLUG_PATTERN).
    name: str = Field(pattern=SLUG_PATTERN, min_length=1, max_length=64)
    # Required and non-empty; no URL-format validation is applied here.
    git_url: str = Field(min_length=1)
    default_branch: str = "main"
    # Both lists default to empty; factories avoid shared mutable defaults.
    languages: list[str] = Field(default_factory=list)
    subprojects: list[Subproject] = Field(default_factory=list)
    # Nested settings fall back to their own all-default instances.
    schedule: Schedule = Field(default_factory=Schedule)
    notify: Notify = Field(default_factory=Notify)
    # Integer timestamps, 0 until the server stamps them (see docstring).
    # NOTE(review): unit is presumably Unix epoch seconds — confirm in db.py.
    created_at: int = 0
    updated_at: int = 0
|
|
|
|
|
|
class CreateJobRequest(BaseModel):
    """Request shape for creating a job."""

    # Required; restricted to the four recipe kinds below.
    recipe: Literal["build", "test", "lint", "audit"]
    # Both optional.
    # NOTE(review): how None is resolved (e.g. to a default subproject or the
    # project's default branch) is decided by the handler, not shown here.
    subproject: str | None = None
    branch: str | None = None
|
|
|
|
|
|
class Job(BaseModel):
    """API view of a job row."""

    id: str
    project_name: str
    subproject_path: str
    # Plain str here, unlike CreateJobRequest.recipe which is a Literal.
    recipe: str
    branch: str
    # Closed set of lifecycle states; any other value fails validation.
    status: Literal["queued", "running", "succeeded", "failed", "timed_out", "cancelled"]
    # Always present; the remaining timestamps and the exit code default to
    # None and are optional.
    # NOTE(review): timestamps are presumably Unix epoch seconds — confirm.
    queued_at: int
    started_at: int | None = None
    finished_at: int | None = None
    exit_code: int | None = None
    log_path: str
    findings_count: int = 0
|
|
|
|
|
|
class TokenCreateRequest(BaseModel):
    """Request shape for creating a token."""

    # Slug-validated name, 1..64 characters (same pattern as project names).
    name: str = Field(pattern=SLUG_PATTERN, min_length=1, max_length=64)
    # Non-admin unless explicitly requested.
    is_admin: bool = False
    # Defaults to an empty list. Entries are plain strings; CIDR syntax is not
    # validated in this model.
    # NOTE(review): meaning of an empty list (no restriction vs. deny all) is
    # decided by the auth layer — confirm there.
    ip_cidrs: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class Finding(BaseModel):
    """One structured finding from a parser. Wave 1 ships the schema; wave 2
    actually populates these from cargo/clippy/ruff/etc. JSON output."""

    id: int
    # String id of the owning job (Job.id is also a str).
    job_id: str
    # Free-form strings; no enum is enforced for either field here.
    kind: str
    severity: str
    # Optional source location and tool-specific code.
    file: str | None = None
    line: int | None = None
    code: str | None = None
    message: str
    suggested_fix: str | None = None
    # NOTE(review): presumably a stable hash used to de-duplicate findings
    # across jobs — confirm against the parser/DAO code.
    fingerprint: str
    created_at: int
|