- db.py: migrations + DAOs for tokens / projects / jobs / findings (SQLite WAL)
- auth.py: SHA-256 bearer hashing + LAN-CIDR allowlist + admin/app token tiers
- models.py: Pydantic shapes (Project, Subproject, Schedule, Notify, Job, CreateJobRequest)
- server.py: FastAPI on port 8810; /healthz, /admin/tokens/*, /projects/*, /jobs, /jobs/{id}, /jobs/{id}/log, /jobs/{id}/findings
- runner.py: bounded asyncio pool, per-job timeout with process-group SIGTERM→SIGKILL escalation, orphaned-job recovery on boot
- workspace.py: bare-clone + worktree materialization, gc
- config.py: env-driven
- 62 tests across db / auth / projects / jobs / runner / e2e — all green
Cross-token project access returns 404 (not 403) — existence-leak guard.
Bearer tokens hashed at rest; admin token bootstrapped on first boot.
Recipe subprocess uses start_new_session=True so killpg targets the
whole process tree on timeout — child processes can't escape SIGKILL.
Pump task guarded with wait_for(2s) + cancel fallback against any
orphan that survives the group kill.
Wave 2 (parsers + findings extraction + MCP + email digest) pending.
Spec: memory/spec-crafting-table.md
232 lines
7.8 KiB
Python
232 lines
7.8 KiB
Python
"""Runner — exec recipe, timeout, bounded concurrency, orphan recovery.
|
|
|
|
Strategy: bypass the workspace materialization step (no real git URLs in
|
|
tests) by stubbing WorkspaceManager.materialize to return a path the test
|
|
controls. Recipes are stub shell strings (`echo`, `sleep`, `false`).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import time
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from crafting_table.db import DB
|
|
from crafting_table.runner import Runner
|
|
from crafting_table.workspace import WorkspaceManager, WorkspacePaths
|
|
|
|
|
|
# ---------- helper: stub materialize() so we don't need real git ----------
|
|
|
|
|
|
class _StubWorkspace(WorkspaceManager):
    """WorkspaceManager stand-in that fabricates worktrees without touching git.

    ``materialize`` only creates an empty directory below ``self.root`` and
    logs what it would have cloned; ``cleanup`` remembers the worktree it was
    handed in ``_cleanups`` and deletes nothing.
    """

    def __init__(self, root: Path):
        super().__init__(root)
        # Worktree directories passed to cleanup(), in call order.
        self._cleanups: list[Path] = []

    async def materialize(self, *, project, job_id, git_url, branch, log_fh):
        """Create ``root/<project>/<job_id>`` and describe the skipped clone in ``log_fh``."""
        project_root = self.root / project
        worktree_dir = project_root / job_id
        worktree_dir.mkdir(parents=True, exist_ok=True)

        # No real clone happens; leave a trace in the job log instead.
        log_fh.write(f"[stub-workspace] would clone {git_url}@{branch} to {worktree_dir}\n")
        log_fh.flush()

        return WorkspacePaths(
            project_root=project_root,
            cache_dir=project_root / ".cache",
            worktree_dir=worktree_dir,
        )

    async def cleanup(self, paths):
        """Record the worktree directory rather than removing it."""
        self._cleanups.append(paths.worktree_dir)
|
|
|
|
|
|
def _seed_project_and_job(
    db: DB, *, recipe_cmd: str, timeout_secs: int = 5, recipe_kind: str = "test"
) -> str:
    """Insert the fixture rows a runner test needs and return the new job id.

    Ensures token ``"o"`` exists, upserts project ``"proj"`` with a single
    subproject (path ``"."``) whose ``recipe_kind`` entry is ``recipe_cmd``,
    and inserts one job for that recipe on branch ``"main"``. The job's log
    path is ``<directory of db.db_path>/jobs/<job_id>.log``; the ``jobs``
    directory is created if missing.

    Args:
        db: Database handle to seed.
        recipe_cmd: Shell string stored under ``recipe_kind`` in the subproject.
        timeout_secs: Value stored as the subproject's ``timeout_secs``.
        recipe_kind: Subproject key that receives ``recipe_cmd``; also used as
            the job's ``recipe``.

    Returns:
        The id of the inserted job, unique per call.
    """
    # Function-scope import keeps this helper self-contained.
    import uuid

    if not db.get_token("o"):
        db.insert_token(name="o", bearer="t-runner", is_admin=False, ip_cidrs=None)

    sub = {
        "path": ".",
        "language": "python",
        "build": None, "test": None, "lint": None, "audit": None,
        "timeout_secs": timeout_secs,
    }
    sub[recipe_kind] = recipe_cmd
    snapshot = {
        "git_url": "stub://localhost",
        "default_branch": "main",
        "subprojects": [sub],
        "languages": ["python"],
    }
    # Serialise once; the project row and the job row store the same snapshot.
    snapshot_json = json.dumps(snapshot)

    db.upsert_project(
        name="proj",
        git_url="stub://localhost",
        default_branch="main",
        recipe_json=snapshot_json,
        owner_token="o",
    )

    # A millisecond timestamp alone collides when several jobs are seeded
    # back-to-back, so a random fragment is added to keep every id distinct.
    job_id = f"job-{int(time.time() * 1000)}-{uuid.uuid4().hex[:8]}-{recipe_kind}"
    log_path = Path(db.db_path).parent / "jobs" / f"{job_id}.log"
    log_path.parent.mkdir(parents=True, exist_ok=True)
    db.insert_job(
        job_id=job_id, project_name="proj", subproject_path=".",
        recipe=recipe_kind, branch="main",
        log_path=str(log_path), recipe_snapshot_json=snapshot_json,
    )
    return job_id
|
|
|
|
|
|
# ---------- tests ---------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_succeeds(tmp_path):
    """A recipe that exits 0 ends with status 'succeeded' and exit_code 0."""
    database = DB(str(tmp_path / "ct.db"))
    workspace = _StubWorkspace(tmp_path / "ws")
    runner = Runner(
        db=database,
        workspace=workspace,
        log_dir=tmp_path / "jobs",
        max_concurrent=2,
    )
    await runner.start()
    try:
        job_id = _seed_project_and_job(
            database, recipe_cmd="echo hello && exit 0", recipe_kind="test"
        )
        await runner.enqueue(job_id)
        await _wait_terminal(database, job_id)

        job_row = database.get_job(job_id)
        assert job_row["status"] == "succeeded"
        assert job_row["exit_code"] == 0
    finally:
        # Always shut the runner down, even when an assertion fails.
        await runner.stop()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_fails(tmp_path):
    """A recipe that exits 1 ends with status 'failed' and exit_code 1."""
    database = DB(str(tmp_path / "ct.db"))
    workspace = _StubWorkspace(tmp_path / "ws")
    runner = Runner(
        db=database,
        workspace=workspace,
        log_dir=tmp_path / "jobs",
        max_concurrent=2,
    )
    await runner.start()
    try:
        job_id = _seed_project_and_job(database, recipe_cmd="exit 1", recipe_kind="test")
        await runner.enqueue(job_id)
        await _wait_terminal(database, job_id)

        job_row = database.get_job(job_id)
        assert job_row["status"] == "failed"
        assert job_row["exit_code"] == 1
    finally:
        # Always shut the runner down, even when an assertion fails.
        await runner.stop()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_times_out(tmp_path):
    """A 60 s sleep under a 1 s timeout ends 'timed_out' and the log says so."""
    database = DB(str(tmp_path / "ct.db"))
    workspace = _StubWorkspace(tmp_path / "ws")
    runner = Runner(
        db=database,
        workspace=workspace,
        log_dir=tmp_path / "jobs",
        max_concurrent=2,
    )
    await runner.start()
    try:
        job_id = _seed_project_and_job(
            database,
            recipe_cmd="sleep 60",
            timeout_secs=1,
            recipe_kind="audit",
        )
        await runner.enqueue(job_id)
        await _wait_terminal(database, job_id, deadline_s=15)

        job_row = database.get_job(job_id)
        assert job_row["status"] == "timed_out"

        # The job log must mention the timeout.
        log_file = Path(job_row["log_path"])
        assert "timeout" in log_file.read_text()
    finally:
        # Always shut the runner down, even when an assertion fails.
        await runner.stop()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_bounded_concurrency(tmp_path):
    """Queue 5 jobs with max_concurrent=2, assert peak in-flight is exactly 2."""
    db = DB(str(tmp_path / "ct.db"))
    ws = _StubWorkspace(tmp_path / "ws")
    runner = Runner(db=db, workspace=ws, log_dir=tmp_path / "jobs", max_concurrent=2)
    await runner.start()
    try:
        job_ids = [
            _seed_project_and_job(
                db, recipe_cmd="sleep 0.5 && echo ok", timeout_secs=10, recipe_kind="test"
            )
            for _ in range(5)
        ]
        # Enqueue everything first so the pool has more work than slots.
        for job_id in job_ids:
            await runner.enqueue(job_id)
        for job_id in job_ids:
            await _wait_terminal(db, job_id, deadline_s=30)

        # Upper bound: the pool must never exceed max_concurrent.
        assert runner.peak_in_flight <= 2
        # Lower bound: five 0.5 s jobs queued back-to-back are expected to
        # overlap, so the pool should fill up. The previous `>= 1` check
        # would also pass for a fully serial runner.
        # NOTE(review): assumes peak_in_flight counts simultaneously running jobs.
        assert runner.peak_in_flight >= 2

        for job_id in job_ids:
            assert db.get_job(job_id)["status"] == "succeeded"
    finally:
        # Always shut the runner down, even when an assertion fails.
        await runner.stop()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_orphaned_running_marked_failed_on_start(tmp_path):
    """A job row still 'running' when the runner starts is flipped to 'failed' with exit_code -1."""
    orphan_id = "orph-1"
    database = DB(str(tmp_path / "ct.db"))

    # Build the leftover state by hand: token, project, and a job marked running.
    database.insert_token(name="o", bearer="t-orph", is_admin=False, ip_cidrs=None)
    database.upsert_project(
        name="proj",
        git_url="stub://x",
        default_branch="main",
        recipe_json="{}",
        owner_token="o",
    )
    database.insert_job(
        job_id=orphan_id,
        project_name="proj",
        subproject_path=".",
        recipe="test",
        branch="main",
        log_path=str(tmp_path / "orph.log"),
        recipe_snapshot_json="{}",
    )
    database.mark_job_running(orphan_id)
    assert database.get_job(orphan_id)["status"] == "running"

    workspace = _StubWorkspace(tmp_path / "ws")
    runner = Runner(
        db=database,
        workspace=workspace,
        log_dir=tmp_path / "jobs",
        max_concurrent=2,
    )
    await runner.start()
    try:
        # By the time start() returns, the orphan should already be failed.
        orphan_row = database.get_job(orphan_id)
        assert orphan_row["status"] == "failed"
        assert orphan_row["exit_code"] == -1
    finally:
        await runner.stop()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_jobs_finished_hook_fires(tmp_path):
    """A hook registered with add_hook gets exactly one event for a finished job."""
    database = DB(str(tmp_path / "ct.db"))
    workspace = _StubWorkspace(tmp_path / "ws")
    runner = Runner(
        db=database,
        workspace=workspace,
        log_dir=tmp_path / "jobs",
        max_concurrent=2,
    )

    events: list[dict] = []

    async def capture(event):
        events.append(event)

    runner.add_hook(capture)
    await runner.start()
    try:
        job_id = _seed_project_and_job(
            database, recipe_cmd="echo h && true", recipe_kind="test"
        )
        await runner.enqueue(job_id)
        await _wait_terminal(database, job_id)

        # The hook runs after the job row turns terminal, so poll briefly
        # (at most 50 sleeps of 0.05 s) until the event shows up.
        polls = 0
        while not events and polls < 50:
            await asyncio.sleep(0.05)
            polls += 1

        assert len(events) == 1
        event = events[0]
        assert event["job_id"] == job_id
        assert event["status"] == "succeeded"
    finally:
        await runner.stop()
|
|
|
|
|
|
# ---------- helpers -------------------------------------------------------
|
|
|
|
|
|
async def _wait_terminal(db: DB, job_id: str, *, deadline_s: float = 15.0) -> None:
    """Poll ``db.get_job(job_id)`` until the job reaches a terminal status.

    The job is checked at least once, and once more after the final sleep, so
    a job that finishes right at the deadline (or a ``deadline_s`` of 0 with an
    already-finished job) is not misreported as a timeout.

    Args:
        db: Database handle exposing ``get_job``.
        job_id: Id of the job to watch.
        deadline_s: Maximum number of seconds to keep polling.

    Raises:
        AssertionError: If the job is not ``succeeded``, ``failed``,
            ``timed_out`` or ``cancelled`` once the deadline has passed.
    """
    terminal_statuses = ("succeeded", "failed", "timed_out", "cancelled")
    poll_interval_s = 0.1
    deadline = time.monotonic() + deadline_s

    while True:
        job = db.get_job(job_id)
        if job and job["status"] in terminal_statuses:
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline; the loop then does one last poll.
        await asyncio.sleep(min(poll_interval_s, remaining))

    raise AssertionError(f"job {job_id} did not reach terminal state within {deadline_s}s")
|