scaffold v0.1: postgres+pgvector inside-container, schema, markdown ingest, CLI
Skald is a generic story-writer. The database is the product; the binary is the tooling. Everything story-specific lives in rows, not in code. cwho's monorepo + binary-per-role pattern transplanted to this domain. What this commit ships: - Cargo workspace (resolver=3, edition 2024): skald-core (lib) + skald (bin) - Migration 0001: stories, characters, canon_facts, chapters, chapter_summaries, passages (vector(1536)), generation_runs, audit_findings, tags. pgvector + pg_trgm extensions. ivfflat index deferred until we have data (post-import the first ~1k passages and add the index). - skald-core::ingest — markdown parser for the cwho/coast-down shape: '# Title' → '## Chapter N — date' headings → '# Continuity Bible' section with character roster (real + fictional sub-sections) + setting / mystery / historical / liberty / hook sub-sections. Decomposed into structured rows; original bullet body preserved in key_facts/body fields for fidelity. 6 unit tests cover the shape. - skald-core::db — Postgres connection pool + migration runner. - skald-core::models — row types via sqlx::FromRow. - skald binary — clap CLI: 'serve' (http + migrations) and 'import-markdown' (one-shot ingest). - Dockerfile — multi-stage: rust:1.95-bookworm builder, pgvector/ pgvector:pg17 runtime, tini under PID 1, custom entrypoint.sh that boots embedded postgres then execs skald serve. - compose.yml — singleton container, postgres data in volume, story corpus mounted read-only at /seed. Decisions locked 2026-05-13: 1. DB in same container 'till we have a real working tool' (cobb) 2. postgres+pgvector (NOT sqlite) — keeps semantic-search story 3. Network-not-socket connection (postgresql://localhost:5432) from day one so future split is config-only, not code-rewrite Not yet wired: - Web UI - clawdforge calls (gen → cleanup → canon-audit pipeline) - Embedding pass - TTS sidecar
This commit is contained in:
commit
f575ad3722
17 changed files with 4065 additions and 0 deletions
8
.gitignore
vendored
Normal file
8
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
target/
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
.env
|
||||
*.env.local
|
||||
.idea/
|
||||
.vscode/
|
||||
2735
Cargo.lock
generated
Normal file
2735
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
32
Cargo.toml
Normal file
32
Cargo.toml
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
[workspace]
|
||||
resolver = "3"
|
||||
members = ["skald-core", "skald"]
|
||||
|
||||
[workspace.package]
|
||||
version = "0.0.1"
|
||||
edition = "2024"
|
||||
license = "MIT"
|
||||
authors = ["Cobb (Jacob Hayes)"]
|
||||
repository = "http://192.168.0.5:3001/cobb/skald"
|
||||
|
||||
[workspace.dependencies]
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
axum = "0.8"
|
||||
tower = "0.5"
|
||||
tower-http = { version = "0.6", features = ["trace", "limit"] }
|
||||
sqlx = { version = "0.8", default-features = false, features = [
|
||||
"postgres", "runtime-tokio", "tls-rustls",
|
||||
"chrono", "uuid", "macros", "migrate",
|
||||
] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1"
|
||||
clap = { version = "4", features = ["derive", "env"] }
|
||||
anyhow = "1"
|
||||
thiserror = "2"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter", "json", "chrono"] }
|
||||
chrono = { version = "0.4", features = ["serde"] }
|
||||
uuid = { version = "1", features = ["v4", "serde"] }
|
||||
regex = "1"
|
||||
async-trait = "0.1"
|
||||
maud = "0.27"
|
||||
59
Dockerfile
Normal file
59
Dockerfile
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# Multi-stage build for skald.
|
||||
#
|
||||
# Stage 1: compile the rust binary against rust:1.95-bookworm.
|
||||
# Stage 2: pgvector/pgvector:pg17 (debian-bookworm postgres with
|
||||
# pgvector preinstalled) + tini + the skald binary.
|
||||
#
|
||||
# v0.1 ships postgres inside the same container ("singleton till we
|
||||
# have a real working tool"). When we extract the DB out, swap the
|
||||
# runtime base to debian:bookworm-slim, drop entrypoint.sh, point
|
||||
# DATABASE_URL at the external pg.
|
||||
#
|
||||
# Build context is the workspace root:
|
||||
# docker build -t skald:latest .
|
||||
|
||||
# ─── builder ──────────────────────────────────────────────────────
|
||||
FROM rust:1.95-bookworm AS builder
|
||||
WORKDIR /build
|
||||
|
||||
# Cache the dependency graph: copy manifests first, fetch + build
|
||||
# stubs, THEN drop in real sources.
|
||||
COPY Cargo.toml Cargo.lock ./
|
||||
COPY skald-core/Cargo.toml skald-core/Cargo.toml
|
||||
COPY skald/Cargo.toml skald/Cargo.toml
|
||||
COPY migrations migrations
|
||||
|
||||
RUN mkdir -p skald-core/src skald/src \
|
||||
&& echo 'pub fn placeholder() {}' > skald-core/src/lib.rs \
|
||||
&& echo 'fn main() {}' > skald/src/main.rs \
|
||||
&& cargo build --release -p skald \
|
||||
&& rm -rf skald-core/src skald/src
|
||||
|
||||
COPY skald-core skald-core
|
||||
COPY skald skald
|
||||
|
||||
RUN touch skald-core/src/lib.rs skald/src/main.rs \
|
||||
&& cargo build --release -p skald
|
||||
|
||||
# ─── runtime ──────────────────────────────────────────────────────
|
||||
FROM pgvector/pgvector:pg17 AS runtime
|
||||
|
||||
# tini for sane signal handling / zombie reaping under PID 1.
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends tini ca-certificates \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY --from=builder /build/target/release/skald /usr/local/bin/skald
|
||||
COPY --from=builder /build/migrations /var/lib/skald/migrations
|
||||
COPY entrypoint.sh /usr/local/bin/skald-entrypoint.sh
|
||||
RUN chmod +x /usr/local/bin/skald-entrypoint.sh
|
||||
|
||||
ENV RUST_LOG=info \
|
||||
SKALD_LISTEN=0.0.0.0:7780 \
|
||||
POSTGRES_USER=skald \
|
||||
POSTGRES_DB=skald
|
||||
|
||||
EXPOSE 7780
|
||||
|
||||
ENTRYPOINT ["/usr/bin/tini", "--", "/usr/local/bin/skald-entrypoint.sh"]
|
||||
CMD ["serve"]
|
||||
84
README.md
Normal file
84
README.md
Normal file
|
|
@ -0,0 +1,84 @@
|
|||
# skald
|
||||
|
||||
Long-form story-writer with canon-keeping, sequel continuity, and
|
||||
(future) self-hosted audiobook narration. Database is the source of
|
||||
truth — the writer is the tooling.
|
||||
|
||||
Named for the Old Norse poets who composed and memorized kings'
|
||||
sagas across generations.
|
||||
|
||||
## Status: v0.1 — scaffold
|
||||
|
||||
What's wired:
|
||||
|
||||
- Rust workspace (`skald-core` + `skald`)
|
||||
- Postgres schema for stories, characters, canon facts, chapters,
|
||||
passages, generation runs, audit findings, tags
|
||||
- pgvector extension installed for future similarity search
|
||||
- `skald import-markdown` ingests a story file (chapters + bible)
|
||||
into the schema
|
||||
- `skald serve` exposes `/health` and runs migrations on boot
|
||||
- Single-container deploy: postgres + skald in one image
|
||||
|
||||
Not yet wired:
|
||||
|
||||
- Web UI (the inbox + browse + queue surface)
|
||||
- clawdforge calls (the actual generate / cleanup / canon-audit
|
||||
pipeline)
|
||||
- Embeddings + similarity search
|
||||
- TTS sidecar
|
||||
|
||||
## v0.1 smoke
|
||||
|
||||
```bash
|
||||
docker compose -p skald up -d
|
||||
docker exec skald skald import-markdown \
|
||||
--path /seed/coast-down.md \
|
||||
--title "The Coast-Down"
|
||||
|
||||
curl http://lucy:7780/health
|
||||
# → { ok: true, db_ok: true, story_count: 1, ... }
|
||||
```
|
||||
|
||||
## Schema (cheat sheet)
|
||||
|
||||
```
|
||||
stories → meta + status + parent/root for series
|
||||
characters → real or fictional, story-scoped
|
||||
canon_facts → setting, mystery, theme, rule, historical_anchor, hook
|
||||
chapters → full prose body
|
||||
chapter_summaries → short summaries for cheap context loading
|
||||
passages → paragraph-level + embedding vector(1536)
|
||||
generation_runs → every LLM call logged
|
||||
audit_findings → canon audit output (severity + area)
|
||||
tags → arbitrary labels
|
||||
```
|
||||
|
||||
## Architecture (v0.1 + the plan)
|
||||
|
||||
```
|
||||
┌─────────────────────────────────┐
|
||||
│ skald container │
|
||||
│ ┌───────────┐ ┌────────────┐ │
|
||||
│ │ postgres │ │ skald-rust │ │
|
||||
│ │ pgvector │←─│ axum + cli │ │
|
||||
│ │ localhost │ │ :7780 │ │
|
||||
│ └───────────┘ └─────┬──────┘ │
|
||||
└─────────────────────────┼────────┘
|
||||
│ HTTP (future)
|
||||
↓
|
||||
┌──────────┐
|
||||
│clawdforge│
|
||||
└─────┬────┘
|
||||
↓
|
||||
opus calls
|
||||
```
|
||||
|
||||
v1.0+: extract postgres to its own container on db-net. skald
|
||||
becomes pure stateless rust, connects via `DATABASE_URL`. Migration
|
||||
is a connection-string change + a network move; the binary doesn't
|
||||
care where the DB lives.
|
||||
|
||||
## License
|
||||
|
||||
MIT.
|
||||
37
compose.yml
Normal file
37
compose.yml
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
# Standalone compose stack for skald v0.1. Postgres lives in the
|
||||
# same container — single deployable unit "till we have a real
|
||||
# working tool" (cobb's call, 2026-05-13).
|
||||
#
|
||||
# To deploy on Lucy:
|
||||
# sudo mkdir -p /mnt/cache/appdata/skald/{pgdata,seed}
|
||||
# sudo cp <story>.md /mnt/cache/appdata/skald/seed/
|
||||
# sudo cp skald.env /mnt/cache/appdata/secrets/skald.env # POSTGRES_PASSWORD=...
|
||||
# docker compose -p skald up -d
|
||||
#
|
||||
# To import the first story:
|
||||
# docker exec skald skald import-markdown \
|
||||
# --path /seed/<story>.md \
|
||||
# --title "<title>"
|
||||
|
||||
services:
|
||||
skald:
|
||||
image: lucy-registry:5000/skald:latest
|
||||
container_name: skald
|
||||
restart: unless-stopped
|
||||
ports:
|
||||
- "7780:7780"
|
||||
env_file:
|
||||
- /mnt/cache/appdata/secrets/skald.env
|
||||
volumes:
|
||||
# Postgres data — persist across container recreates.
|
||||
- /mnt/cache/appdata/skald/pgdata:/var/lib/postgresql/data
|
||||
# Markdown corpus to import via `docker exec skald skald import-markdown`.
|
||||
- /mnt/cache/appdata/skald/seed:/seed:ro
|
||||
environment:
|
||||
RUST_LOG: ${RUST_LOG:-info}
|
||||
SKALD_LOG_FORMAT: json
|
||||
labels:
|
||||
org.sulkta.domain: "sulkta"
|
||||
org.sulkta.owner: "cobb"
|
||||
org.sulkta.managed-by: "compose"
|
||||
org.sulkta.role: "skald"
|
||||
37
entrypoint.sh
Normal file
37
entrypoint.sh
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
#!/usr/bin/env bash
# Skald container entrypoint.
#
# Boots the embedded postgres via the pgvector image's own
# docker-entrypoint, waits for it to accept connections, then execs
# `skald` in the foreground. Tini is PID 1 (so it can reap zombies +
# forward signals); this script runs as tini's child; postgres is
# started as a background child of this script and remains a child of
# the same process after the final `exec`.
#
# This is explicitly "DB in the same container, for now" — when we
# split the DB out (see project notes), the entrypoint reduces to
# `exec /usr/local/bin/skald "$@"` and the pg startup goes away.

set -eo pipefail

# Hand off to the pgvector image's own initdb + start dance.
/usr/local/bin/docker-entrypoint.sh postgres &
PG_PID=$!

# Wait for postgres to accept connections — initdb-on-first-run can
# take a few seconds. 120 polls x 0.5s = 60s cap so we don't hang
# forever.
for i in $(seq 1 120); do
    # Fail fast when postgres has already exited (e.g. the image's
    # entrypoint rejected the configuration). Without this check the
    # loop would keep polling a dead server for the full 60s and then
    # report a misleading timeout.
    if ! kill -0 "$PG_PID" 2>/dev/null; then
        pg_status=0
        wait "$PG_PID" || pg_status=$?
        echo "skald-entrypoint: postgres exited during startup (status ${pg_status})" >&2
        exit 1
    fi

    if pg_isready -h localhost -p 5432 -U "${POSTGRES_USER:-skald}" -d "${POSTGRES_DB:-skald}" >/dev/null 2>&1; then
        echo "skald-entrypoint: postgres ready after ${i} polls"
        break
    fi

    if [ "$i" -eq 120 ]; then
        echo "skald-entrypoint: postgres failed to become ready after 60s" >&2
        kill "$PG_PID" 2>/dev/null || true
        exit 1
    fi

    sleep 0.5
done

# Exec skald in the foreground. Container's lifecycle now tracks
# skald — if skald exits, the container exits, postgres comes down
# with it, restart policy decides whether to recycle.
exec /usr/local/bin/skald "$@"
|
||||
189
migrations/0001_init.sql
Normal file
189
migrations/0001_init.sql
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
-- Skald v0.1 schema. Database is the source of truth; the writer is
|
||||
-- generic tooling that knows nothing hardcoded about any specific
|
||||
-- story. Every story is rows.
|
||||
--
|
||||
-- pgvector for embedding-based callback search across past prose;
|
||||
-- pg_trgm for fuzzy character-name lookups.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
|
||||
-- One row per story (or per sequel). parent_story_id chains a
|
||||
-- series; root_story_id is the head of the chain (denormalized for
|
||||
-- cheap series scans).
|
||||
CREATE TABLE stories (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
title TEXT NOT NULL,
|
||||
status TEXT NOT NULL DEFAULT 'seed'
|
||||
CHECK (status IN (
|
||||
'seed', 'draft', 'generating', 'cleaning',
|
||||
'auditing', 'complete', 'failed'
|
||||
)),
|
||||
prompt TEXT,
|
||||
model TEXT,
|
||||
parent_story_id UUID REFERENCES stories(id) ON DELETE SET NULL,
|
||||
root_story_id UUID REFERENCES stories(id) ON DELETE SET NULL,
|
||||
series_name TEXT,
|
||||
word_count_target INTEGER,
|
||||
word_count_actual INTEGER NOT NULL DEFAULT 0,
|
||||
summary TEXT,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_stories_parent ON stories(parent_story_id);
|
||||
CREATE INDEX idx_stories_root ON stories(root_story_id);
|
||||
CREATE INDEX idx_stories_status ON stories(status);
|
||||
CREATE INDEX idx_stories_series ON stories(series_name) WHERE series_name IS NOT NULL;
|
||||
|
||||
-- Characters: real (historical) or fictional. The bible blob is
|
||||
-- decomposed enough to be searchable but the original prose blob
|
||||
-- stays in key_facts for full fidelity.
|
||||
CREATE TABLE characters (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL,
|
||||
kind TEXT NOT NULL CHECK (kind IN ('real', 'fictional')),
|
||||
role TEXT,
|
||||
voice_traits TEXT,
|
||||
key_facts TEXT NOT NULL,
|
||||
aliases TEXT[] NOT NULL DEFAULT '{}',
|
||||
first_seen_chapter INTEGER,
|
||||
state_at_latest TEXT,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_characters_story ON characters(story_id);
|
||||
CREATE INDEX idx_characters_name_trgm ON characters USING gin (name gin_trgm_ops);
|
||||
CREATE INDEX idx_characters_story_kind ON characters(story_id, kind);
|
||||
|
||||
-- Canon facts: everything that's bible-shaped but not a character.
|
||||
-- Setting details, mystery threads, themes, rules, historical
|
||||
-- anchors, fictional liberties, suggested hooks for sequels.
|
||||
CREATE TABLE canon_facts (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
category TEXT NOT NULL CHECK (category IN (
|
||||
'setting', 'event', 'rule', 'theme',
|
||||
'mystery', 'liberty', 'hook', 'historical_anchor'
|
||||
)),
|
||||
title TEXT NOT NULL,
|
||||
body TEXT NOT NULL,
|
||||
weight INTEGER NOT NULL DEFAULT 1,
|
||||
source_chapter INTEGER,
|
||||
resolved BOOLEAN NOT NULL DEFAULT false,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_canon_facts_story_category ON canon_facts(story_id, category);
|
||||
|
||||
-- Chapters: full prose body, stored in DB (markdown). One row per
|
||||
-- chapter; UNIQUE(story_id, n) prevents duplicate insertion.
|
||||
CREATE TABLE chapters (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
n INTEGER NOT NULL,
|
||||
title TEXT,
|
||||
body_md TEXT NOT NULL,
|
||||
word_count INTEGER NOT NULL DEFAULT 0,
|
||||
generated_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
UNIQUE (story_id, n)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_chapters_story ON chapters(story_id);
|
||||
|
||||
-- Per-chapter short summary. The writer pulls these instead of full
|
||||
-- chapter prose when assembling context for a sequel — much cheaper
|
||||
-- on tokens. Generated by a separate LLM pass after the chapter is
|
||||
-- finished.
|
||||
CREATE TABLE chapter_summaries (
|
||||
chapter_id UUID PRIMARY KEY REFERENCES chapters(id) ON DELETE CASCADE,
|
||||
body TEXT NOT NULL,
|
||||
generated_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
-- Passages: paragraph-level prose with embedding vectors for
|
||||
-- similarity search. Embeddings nullable so v0.1 import doesn't
|
||||
-- require an embedding pass — we fill them in lazily when we
|
||||
-- actually need semantic recall.
|
||||
CREATE TABLE passages (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
chapter_id UUID NOT NULL REFERENCES chapters(id) ON DELETE CASCADE,
|
||||
paragraph_n INTEGER NOT NULL,
|
||||
body TEXT NOT NULL,
|
||||
embedding vector(1536),
|
||||
embedded_at TIMESTAMPTZ,
|
||||
UNIQUE (chapter_id, paragraph_n)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_passages_chapter ON passages(chapter_id);
|
||||
-- ivfflat index on `embedding` is deferred until we have data —
|
||||
-- ivfflat requires training rows to build, and an empty-table
|
||||
-- index degrades query plans. Add after first ~1k passages.
|
||||
|
||||
-- Every LLM call we make is logged. Useful for cost tracking,
|
||||
-- forensics, "why is this chapter weird?" investigations.
|
||||
CREATE TABLE generation_runs (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
kind TEXT NOT NULL CHECK (kind IN (
|
||||
'gen', 'cleanup', 'audit',
|
||||
'summary', 'embed'
|
||||
)),
|
||||
clawdforge_session_id TEXT,
|
||||
tokens_in INTEGER,
|
||||
tokens_out INTEGER,
|
||||
cost_estimate_cents INTEGER,
|
||||
started_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
ended_at TIMESTAMPTZ,
|
||||
status TEXT NOT NULL DEFAULT 'running'
|
||||
CHECK (status IN ('running', 'succeeded', 'failed')),
|
||||
error TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX idx_generation_runs_story ON generation_runs(story_id);
|
||||
CREATE INDEX idx_generation_runs_kind ON generation_runs(kind);
|
||||
|
||||
-- Canon audit findings. Third-Opus reads parent + sequel + bible
|
||||
-- and flags any continuity drift, character voice shift, retconned
|
||||
-- facts, timeline contradictions.
|
||||
CREATE TABLE audit_findings (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
run_id UUID REFERENCES generation_runs(id) ON DELETE SET NULL,
|
||||
severity TEXT NOT NULL CHECK (severity IN ('info', 'warn', 'crit')),
|
||||
area TEXT NOT NULL CHECK (area IN (
|
||||
'character', 'continuity', 'tone',
|
||||
'fact', 'timeline', 'other'
|
||||
)),
|
||||
body TEXT NOT NULL,
|
||||
resolved BOOLEAN NOT NULL DEFAULT false,
|
||||
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX idx_audit_findings_story ON audit_findings(story_id);
|
||||
|
||||
-- Arbitrary user-applied labels. Genre, mood, status filters, etc.
|
||||
CREATE TABLE tags (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
story_id UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
|
||||
name TEXT NOT NULL,
|
||||
UNIQUE (story_id, name)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_tags_story ON tags(story_id);
|
||||
|
||||
-- Auto-touch stories.updated_at whenever anything changes on the
|
||||
-- story row itself. Cascade-only — not triggered by child writes.
|
||||
CREATE OR REPLACE FUNCTION touch_updated_at()
|
||||
RETURNS TRIGGER AS $$
|
||||
BEGIN
|
||||
NEW.updated_at = now();
|
||||
RETURN NEW;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE TRIGGER stories_updated_at
|
||||
BEFORE UPDATE ON stories
|
||||
FOR EACH ROW
|
||||
EXECUTE FUNCTION touch_updated_at();
|
||||
23
skald-core/Cargo.toml
Normal file
23
skald-core/Cargo.toml
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
[package]
|
||||
name = "skald-core"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
repository.workspace = true
|
||||
description = "Skald's shared lib: db models, schema migrations, markdown ingest, context assembly."
|
||||
|
||||
[dependencies]
|
||||
tokio = { workspace = true }
|
||||
sqlx = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
thiserror = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
regex = { workspace = true }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }
|
||||
22
skald-core/src/db.rs
Normal file
22
skald-core/src/db.rs
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
//! Postgres connection pool helper.
|
||||
|
||||
use sqlx::postgres::{PgConnectOptions, PgPoolOptions};
|
||||
use sqlx::{ConnectOptions, PgPool};
|
||||
use std::str::FromStr;
|
||||
use std::time::Duration;
|
||||
|
||||
/// Connect to postgres, run pending migrations, return the pool.
|
||||
pub async fn connect_and_migrate(url: &str) -> anyhow::Result<PgPool> {
|
||||
let mut opts = PgConnectOptions::from_str(url)?;
|
||||
// sqlx logs every query at INFO by default; that's hostile to
|
||||
// production logs. Pull it down to debug.
|
||||
opts = opts.log_statements(tracing::log::LevelFilter::Debug);
|
||||
|
||||
let pool = PgPoolOptions::new()
|
||||
.max_connections(10)
|
||||
.acquire_timeout(Duration::from_secs(10))
|
||||
.connect_with(opts)
|
||||
.await?;
|
||||
crate::MIGRATOR.run(&pool).await?;
|
||||
Ok(pool)
|
||||
}
|
||||
510
skald-core/src/ingest.rs
Normal file
510
skald-core/src/ingest.rs
Normal file
|
|
@ -0,0 +1,510 @@
|
|||
//! Parse a long-form story markdown file into the rows we'll store
|
||||
//! in the database. The parser knows the shape we generated in the
|
||||
//! 2026-05-13 Coast-Down side-quest (chapters as `## Chapter N — date`,
|
||||
//! then a `# Continuity Bible` section with structured subsections),
|
||||
//! but isn't story-specific — any markdown that follows that shape
|
||||
//! parses cleanly. A file with no `# Title` heading, or with no
//! non-empty `## ` chapter section, fails loudly so the operator can
//! adjust the doc, not the code; bible sections the parser does not
//! recognize are skipped.
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use regex::Regex;
|
||||
use sqlx::PgPool;
|
||||
use std::path::Path;
|
||||
use std::sync::OnceLock;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// What we extract from a story markdown file before touching the
|
||||
/// database.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParsedStory {
|
||||
pub title: String,
|
||||
pub chapters: Vec<ParsedChapter>,
|
||||
pub characters: Vec<ParsedCharacter>,
|
||||
pub canon_facts: Vec<ParsedFact>,
|
||||
}
|
||||
|
||||
/// A single chapter lifted out of the story markdown.
#[derive(Clone, Debug)]
pub struct ParsedChapter {
    /// 1-based chapter number.
    pub n: i32,
    /// Heading text after the `## ` marker; `None` when it was empty.
    pub title: Option<String>,
    /// Full chapter text, trimmed of surrounding whitespace.
    pub body: String,
    /// `body` split into paragraphs.
    pub paragraphs: Vec<String>,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParsedCharacter {
|
||||
pub name: String,
|
||||
pub kind: CharacterKind,
|
||||
pub key_facts: String,
|
||||
}
|
||||
|
||||
/// Which roster sub-section a character was listed under.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CharacterKind {
    /// Listed under a `### Real…` sub-heading.
    Real,
    /// Listed under a `### Fictional…` sub-heading.
    Fictional,
}

impl CharacterKind {
    /// Lowercase label for this kind (`"real"` / `"fictional"`), the
    /// same spellings the `characters.kind` CHECK constraint accepts.
    pub fn as_str(self) -> &'static str {
        match self {
            CharacterKind::Fictional => "fictional",
            CharacterKind::Real => "real",
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ParsedFact {
|
||||
pub category: FactCategory,
|
||||
pub title: String,
|
||||
pub body: String,
|
||||
}
|
||||
|
||||
/// Category of a canon fact.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum FactCategory {
    Setting,
    Mystery,
    HistoricalAnchor,
    Liberty,
    Hook,
    Event,
    Rule,
    Theme,
}

impl FactCategory {
    /// Lowercase snake_case label for this category, the same
    /// spellings the `canon_facts.category` CHECK constraint accepts.
    pub fn as_str(self) -> &'static str {
        match self {
            FactCategory::Event => "event",
            FactCategory::HistoricalAnchor => "historical_anchor",
            FactCategory::Hook => "hook",
            FactCategory::Liberty => "liberty",
            FactCategory::Mystery => "mystery",
            FactCategory::Rule => "rule",
            FactCategory::Setting => "setting",
            FactCategory::Theme => "theme",
        }
    }
}
|
||||
|
||||
/// Parse a story markdown file.
|
||||
pub fn parse_story_file(path: &Path) -> anyhow::Result<ParsedStory> {
|
||||
let raw = std::fs::read_to_string(path)
|
||||
.with_context(|| format!("read {}", path.display()))?;
|
||||
parse_story(&raw)
|
||||
}
|
||||
|
||||
/// Parse a story markdown string. See module-level docs for the
|
||||
/// shape it expects.
|
||||
pub fn parse_story(raw: &str) -> anyhow::Result<ParsedStory> {
|
||||
let bible_split: Vec<&str> = raw.splitn(2, "\n# Continuity Bible").collect();
|
||||
let pre_bible = bible_split[0];
|
||||
let bible_body = bible_split.get(1).copied().unwrap_or("");
|
||||
|
||||
let title = extract_title(pre_bible).context("no title heading found")?;
|
||||
let chapters = parse_chapters(pre_bible);
|
||||
let (characters, canon_facts) = parse_bible(bible_body);
|
||||
|
||||
if chapters.is_empty() {
|
||||
bail!("no chapters parsed — expected `## Chapter N — …` headings");
|
||||
}
|
||||
|
||||
Ok(ParsedStory {
|
||||
title,
|
||||
chapters,
|
||||
characters,
|
||||
canon_facts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the text of the first `# ` heading whose text is non-empty.
///
/// Trailing whitespace on the line is ignored before the `# ` prefix is
/// tested, and the heading text is trimmed. `None` when no line
/// qualifies.
fn extract_title(pre_bible: &str) -> Option<String> {
    pre_bible
        .lines()
        .filter_map(|line| line.trim_end().strip_prefix("# "))
        .map(str::trim)
        .find(|heading| !heading.is_empty())
        .map(str::to_string)
}
|
||||
|
||||
fn parse_chapters(pre_bible: &str) -> Vec<ParsedChapter> {
|
||||
let mut chapters: Vec<ParsedChapter> = Vec::new();
|
||||
let mut cur_title: Option<String> = None;
|
||||
let mut cur_body: Vec<&str> = Vec::new();
|
||||
|
||||
for line in pre_bible.lines() {
|
||||
if let Some(rest) = line.strip_prefix("## ") {
|
||||
// Flush previous chapter.
|
||||
if cur_title.is_some() {
|
||||
push_chapter(&mut chapters, cur_title.take(), &cur_body);
|
||||
cur_body.clear();
|
||||
}
|
||||
cur_title = Some(rest.trim().to_string());
|
||||
} else if cur_title.is_some() {
|
||||
cur_body.push(line);
|
||||
}
|
||||
}
|
||||
if cur_title.is_some() {
|
||||
push_chapter(&mut chapters, cur_title.take(), &cur_body);
|
||||
}
|
||||
chapters
|
||||
}
|
||||
|
||||
fn push_chapter(out: &mut Vec<ParsedChapter>, title: Option<String>, lines: &[&str]) {
|
||||
let title = title.unwrap_or_default();
|
||||
let body = lines.join("\n").trim().to_string();
|
||||
if body.is_empty() {
|
||||
return;
|
||||
}
|
||||
let n = (out.len() + 1) as i32;
|
||||
let paragraphs = split_paragraphs(&body);
|
||||
out.push(ParsedChapter {
|
||||
n,
|
||||
title: if title.is_empty() { None } else { Some(title) },
|
||||
body,
|
||||
paragraphs,
|
||||
});
|
||||
}
|
||||
|
||||
/// Split a chapter body into paragraphs.
///
/// Paragraphs are delimited by blank lines. A markdown thematic break
/// (horizontal rule / scene break) also ends the current paragraph and
/// is itself dropped: any line made of three or more of the same
/// marker — `-`, `*` or `_` — optionally separated by spaces or tabs,
/// e.g. `---`, `***`, `___`, `* * *`, `-----`.
///
/// Each returned paragraph keeps its internal line breaks and is
/// trimmed of surrounding whitespace. An empty body yields an empty
/// vector.
fn split_paragraphs(body: &str) -> Vec<String> {
    /// True when `trimmed` (already stripped of surrounding whitespace)
    /// is a thematic break: three or more identical `-`/`*`/`_` marks
    /// with only spaces or tabs between them.
    fn is_thematic_break(trimmed: &str) -> bool {
        let mut marks = trimmed.chars().filter(|c| *c != ' ' && *c != '\t');
        let Some(first) = marks.next() else {
            return false;
        };
        if !matches!(first, '-' | '*' | '_') {
            return false;
        }
        let mut run = 1usize;
        for mark in marks {
            if mark != first {
                return false;
            }
            run += 1;
        }
        run >= 3
    }

    let mut paragraphs: Vec<String> = Vec::new();
    let mut cur: Vec<&str> = Vec::new();

    for line in body.lines() {
        let trimmed = line.trim();
        if trimmed.is_empty() || is_thematic_break(trimmed) {
            // Separator line: close the paragraph in progress, if any,
            // and drop the separator itself.
            if !cur.is_empty() {
                paragraphs.push(cur.join("\n").trim().to_string());
                cur.clear();
            }
        } else {
            cur.push(line);
        }
    }

    // Body may end without a trailing separator.
    if !cur.is_empty() {
        paragraphs.push(cur.join("\n").trim().to_string());
    }
    paragraphs
}
|
||||
|
||||
fn parse_bible(bible_body: &str) -> (Vec<ParsedCharacter>, Vec<ParsedFact>) {
|
||||
let mut characters: Vec<ParsedCharacter> = Vec::new();
|
||||
let mut canon_facts: Vec<ParsedFact> = Vec::new();
|
||||
if bible_body.is_empty() {
|
||||
return (characters, canon_facts);
|
||||
}
|
||||
|
||||
// Section boundaries: lines starting with `## ` partition the
|
||||
// bible into named sections.
|
||||
let mut cur_section: Option<String> = None;
|
||||
let mut cur_body: Vec<&str> = Vec::new();
|
||||
|
||||
for line in bible_body.lines() {
|
||||
if let Some(rest) = line.strip_prefix("## ") {
|
||||
flush_bible_section(cur_section.take(), &cur_body, &mut characters, &mut canon_facts);
|
||||
cur_body.clear();
|
||||
cur_section = Some(rest.trim().to_string());
|
||||
} else if cur_section.is_some() {
|
||||
cur_body.push(line);
|
||||
}
|
||||
}
|
||||
flush_bible_section(cur_section, &cur_body, &mut characters, &mut canon_facts);
|
||||
|
||||
(characters, canon_facts)
|
||||
}
|
||||
|
||||
fn flush_bible_section(
|
||||
section: Option<String>,
|
||||
body_lines: &[&str],
|
||||
characters: &mut Vec<ParsedCharacter>,
|
||||
canon_facts: &mut Vec<ParsedFact>,
|
||||
) {
|
||||
let Some(section) = section else { return };
|
||||
let body = body_lines.join("\n").trim().to_string();
|
||||
if body.is_empty() {
|
||||
return;
|
||||
}
|
||||
|
||||
let lower = section.to_lowercase();
|
||||
if lower.starts_with("character roster") {
|
||||
characters.extend(parse_character_roster(&body));
|
||||
} else if let Some(category) = section_to_category(&lower) {
|
||||
canon_facts.push(ParsedFact {
|
||||
category,
|
||||
title: section,
|
||||
body,
|
||||
});
|
||||
}
|
||||
// Sections we don't recognize get silently dropped. That's fine
|
||||
// for v0.1; the operator can re-import after adjusting the doc.
|
||||
}
|
||||
|
||||
fn section_to_category(lower_title: &str) -> Option<FactCategory> {
|
||||
if lower_title.starts_with("setting") {
|
||||
Some(FactCategory::Setting)
|
||||
} else if lower_title.starts_with("open mystery") || lower_title.starts_with("mystery") {
|
||||
Some(FactCategory::Mystery)
|
||||
} else if lower_title.starts_with("verified historical") || lower_title.contains("historical events") {
|
||||
Some(FactCategory::HistoricalAnchor)
|
||||
} else if lower_title.starts_with("fictional liberties") || lower_title.starts_with("liberties") {
|
||||
Some(FactCategory::Liberty)
|
||||
} else if lower_title.contains("hook") || lower_title.contains("next-chapter") || lower_title.contains("suggested next") {
|
||||
Some(FactCategory::Hook)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_character_roster(body: &str) -> Vec<ParsedCharacter> {
|
||||
let mut out: Vec<ParsedCharacter> = Vec::new();
|
||||
let mut kind: Option<CharacterKind> = None;
|
||||
let mut cur_name: Option<String> = None;
|
||||
let mut cur_body: Vec<String> = Vec::new();
|
||||
|
||||
fn flush(
|
||||
cur_name: &mut Option<String>,
|
||||
cur_body: &mut Vec<String>,
|
||||
kind: Option<CharacterKind>,
|
||||
out: &mut Vec<ParsedCharacter>,
|
||||
) {
|
||||
if let (Some(name), Some(kind)) = (cur_name.take(), kind)
|
||||
&& !name.is_empty()
|
||||
{
|
||||
let body = cur_body.join(" ").trim().to_string();
|
||||
out.push(ParsedCharacter {
|
||||
name,
|
||||
kind,
|
||||
key_facts: body,
|
||||
});
|
||||
}
|
||||
cur_body.clear();
|
||||
}
|
||||
|
||||
for line in body.lines() {
|
||||
let trimmed = line.trim();
|
||||
if let Some(rest) = line.strip_prefix("### ") {
|
||||
// New sub-section → flush current entry first.
|
||||
flush(&mut cur_name, &mut cur_body, kind, &mut out);
|
||||
let s = rest.trim().to_lowercase();
|
||||
kind = if s.starts_with("real") {
|
||||
Some(CharacterKind::Real)
|
||||
} else if s.starts_with("fictional") {
|
||||
Some(CharacterKind::Fictional)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
} else if let Some(stripped) = trimmed.strip_prefix("- ") {
|
||||
// New character bullet → flush previous.
|
||||
flush(&mut cur_name, &mut cur_body, kind, &mut out);
|
||||
if let Some((name, rest)) = split_bold_name(stripped) {
|
||||
cur_name = Some(name);
|
||||
let rest = rest.trim_start_matches([':', '—', '-', ' ']).trim();
|
||||
if !rest.is_empty() {
|
||||
cur_body.push(rest.to_string());
|
||||
}
|
||||
}
|
||||
} else if !trimmed.is_empty() && cur_name.is_some() {
|
||||
// Continuation of the current bullet.
|
||||
cur_body.push(line.trim_start().to_string());
|
||||
}
|
||||
}
|
||||
flush(&mut cur_name, &mut cur_body, kind, &mut out);
|
||||
out
|
||||
}
|
||||
|
||||
/// Extract the **bold** name at the start of a bullet body.
|
||||
/// Returns (name, rest-of-bullet).
|
||||
fn split_bold_name(s: &str) -> Option<(String, &str)> {
|
||||
static RE: OnceLock<Regex> = OnceLock::new();
|
||||
let re = RE.get_or_init(|| Regex::new(r"^\*\*(.+?)\*\*\s*(.*)$").unwrap());
|
||||
let caps = re.captures(s)?;
|
||||
let name = caps.get(1)?.as_str().trim().to_string();
|
||||
let rest_match = caps.get(2)?;
|
||||
Some((name, &s[rest_match.start()..rest_match.end()]))
|
||||
}
|
||||
|
||||
/// Insert a parsed story into the database. Returns the story's id.
///
/// Every row is written through a single transaction opened on `pool`
/// and committed only after all inserts succeeded; any failure is
/// propagated with `?` before `commit` is reached.
/// NOTE(review): this relies on sqlx rolling back an uncommitted
/// transaction when it is dropped — confirm against the sqlx docs.
///
/// Rows written, in order: one `stories` row (status `'seed'`), one
/// `chapters` row per chapter with one `passages` row per paragraph,
/// then the `characters` rows and the `canon_facts` rows.
///
/// # Errors
/// Returns the underlying error if the transaction cannot be started,
/// any statement fails, or the commit fails.
pub async fn import_to_db(pool: &PgPool, parsed: ParsedStory) -> anyhow::Result<Uuid> {
    let mut tx = pool.begin().await?;

    // Story-level word count = sum of the per-chapter counts.
    let total_words: i32 = parsed
        .chapters
        .iter()
        .map(|c| word_count(&c.body))
        .sum();

    let story_id: Uuid = sqlx::query_scalar(
        "INSERT INTO stories (title, status, word_count_actual)
         VALUES ($1, 'seed', $2)
         RETURNING id",
    )
    .bind(&parsed.title)
    .bind(total_words)
    .fetch_one(&mut *tx)
    .await?;

    // root_story_id self-references on the seed row.
    // Done as a follow-up UPDATE because the id is only known after the
    // INSERT above has returned it.
    sqlx::query("UPDATE stories SET root_story_id = id WHERE id = $1")
        .bind(story_id)
        .execute(&mut *tx)
        .await?;

    for chapter in &parsed.chapters {
        let words = word_count(&chapter.body);
        // The generated chapter id is needed to attach its passages.
        let chapter_id: Uuid = sqlx::query_scalar(
            "INSERT INTO chapters (story_id, n, title, body_md, word_count)
             VALUES ($1, $2, $3, $4, $5)
             RETURNING id",
        )
        .bind(story_id)
        .bind(chapter.n)
        .bind(chapter.title.as_deref())
        .bind(&chapter.body)
        .bind(words)
        .fetch_one(&mut *tx)
        .await?;

        for (i, para) in chapter.paragraphs.iter().enumerate() {
            sqlx::query(
                "INSERT INTO passages (chapter_id, paragraph_n, body)
                 VALUES ($1, $2, $3)",
            )
            .bind(chapter_id)
            // paragraph_n is 1-based: enumerate index + 1.
            .bind(i as i32 + 1)
            .bind(para)
            .execute(&mut *tx)
            .await?;
        }
    }

    for ch in &parsed.characters {
        sqlx::query(
            "INSERT INTO characters (story_id, name, kind, key_facts)
             VALUES ($1, $2, $3, $4)",
        )
        .bind(story_id)
        .bind(&ch.name)
        // The kind enum is stored via its `as_str()` form.
        .bind(ch.kind.as_str())
        .bind(&ch.key_facts)
        .execute(&mut *tx)
        .await?;
    }

    for fact in &parsed.canon_facts {
        sqlx::query(
            "INSERT INTO canon_facts (story_id, category, title, body)
             VALUES ($1, $2, $3, $4)",
        )
        .bind(story_id)
        // The category enum is stored via its `as_str()` form.
        .bind(fact.category.as_str())
        .bind(&fact.title)
        .bind(&fact.body)
        .execute(&mut *tx)
        .await?;
    }

    tx.commit().await?;
    Ok(story_id)
}
|
||||
|
||||
/// Counts the whitespace-separated words in `s`.
///
/// The result is an `i32` because that is what the callers bind into
/// the database. A count that does not fit saturates at `i32::MAX`
/// instead of silently wrapping the way an `as` cast would.
fn word_count(s: &str) -> i32 {
    i32::try_from(s.split_whitespace().count()).unwrap_or(i32::MAX)
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
const SAMPLE: &str = r#"# Sample Tale
|
||||
|
||||
## Chapter One — Monday, May 1
|
||||
|
||||
The morning was bright. The bread was warm. The cat sat in the sun.
|
||||
|
||||
She drank her coffee slowly.
|
||||
|
||||
## Chapter Two — Tuesday, May 2
|
||||
|
||||
The cat moved to the windowsill.
|
||||
|
||||
She watched the rain.
|
||||
|
||||
# Continuity Bible
|
||||
|
||||
## Character Roster
|
||||
|
||||
### Real historical figures
|
||||
|
||||
- **Anya Petrov** — 34, baker. Real. Husband died in 1985.
|
||||
Two children.
|
||||
|
||||
### Fictional characters
|
||||
|
||||
- **Boris** — 50, the cat. Black with one white paw.
|
||||
|
||||
## Setting Bible
|
||||
|
||||
A small village in northern Ukraine in May 1985.
|
||||
|
||||
## Open Mystery Threads
|
||||
|
||||
1. Whose footprints in the flour bin?
|
||||
2. Why does Boris meow at midnight?
|
||||
"#;
|
||||
|
||||
#[test]
fn parses_title() {
    // The fixture's first `# ` heading is the story title.
    let story = parse_story(SAMPLE).unwrap();
    assert_eq!(story.title, "Sample Tale");
}
|
||||
|
||||
#[test]
fn parses_chapter_count_and_numbering() {
    let story = parse_story(SAMPLE).unwrap();
    let chapters = &story.chapters;

    // Two `## Chapter …` headings in the fixture, numbered from 1.
    assert_eq!(chapters.len(), 2);
    assert_eq!(chapters[0].n, 1);
    assert_eq!(chapters[1].n, 2);

    let first_title = chapters[0].title.as_deref().unwrap();
    assert!(first_title.starts_with("Chapter One"));
}
|
||||
|
||||
#[test]
fn paragraphs_split_on_blank_line_and_hr() {
    let story = parse_story(SAMPLE).unwrap();
    // Chapter 1 of the fixture holds the bright-morning paragraph and
    // the coffee-drinking paragraph, separated by a blank line.
    let first_chapter = &story.chapters[0];
    assert_eq!(first_chapter.paragraphs.len(), 2);
}
|
||||
|
||||
#[test]
fn parses_real_and_fictional_characters() {
    let story = parse_story(SAMPLE).unwrap();
    assert_eq!(story.characters.len(), 2);

    // Looks a character up by exact name; panics if it is missing.
    let by_name = |wanted: &str| story.characters.iter().find(|c| c.name == wanted).unwrap();

    let anya = by_name("Anya Petrov");
    assert_eq!(anya.kind, CharacterKind::Real);
    assert!(anya.key_facts.contains("baker"));

    let boris = by_name("Boris");
    assert_eq!(boris.kind, CharacterKind::Fictional);
}
|
||||
|
||||
#[test]
fn parses_canon_fact_sections() {
    let story = parse_story(SAMPLE).unwrap();

    // Returns the first canon fact of the requested category.
    let first_of = |wanted: FactCategory| {
        story
            .canon_facts
            .iter()
            .find(|f| f.category == wanted)
            .unwrap()
    };

    assert!(first_of(FactCategory::Setting).body.contains("northern Ukraine"));
    assert!(first_of(FactCategory::Mystery).body.contains("footprints"));
}
|
||||
|
||||
#[test]
fn missing_chapters_errors() {
    // A title plus loose text, but no `## Chapter` heading at all.
    let bad = "# Title only\n\nSome body text but no chapters.";
    let err = parse_story(bad).unwrap_err();
    let message = err.to_string();
    assert!(message.contains("no chapters"), "{err}");
}
|
||||
}
|
||||
13
skald-core/src/lib.rs
Normal file
13
skald-core/src/lib.rs
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
//! Skald's shared kernel.
|
||||
//!
|
||||
//! Database schema, row types, markdown ingest, and (later) context
|
||||
//! assembly for LLM calls. The story-independence rule: nothing in
|
||||
//! this crate knows about any specific story. Every story is rows.
|
||||
|
||||
pub mod db;
|
||||
pub mod ingest;
|
||||
pub mod models;
|
||||
|
||||
/// Embeds the workspace `migrations/` directory at compile time.
/// Run via `MIGRATOR.run(&pool).await` at boot.
// NOTE(review): the `../migrations` path is presumably resolved
// relative to this crate's manifest directory (i.e. it points at the
// workspace root) — confirm against the `sqlx::migrate!` docs.
pub static MIGRATOR: sqlx::migrate::Migrator = sqlx::migrate!("../migrations");
|
||||
75
skald-core/src/models.rs
Normal file
75
skald-core/src/models.rs
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
//! Row types. Mirror the schema in `migrations/0001_init.sql`.
|
||||
//!
|
||||
//! These are deliberately thin — no business logic. Queries that need
|
||||
//! to project subsets of fields can use `sqlx::query_as!` against
|
||||
//! their own narrower types; these full structs are for the cases
|
||||
//! where we want the whole row.
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use sqlx::FromRow;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// One full row of the `stories` table.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Story {
    /// Row identifier.
    pub id: Uuid,
    /// Story title.
    pub title: String,
    /// Free-form status string; the markdown importer writes `'seed'`.
    pub status: String,
    /// Presumably the generation prompt, when the story was generated — TODO confirm.
    pub prompt: Option<String>,
    /// Presumably the model that produced the story — TODO confirm.
    pub model: Option<String>,
    /// Presumably the story this one was derived from — TODO confirm.
    pub parent_story_id: Option<Uuid>,
    /// The markdown importer sets this to the row's own `id` on seed rows.
    pub root_story_id: Option<Uuid>,
    /// Optional series label.
    pub series_name: Option<String>,
    /// Optional target length in words.
    pub word_count_target: Option<i32>,
    /// Actual length in words; the importer stores the sum of the
    /// per-chapter whitespace-separated word counts.
    pub word_count_actual: i32,
    /// Optional story summary.
    pub summary: Option<String>,
    /// Creation timestamp (UTC).
    pub created_at: DateTime<Utc>,
    /// Last-update timestamp (UTC).
    pub updated_at: DateTime<Utc>,
}
|
||||
|
||||
/// One full row of the `characters` table.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Character {
    /// Row identifier.
    pub id: Uuid,
    /// Story this character belongs to.
    pub story_id: Uuid,
    /// Character name; the importer takes it from the bold text at the
    /// start of a roster bullet.
    pub name: String,
    /// Kind as a string; the importer writes `CharacterKind::as_str()`
    /// (real vs. fictional).
    pub kind: String,
    /// Optional role description.
    pub role: Option<String>,
    /// Optional voice notes.
    pub voice_traits: Option<String>,
    /// Roster bullet body, joined into one string by the importer.
    pub key_facts: String,
    /// Alternative names; presumably a `text[]` column — TODO confirm.
    pub aliases: Vec<String>,
    /// Presumably the chapter number of first appearance — TODO confirm.
    pub first_seen_chapter: Option<i32>,
    /// Presumably the character's state as of the latest chapter — TODO confirm.
    pub state_at_latest: Option<String>,
    /// Creation timestamp (UTC).
    pub created_at: DateTime<Utc>,
}
|
||||
|
||||
/// One full row of the `canon_facts` table.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct CanonFact {
    /// Row identifier.
    pub id: Uuid,
    /// Story this fact belongs to.
    pub story_id: Uuid,
    /// Category as a string; the importer writes `FactCategory::as_str()`.
    pub category: String,
    /// Fact title.
    pub title: String,
    /// Fact body text.
    pub body: String,
    /// Not written by the markdown importer, so presumably filled by a
    /// schema default — TODO confirm meaning and range.
    pub weight: i32,
    /// Presumably the chapter number the fact originates from — TODO confirm.
    pub source_chapter: Option<i32>,
    /// Not written by the markdown importer, so presumably filled by a
    /// schema default — TODO confirm.
    pub resolved: bool,
    /// Creation timestamp (UTC).
    pub created_at: DateTime<Utc>,
}
|
||||
|
||||
/// One full row of the `chapters` table.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Chapter {
    /// Row identifier.
    pub id: Uuid,
    /// Story this chapter belongs to.
    pub story_id: Uuid,
    /// Chapter number within the story.
    pub n: i32,
    /// Optional chapter title.
    pub title: Option<String>,
    /// Chapter body as markdown.
    pub body_md: String,
    /// Whitespace-separated word count of the body, as stored by the importer.
    pub word_count: i32,
    /// Timestamp (UTC); not written by the markdown importer, so
    /// presumably filled by a schema default — TODO confirm.
    pub generated_at: DateTime<Utc>,
}
|
||||
|
||||
/// One row of the `passages` table: a single paragraph of a chapter.
// NOTE(review): the commit message describes a `vector(1536)` column on
// `passages`; it is not mapped by this struct.
#[derive(Debug, Clone, Serialize, Deserialize, FromRow)]
pub struct Passage {
    /// Row identifier.
    pub id: Uuid,
    /// Chapter this paragraph belongs to.
    pub chapter_id: Uuid,
    /// Position within the chapter; the importer numbers from 1.
    pub paragraph_n: i32,
    /// Paragraph text.
    pub body: String,
}
|
||||
28
skald/Cargo.toml
Normal file
28
skald/Cargo.toml
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
[package]
|
||||
name = "skald"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
license.workspace = true
|
||||
authors.workspace = true
|
||||
repository.workspace = true
|
||||
description = "Skald: long-form story-writer with canon-keeping. DB-is-source-of-truth; writer is the tooling."
|
||||
|
||||
[[bin]]
|
||||
name = "skald"
|
||||
path = "src/main.rs"
|
||||
|
||||
[dependencies]
|
||||
skald-core = { path = "../skald-core" }
|
||||
tokio = { workspace = true }
|
||||
axum = { workspace = true }
|
||||
tower-http = { workspace = true }
|
||||
sqlx = { workspace = true }
|
||||
clap = { workspace = true }
|
||||
serde = { workspace = true }
|
||||
serde_json = { workspace = true }
|
||||
anyhow = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tracing-subscriber = { workspace = true }
|
||||
chrono = { workspace = true }
|
||||
uuid = { workspace = true }
|
||||
maud = { workspace = true }
|
||||
38
skald/src/import.rs
Normal file
38
skald/src/import.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
//! `skald import-markdown` subcommand. Reads a story markdown file,
|
||||
//! parses it, writes the result into postgres, prints a one-line
|
||||
//! summary the operator can copy/paste.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use skald_core::db;
|
||||
use skald_core::ingest;
|
||||
|
||||
pub async fn run(database_url: &str, path: &Path, title_override: Option<&str>) -> anyhow::Result<()> {
|
||||
tracing::info!(path = %path.display(), "parsing markdown");
|
||||
let mut parsed = ingest::parse_story_file(path)?;
|
||||
|
||||
if let Some(t) = title_override {
|
||||
parsed.title = t.to_string();
|
||||
}
|
||||
|
||||
let chapter_count = parsed.chapters.len();
|
||||
let paragraph_count: usize = parsed.chapters.iter().map(|c| c.paragraphs.len()).sum();
|
||||
let character_count = parsed.characters.len();
|
||||
let fact_count = parsed.canon_facts.len();
|
||||
|
||||
tracing::info!(
|
||||
title = %parsed.title,
|
||||
chapters = chapter_count,
|
||||
paragraphs = paragraph_count,
|
||||
characters = character_count,
|
||||
canon_facts = fact_count,
|
||||
"parsed; connecting to database",
|
||||
);
|
||||
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
let story_id = ingest::import_to_db(&pool, parsed).await?;
|
||||
println!(
|
||||
"imported story {story_id}: {chapter_count} chapters / {paragraph_count} paragraphs / {character_count} characters / {fact_count} canon-facts"
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
85
skald/src/main.rs
Normal file
85
skald/src/main.rs
Normal file
|
|
@ -0,0 +1,85 @@
|
|||
//! skald — CLI entry point.
|
||||
//!
|
||||
//! Two subcommands today:
|
||||
//! skald serve — boot the http server (v0.1 = /health + migrations)
|
||||
//! skald import-markdown — ingest a story markdown file into the DB
|
||||
|
||||
mod import;
|
||||
mod serve;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitCode;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
// Top-level command line: one database option plus a subcommand.
// NOTE(review): with clap's derive macros the `///` comments on fields
// and variants are presumably used as `--help` text, so they are left
// untouched here — confirm against the clap derive reference.
#[derive(Debug, Parser)]
#[command(
    name = "skald",
    version,
    about = "Long-form story-writer. Database is the source of truth; the writer is the tooling."
)]
struct Cli {
    /// Postgres connection URL. Defaults to `postgresql://skald:skald@localhost:5432/skald`.
    // Can also be supplied through the DATABASE_URL environment variable.
    #[arg(long, env = "DATABASE_URL", default_value = "postgresql://skald:skald@localhost:5432/skald")]
    database_url: String,

    // Which subcommand to run; see `Cmd`.
    #[command(subcommand)]
    cmd: Cmd,
}
|
||||
|
||||
#[derive(Debug, Subcommand)]
|
||||
enum Cmd {
|
||||
/// Start the http server. v0.1 exposes /health and runs migrations on boot.
|
||||
Serve {
|
||||
#[arg(long, env = "SKALD_LISTEN", default_value = "0.0.0.0:7780")]
|
||||
listen: String,
|
||||
},
|
||||
/// Ingest a story markdown file into the database. Creates a new
|
||||
/// `stories` row + chapters + characters + canon_facts. Idempotent
|
||||
/// only at the title level: re-importing the same file makes a
|
||||
/// second story row.
|
||||
ImportMarkdown {
|
||||
/// Path to the markdown file.
|
||||
#[arg(long)]
|
||||
path: PathBuf,
|
||||
/// Override the title (defaults to the markdown's first `#` heading).
|
||||
#[arg(long)]
|
||||
title: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> ExitCode {
|
||||
init_logging();
|
||||
match run().await {
|
||||
Ok(()) => ExitCode::SUCCESS,
|
||||
Err(e) => {
|
||||
eprintln!("skald fatal: {e:#}");
|
||||
ExitCode::FAILURE
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn run() -> anyhow::Result<()> {
|
||||
let cli = Cli::parse();
|
||||
match cli.cmd {
|
||||
Cmd::Serve { listen } => serve::run(&cli.database_url, &listen).await,
|
||||
Cmd::ImportMarkdown { path, title } => {
|
||||
import::run(&cli.database_url, &path, title.as_deref()).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn init_logging() {
|
||||
use tracing_subscriber::{EnvFilter, fmt, prelude::*, registry};
|
||||
|
||||
let filter = EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info"));
|
||||
let format = std::env::var("SKALD_LOG_FORMAT").unwrap_or_else(|_| "pretty".into());
|
||||
|
||||
let registry = registry().with(filter);
|
||||
if format == "json" {
|
||||
registry.with(fmt::layer().json()).init();
|
||||
} else {
|
||||
registry.with(fmt::layer()).init();
|
||||
}
|
||||
}
|
||||
90
skald/src/serve.rs
Normal file
90
skald/src/serve.rs
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
//! HTTP server (v0.1).
|
||||
//!
|
||||
//! Today this is intentionally tiny: connect to postgres, run any
|
||||
//! pending migrations, expose `/health`, and stay alive. The web
|
||||
//! GUI + clawdforge wiring lands in v0.2.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use axum::Router;
|
||||
use axum::extract::State;
|
||||
use axum::routing::get;
|
||||
use axum::Json;
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::Serialize;
|
||||
use skald_core::db;
|
||||
use sqlx::PgPool;
|
||||
use tokio::signal::unix::{SignalKind, signal};
|
||||
|
||||
/// Shared state handed to every request handler via axum's `State`.
#[derive(Clone)]
struct AppState {
    /// Database connection pool used by the handlers.
    pool: PgPool,
    /// Moment the server was started; `/health` derives uptime from it.
    started_at: DateTime<Utc>,
}
|
||||
|
||||
pub async fn run(database_url: &str, listen: &str) -> anyhow::Result<()> {
|
||||
tracing::info!(listen, version = env!("CARGO_PKG_VERSION"), "skald serve starting");
|
||||
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
tracing::info!("database connected, migrations applied");
|
||||
|
||||
let state = AppState {
|
||||
pool,
|
||||
started_at: Utc::now(),
|
||||
};
|
||||
let router = Router::new()
|
||||
.route("/health", get(health))
|
||||
.with_state(state);
|
||||
|
||||
let listener = tokio::net::TcpListener::bind(listen).await?;
|
||||
tracing::info!(listen, "api listening");
|
||||
|
||||
let serve = axum::serve(listener, router).with_graceful_shutdown(shutdown());
|
||||
match tokio::time::timeout(Duration::from_secs(15), serve).await {
|
||||
Ok(r) => r?,
|
||||
Err(_) => tracing::warn!("graceful shutdown timed out after 15s — exiting anyway"),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn shutdown() {
|
||||
let ctrl_c = async {
|
||||
let _ = tokio::signal::ctrl_c().await;
|
||||
};
|
||||
let term = async {
|
||||
if let Ok(mut s) = signal(SignalKind::terminate()) {
|
||||
s.recv().await;
|
||||
}
|
||||
};
|
||||
tokio::select! { _ = ctrl_c => {}, _ = term => {} }
|
||||
tracing::info!("shutdown signal received");
|
||||
}
|
||||
|
||||
/// JSON body returned by `GET /health`.
#[derive(Serialize)]
struct Health {
    /// Overall status; the handler sets it to the same value as `db_ok`.
    ok: bool,
    /// Crate version baked in at compile time (`CARGO_PKG_VERSION`).
    version: &'static str,
    /// Whole seconds elapsed since the server started.
    uptime_secs: i64,
    /// Whether the probe query against the database succeeded.
    db_ok: bool,
    /// Number of rows in `stories`; 0 when the probe query failed.
    story_count: i64,
}
|
||||
|
||||
async fn health(State(state): State<AppState>) -> Json<Health> {
|
||||
let row: Result<(i64,), _> = sqlx::query_as("SELECT count(*) FROM stories")
|
||||
.fetch_one(&state.pool)
|
||||
.await;
|
||||
let (db_ok, story_count) = match row {
|
||||
Ok((n,)) => (true, n),
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "health: db query failed");
|
||||
(false, 0)
|
||||
}
|
||||
};
|
||||
Json(Health {
|
||||
ok: db_ok,
|
||||
version: env!("CARGO_PKG_VERSION"),
|
||||
uptime_secs: (Utc::now() - state.started_at).num_seconds(),
|
||||
db_ok,
|
||||
story_count,
|
||||
})
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue