download + extract pipeline

- artifact.Download: resumable HTTP with optional SHA256 check + progress cb
- artifact.ExtractZstdTar: streamed zstd+tar with tar-slip defense
- aggregator client matches real API shape (digests/immutables/ancillary blocks
  with URIHolder polymorphism for templated immutable URIs)
- cmd: show + download subcommands wired up
- end-to-end verified against preprod: digests archive pulls cleanly, yields
  16836-entry SHA manifest ready for verification sprint

deps: github.com/klauspost/compress (pure-go zstd)
This commit is contained in:
Kayos 2026-04-23 15:16:48 -07:00
parent f87b7fc3c4
commit e557d85d5a
6 changed files with 483 additions and 84 deletions

View file

@ -5,7 +5,8 @@
//
// Subcommands:
// list — list available cardano-database snapshots on an aggregator
// download — fetch a snapshot (verify + extract optional)
// show — show full detail for a snapshot (or "latest")
// download — fetch a snapshot (digests + ancillary; optionally immutables)
// verify — verify an already-downloaded snapshot
// info — show aggregator + network details
package main
@ -15,28 +16,38 @@ import (
"flag"
"fmt"
"os"
"os/signal"
"path/filepath"
"syscall"
"text/tabwriter"
"time"
"git.sulkta.coop/Sulkta-Coop/mithril-go/internal/aggregator"
"git.sulkta.coop/Sulkta-Coop/mithril-go/internal/artifact"
"git.sulkta.coop/Sulkta-Coop/mithril-go/internal/networks"
)
const version = "0.0.1-dev"
const version = "0.0.2-dev"
func main() {
if len(os.Args) < 2 {
usage()
os.Exit(2)
}
ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer stop()
cmd := os.Args[1]
args := os.Args[2:]
switch cmd {
case "list":
os.Exit(cmdList(args))
os.Exit(cmdList(ctx, args))
case "show":
os.Exit(cmdShow(ctx, args))
case "download":
os.Exit(cmdDownload(args))
os.Exit(cmdDownload(ctx, args))
case "verify":
os.Exit(cmdVerify(args))
os.Exit(cmdVerify(ctx, args))
case "info":
os.Exit(cmdInfo(args))
case "version", "--version", "-v":
@ -58,8 +69,9 @@ Usage:
Commands:
list List available cardano-database snapshots
download Download + verify + extract a snapshot
verify Verify an already-downloaded snapshot
show Show detail for one snapshot (hash or "latest")
download Download a snapshot to a target directory
verify Verify an already-downloaded snapshot (not yet implemented)
info Show network + aggregator info
version Print version
help Show this help
@ -80,15 +92,15 @@ func resolveNetwork(fs *flag.FlagSet, args []string) (networks.Network, []string
return n, fs.Args(), nil
}
func cmdList(args []string) int {
func cmdList(ctx context.Context, args []string) int {
fs := flag.NewFlagSet("list", flag.ExitOnError)
n, _, err := resolveNetwork(fs, args)
if err != nil {
fmt.Fprintln(os.Stderr, err)
return 2
}
client := aggregator.New(n.AggregatorURL)
snaps, err := client.ListCardanoDBSnapshots(context.Background())
c := aggregator.New(n.AggregatorURL)
snaps, err := c.ListCardanoDBSnapshots(ctx)
if err != nil {
fmt.Fprintln(os.Stderr, "list:", err)
return 1
@ -102,19 +114,124 @@ func cmdList(args []string) int {
s.CreatedAt.UTC().Format("2006-01-02 15:04 MST"))
}
if err := tw.Flush(); err != nil {
fmt.Fprintln(os.Stderr, "flush:", err)
return 1
}
return 0
}
func cmdDownload(args []string) int {
fmt.Fprintln(os.Stderr, "download: not yet implemented")
return 1
// cmdShow implements the "show" subcommand. It resolves one snapshot on the
// selected network's aggregator — the first positional argument, or "latest"
// when none is given — and prints its details to stdout.
//
// It returns 0 on success, 2 when resolveNetwork fails, and 1 when the
// snapshot cannot be resolved.
func cmdShow(ctx context.Context, args []string) int {
	flags := flag.NewFlagSet("show", flag.ExitOnError)
	network, positional, err := resolveNetwork(flags, args)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return 2
	}

	// The first leftover argument names the snapshot; fall back to "latest".
	target := "latest"
	if len(positional) != 0 {
		target = positional[0]
	}

	snap, err := resolveSnapshot(ctx, aggregator.New(network.AggregatorURL), target)
	if err != nil {
		fmt.Fprintln(os.Stderr, "show:", err)
		return 1
	}

	// Identity and provenance of the snapshot.
	w := os.Stdout
	fmt.Fprintf(w, "hash: %s\n", snap.Hash)
	fmt.Fprintf(w, "network: %s\n", snap.Network)
	fmt.Fprintf(w, "epoch/immutable: %d / %d\n", snap.Beacon.Epoch, snap.Beacon.ImmutableFileNumber)
	fmt.Fprintf(w, "certificate: %s\n", snap.CertificateHash)
	fmt.Fprintf(w, "cardano version: %s\n", snap.CardanoNodeVersion)
	fmt.Fprintf(w, "created: %s\n", snap.CreatedAt.UTC().Format(time.RFC3339))

	// Sizes and the number of download locations per artifact group.
	fmt.Fprintf(w, "size uncompressed: %s\n", humanSize(snap.TotalDBSizeUncompressed))
	fmt.Fprintf(w, "digests size: %s locations: %d\n",
		humanSize(snap.Digests.SizeUncompressed), len(snap.Digests.Locations))
	fmt.Fprintf(w, "ancillary size: %s locations: %d\n",
		humanSize(snap.Ancillary.SizeUncompressed), len(snap.Ancillary.Locations))
	// The "files" column reuses the beacon's immutable file number.
	fmt.Fprintf(w, "immutable avg: %s files: %d locations: %d\n",
		humanSize(snap.Immutables.AverageSizeUncompressed),
		snap.Beacon.ImmutableFileNumber,
		len(snap.Immutables.Locations))
	return 0
}
func cmdVerify(args []string) int {
fmt.Fprintln(os.Stderr, "verify: not yet implemented")
// cmdDownload implements the "download" subcommand. It resolves a snapshot
// (first positional argument, or "latest"), then downloads and extracts the
// digests archive and, unless -ancillary=false, the ancillary archive into
// the -out directory.
//
// Layout under -out:
//
//	digests.tar.zst, digests/    digests archive and its extracted contents
//	ancillary.tar.zst, db/       ancillary archive and its extracted contents
//
// It returns 0 on success, 2 when resolveNetwork fails, and 1 on any other
// failure, including a request for the not-yet-supported -immutables mode.
func cmdDownload(ctx context.Context, args []string) int {
	fs := flag.NewFlagSet("download", flag.ExitOnError)
	out := fs.String("out", "./db", "output directory")
	includeAncillary := fs.Bool("ancillary", true, "download the ancillary archive")
	includeImmuts := fs.Bool("immutables", false, "download all immutable files (huge on mainnet — off by default)")
	n, rest, err := resolveNetwork(fs, args)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		return 2
	}

	// Reject the unsupported mode before touching the network or the disk, so
	// the user does not sit through the other downloads only to get a failure
	// exit code at the very end.
	if *includeImmuts {
		fmt.Fprintln(os.Stderr, "immutables download: not yet wired (will come in v0.0.3)")
		return 1
	}

	hash := "latest"
	if len(rest) > 0 {
		hash = rest[0]
	}

	c := aggregator.New(n.AggregatorURL)
	snap, err := resolveSnapshot(ctx, c, hash)
	if err != nil {
		fmt.Fprintln(os.Stderr, "download:", err)
		return 1
	}

	fmt.Printf("Target: %s\n", snap.Hash)
	fmt.Printf("Network: %s\n", snap.Network)
	fmt.Printf("Epoch/Imm: %d / %d\n", snap.Beacon.Epoch, snap.Beacon.ImmutableFileNumber)
	fmt.Printf("Output dir: %s\n", *out)
	fmt.Println()

	if err := os.MkdirAll(*out, 0o755); err != nil {
		fmt.Fprintln(os.Stderr, "mkdir:", err)
		return 1
	}

	// 1. Download + extract digests archive (few MB — always)
	fmt.Println("=== digests ===")
	digestsURIs := cloudURIs(snap.Digests.Locations)
	if len(digestsURIs) == 0 {
		fmt.Fprintln(os.Stderr, "no cloud_storage digest location available")
		return 1
	}
	digestsArchive := filepath.Join(*out, "digests.tar.zst")
	digestsDir := filepath.Join(*out, "digests")
	// Only the first cloud location is tried.
	// NOTE(review): the progress total passed here is the *uncompressed* size
	// while the transfer is a .tar.zst archive, so the displayed percentage
	// presumably never reaches 100% — confirm whether a compressed size exists.
	if err := downloadWithBar(ctx, digestsURIs[0], digestsArchive, snap.Digests.SizeUncompressed); err != nil {
		fmt.Fprintln(os.Stderr, "digests download:", err)
		return 1
	}
	if err := artifact.ExtractZstdTar(ctx, digestsArchive, digestsDir); err != nil {
		fmt.Fprintln(os.Stderr, "digests extract:", err)
		return 1
	}
	fmt.Println(" extracted to", digestsDir)

	// 2. Ancillary archive
	if *includeAncillary {
		fmt.Println("\n=== ancillary ===")
		anciURIs := cloudURIs(snap.Ancillary.Locations)
		if len(anciURIs) == 0 {
			fmt.Fprintln(os.Stderr, "no cloud_storage ancillary location available")
			return 1
		}
		anciArchive := filepath.Join(*out, "ancillary.tar.zst")
		dbDir := filepath.Join(*out, "db")
		if err := downloadWithBar(ctx, anciURIs[0], anciArchive, snap.Ancillary.SizeUncompressed); err != nil {
			fmt.Fprintln(os.Stderr, "ancillary download:", err)
			return 1
		}
		if err := artifact.ExtractZstdTar(ctx, anciArchive, dbDir); err != nil {
			fmt.Fprintln(os.Stderr, "ancillary extract:", err)
			return 1
		}
		fmt.Println(" extracted to", dbDir)
	}

	// 3. Immutables (optional, huge on mainnet) are rejected up front until
	// the download path is wired.

	fmt.Println("\nDone.")
	return 0
}
// cmdVerify is the placeholder for the "verify" subcommand. It ignores ctx
// and args, reports on stderr that verification is not implemented yet, and
// always returns exit code 1.
func cmdVerify(ctx context.Context, args []string) int {
	const notice = "verify: not yet implemented (STM BLS sprint pending)"
	fmt.Fprintln(os.Stderr, notice)
	return 1
}
@ -125,23 +242,66 @@ func cmdInfo(args []string) int {
fmt.Fprintln(os.Stderr, err)
return 2
}
fmt.Printf("network: %s\n", n.Name)
fmt.Printf("aggregator: %s\n", n.AggregatorURL)
fmt.Printf("network: %s\n", n.Name)
fmt.Printf("aggregator: %s\n", n.AggregatorURL)
fmt.Printf("genesis verify key: %s…\n", n.GenesisVerifyKey[:16])
return 0
}
// resolveSnapshot fetches the detail record for a single snapshot.
//
// hashOrLatest is either a snapshot hash, which is looked up directly, or the
// literal "latest", in which case the snapshot list is fetched first and the
// hash of its first entry is used. Errors from the aggregator client are
// returned unchanged; an empty list yields a dedicated error.
func resolveSnapshot(ctx context.Context, c *aggregator.Client, hashOrLatest string) (*aggregator.CardanoDBSnapshot, error) {
	// An explicit hash needs no listing round trip.
	if hashOrLatest != "latest" {
		return c.GetCardanoDBSnapshot(ctx, hashOrLatest)
	}

	listing, err := c.ListCardanoDBSnapshots(ctx)
	switch {
	case err != nil:
		return nil, err
	case len(listing) == 0:
		return nil, fmt.Errorf("aggregator returned no snapshots")
	}

	// NOTE(review): the first entry is taken as the latest snapshot, which
	// presumes the aggregator lists newest first — confirm against the API.
	return c.GetCardanoDBSnapshot(ctx, listing[0].Hash)
}
// cloudURIs returns, in input order, the plain URI of every location whose
// type is "cloud_storage" and whose plain URI is non-empty. The result is nil
// when no location qualifies.
func cloudURIs(locs []aggregator.Location) []string {
	var uris []string
	for i := range locs {
		// Skip other location types and entries without a plain URI.
		if locs[i].Type != "cloud_storage" || locs[i].URI.Plain == "" {
			continue
		}
		uris = append(uris, locs[i].URI.Plain)
	}
	return uris
}
// downloadWithBar downloads uri to dest via artifact.Download while rewriting
// a single-line progress indicator on stdout.
//
// expectedSize is the byte count used for the percentage column; when it is
// zero the percentage is omitted. After the download returns — successfully
// or not — a summary line with the last reported byte count and the elapsed
// time is printed, and the download's error (if any) is returned unchanged.
//
// NOTE(review): the empty string passed to artifact.Download presumably
// disables its optional SHA256 check — confirm, and wire a digest once one is
// available.
func downloadWithBar(ctx context.Context, uri, dest string, expectedSize uint64) error {
	fmt.Printf(" %s\n", uri)
	start := time.Now()
	var last int64
	cb := func(b int64) {
		// Guard the division: a zero elapsed time would yield +Inf or NaN,
		// and converting those to uint64 is implementation-dependent in Go.
		var rate float64
		if elapsed := time.Since(start).Seconds(); elapsed > 0 {
			rate = float64(b) / elapsed
		}
		pct := ""
		if expectedSize > 0 {
			pct = fmt.Sprintf("%5.1f%% ", float64(b)/float64(expectedSize)*100)
		}
		// The leading \r redraws the same terminal line on every callback.
		fmt.Printf("\r %s%s @ %s/s ", pct, humanSize(uint64(b)), humanSize(uint64(rate)))
		last = b
	}
	err := artifact.Download(ctx, uri, dest, "", cb)
	fmt.Printf("\r %s in %s \n", humanSize(uint64(last)), time.Since(start).Round(time.Second))
	return err
}
// humanSize formats a byte count using binary (1024-based) units.
//
// Values below 1024 are printed as an integer with a "B" suffix (e.g.
// "512B"). Larger values are scaled down by 1024 until they fall below 1024
// and printed with one decimal and a K, M, G, T or P suffix (e.g. "1.5K").
// Anything that is still 1024P or more is reported in E.
func humanSize(b uint64) string {
	const k = 1024.0
	if b < 1024 {
		return fmt.Sprintf("%dB", b)
	}
	v := float64(b)
	for _, u := range []string{"K", "M", "G", "T", "P"} {
		v /= k
		if v < k {
			return fmt.Sprintf("%.1f%s", v, u)
		}
	}
	// Only reachable for values of at least 1024P; a uint64 tops out at 16E.
	return fmt.Sprintf("%.1fE", v/k)
}