mithril-go/internal/stm/types.go
Kayos 9d6c7cffbe v1.0.1: audit fixes — fetchCertRaw status check, .part cleanup, AVK guards, strict merkle, JSON error envelope
Independent code audit (in-repo, fresh-eyes pass) flagged 0 critical, 4
high, 8 medium, 7 low. This commit addresses all 4 highs + the JSON
error-path inconsistency + the vestigial verify.STM stub.

HIGH fixes:
- cmd/mithril-go/main.go fetchCertRaw: missing status check let HTML 4xx/5xx
  bodies fall through to confusing JSON-decode errors. Added explicit
  StatusCode>=400 check + 16 MiB response body cap + Accept header.
- internal/artifact/download.go: SHA mismatch left .part on disk, causing
  every retry to resume the corrupted bytes and fail SHA forever. Now
  removes .part on hash mismatch so the next attempt starts clean.
- internal/stm/types.go DecodeAVK: rejects total_stake=0 and nr_leaves=0
  at decode-time. internal/stm/lottery.go adds defensive guard for
  stake==0 || totalStake==0 to prevent big.Rat.SetFrac panic (DoS vector
  for the MCP server when fed crafted AVK).
- internal/stm/merkle.go: now requires (a) every proof value is exactly
  32 bytes, (b) indices are STRICTLY ascending (no duplicates),
  (c) every index is < nr_leaves, (d) all proof values are consumed by
  the algorithm. Prevents parser-differential bugs vs upstream Rust.

JSON error-path wiring:
- cmd/mithril-go/json.go: replaced unused emitJSONErr with failure() helper
  that routes errors to stdout-as-JSON when -json is set, else stderr-as-text.
  Error envelope shape: {error: {code, kind, message}} where 'kind' is a
  stable short string (network/integrity/verify/usage/internal) for agents
  to branch on without parsing human text.
- All -json-supporting commands (info, list, show, cert, verify+subcommands)
  now use failure() in error paths instead of bare fmt.Fprintln(stderr).
- Verified: 'verify -json deadbeef' on a bogus hash now emits valid JSON
  to stdout with exit=3, instead of empty stdout + text on stderr.

Vestigial code:
- internal/verify/verify.go: removed STM() stub + ErrSTMNotImplemented.
  Real STM verification has lived in internal/stm/verify.go since the
  crypto sprint; the stub was dead code from milestone-by-milestone work.

Verification (still all green):
- preprod chain: 90 certs, 1124 wins ✓
- mainnet head:  59 signers, 1972 wins ✓
- preprod head:   2 signers,   11 wins ✓
- preprod genesis: Ed25519 ✓
- JSON error envelope on bogus hash: well-formed JSON, exit=3
- internal/stm unit test: PASS

Audit findings deferred to v1.0.2+: bubble-sort in stm.Verify (medium,
perf only at scale); int-vs-uint64 truncation guards on 32-bit targets
(medium, won't bite on 64-bit); tar mode-bit masking (medium, low impact
since archives are from trusted aggregator); no User-Agent header on
aggregator requests (low, op nicety); MCP scanner silent stop on >10 MiB
line (low, defensive).
2026-04-23 17:30:34 -07:00

216 lines
6.7 KiB
Go

// Package stm implements Mithril Stake-based Threshold Multi-signature
// decoding and verification.
//
// The wire format of a Mithril multi_signature field is:
//
// hex( ASCII( JSON( ... ) ) )
//
// i.e. hex-encoded bytes that are the UTF-8 of a JSON object. The JSON
// contents are described by the MultiSig type and decoded by DecodeMultiSig
// below.
//
// Verification phases:
//
// 1. DecodeMultiSig — parse the wrapped JSON
// 2. BLS single-sig verification of each (signer, sigma) over the msg
// 3. Merkle proof verification: each signer index is a registered party
// 4. Lottery check: for each (index, sigma), evaluate_dense_mapping < threshold(stake)
// 5. Threshold: total distinct lottery wins >= k
//
// Phases 2-5 are implemented in the sibling files verify.go, merkle.go and
// lottery.go. This file covers phase 1: the wire-format types and their
// decoders.
package stm
import (
	"encoding/base64"
	"encoding/hex"
	"encoding/json"
	"fmt"
)
// ByteArray is a byte slice with a lenient JSON decoder. It accepts either
// a JSON array of integers in [0, 255] such as [1,2,3] (the shape Mithril
// puts on the wire) or a JSON string holding standard, padded base64 (the
// shape encoding/json produces for a plain []byte).
//
// NOTE(review): no MarshalJSON is defined in this file, so encoding a
// ByteArray falls back to encoding/json's default []byte handling (a base64
// string) unless a marshaler is provided elsewhere in the package.
type ByteArray []byte

// UnmarshalJSON implements json.Unmarshaler.
//
// The int-array form is tried first because that is what Mithril ships; a
// JSON null is accepted by that path and yields an empty slice. Any element
// outside [0, 255] is an error. If the input is not an int array it must be
// a JSON string containing valid base64, which is decoded into the slice.
// Anything else is rejected. The receiver is only modified on success.
func (b *ByteArray) UnmarshalJSON(data []byte) error {
	// Preferred shape: array of ints, one per byte.
	var ints []int
	if err := json.Unmarshal(data, &ints); err == nil {
		out := make([]byte, len(ints))
		for i, v := range ints {
			if v < 0 || v > 255 {
				return fmt.Errorf("byte out of range at %d: %d", i, v)
			}
			out[i] = byte(v)
		}
		*b = out
		return nil
	}

	// Fallback shape: base64 string. The text must actually be decoded;
	// storing the raw characters would yield the base64 alphabet instead of
	// the bytes it encodes.
	var s string
	if err := json.Unmarshal(data, &s); err == nil {
		out, err := base64.StdEncoding.DecodeString(s)
		if err != nil {
			return fmt.Errorf("ByteArray base64: %w", err)
		}
		*b = out
		return nil
	}

	return fmt.Errorf("ByteArray: neither int-array nor string")
}
// MultiSig is the top-level object decoded from a certificate's
// multi_signature field (see DecodeMultiSig): the per-signer entries plus
// a single batched Merkle proof.
type MultiSig struct {
// Signatures holds one [StmSig, RegParty] tuple per signer.
Signatures []SignerEntry `json:"signatures"`
// BatchProof is the Merkle multi-proof over the registered parties.
BatchProof BatchProof `json:"batch_proof"`
}
// SignerEntry is one element of MultiSig.Signatures. On the wire it is a
// 2-element JSON array, [StmSig, RegParty], rather than an object. A
// heterogeneous JSON array does not map onto a Go struct directly, so the
// type carries no struct tags and is decoded by its UnmarshalJSON method.
type SignerEntry struct {
// Sig is the first tuple element.
Sig StmSig
// RegParty is the second tuple element.
RegParty RegParty
}
// StmSig is one signer's contribution: their BLS sig, the lottery
// indices they won, and their index in the registered party list.
//
// Decoding in this file does not check the length of Sigma or the contents
// of Indexes; the sizes noted below describe the expected wire data.
type StmSig struct {
Sigma ByteArray `json:"sigma"` // 48-byte BLS G1 compressed sig
Indexes []uint64 `json:"indexes"` // winning lottery indices
SignerIndex uint64 `json:"signer_index"` // party position in registered list
}
// RegParty is a registered party. Like SignerEntry it is a 2-tuple in JSON,
// (vk_bytes, stake), so it has no struct tags and is decoded by its
// UnmarshalJSON method. The length of VK is not checked during decoding in
// this file.
type RegParty struct {
VK ByteArray // 96-byte BLS G2 compressed verification key
// Stake is the second tuple element, decoded as an unsigned integer.
Stake uint64
}
// AVK is the cert's aggregate verification key — a Merkle commitment over
// the registered (vk, stake) parties plus the total stake. Shipped on the
// wire as hex-of-ASCII-of-JSON, same wrapping as MultiSig.
//
// Values returned by DecodeAVK always have a 32-byte MerkleRoot and nonzero
// NumLeaves and TotalStake; an AVK built by hand carries no such guarantee.
type AVK struct {
// MerkleRoot is the wire field mt_commitment.root.
MerkleRoot ByteArray
// NumLeaves is the wire field mt_commitment.nr_leaves.
NumLeaves uint64
// TotalStake is the wire field total_stake.
TotalStake uint64
}
// DecodeAVK parses the aggregate_verification_key field of a Mithril
// certificate. The input is hex-encoded UTF-8 JSON, optionally still
// wrapped in the double quotes of its JSON string form.
//
// It returns an error when the hex or the JSON is malformed, when the
// Merkle root is not exactly 32 bytes, or when total_stake or nr_leaves is
// zero. On success every field of the returned AVK is populated.
func DecodeAVK(rawJSON []byte) (*AVK, error) {
	// Accept both the quoted json.RawMessage form and a bare hex string.
	payload := rawJSON
	if n := len(payload); n >= 2 && payload[0] == '"' && payload[n-1] == '"' {
		payload = payload[1 : n-1]
	}

	decoded, err := hex.DecodeString(string(payload))
	if err != nil {
		return nil, fmt.Errorf("AVK hex: %w", err)
	}

	// Mirror of the on-wire JSON layout; hasher is decoded but not used.
	var envelope struct {
		MTCommitment struct {
			Root     ByteArray `json:"root"`
			NrLeaves uint64    `json:"nr_leaves"`
			Hasher   any       `json:"hasher"`
		} `json:"mt_commitment"`
		TotalStake uint64 `json:"total_stake"`
	}
	if err = json.Unmarshal(decoded, &envelope); err != nil {
		return nil, fmt.Errorf("AVK json: %w", err)
	}

	// Reject degenerate commitments up front so callers never receive an
	// AVK with a malformed root or a zero stake / leaf count.
	commitment := &envelope.MTCommitment
	switch {
	case len(commitment.Root) != 32:
		return nil, fmt.Errorf("AVK root: got %d bytes, want 32", len(commitment.Root))
	case envelope.TotalStake == 0:
		return nil, fmt.Errorf("AVK total_stake is zero")
	case commitment.NrLeaves == 0:
		return nil, fmt.Errorf("AVK nr_leaves is zero")
	}

	avk := &AVK{
		MerkleRoot: commitment.Root,
		NumLeaves:  commitment.NrLeaves,
		TotalStake: envelope.TotalStake,
	}
	return avk, nil
}
// BatchProof is a Merkle multi-proof over the registered parties. It is
// decoded with plain struct tags; this file performs no validation of the
// value sizes or the ordering of the indices.
type BatchProof struct {
Values []ByteArray `json:"values"` // proof nodes, each 32 bytes (BLAKE2b-256)
Indices []uint64 `json:"indices"` // signer indices being proven
Hasher any `json:"hasher"` // null => BLAKE2b-256 default
}
// UnmarshalJSON decodes the [StmSig, RegParty] tuple form of a SignerEntry.
//
// The input must be a JSON array with exactly two elements. The arity is
// checked explicitly: decoding into a fixed-size Go array would silently
// drop any elements beyond the second, accepting input that a strict tuple
// decoder rejects. The first element is decoded as a StmSig object and the
// second via RegParty.UnmarshalJSON. The receiver is only modified when the
// whole tuple decodes successfully.
func (s *SignerEntry) UnmarshalJSON(b []byte) error {
	var parts []json.RawMessage
	if err := json.Unmarshal(b, &parts); err != nil {
		return fmt.Errorf("SignerEntry tuple: %w", err)
	}
	if len(parts) != 2 {
		return fmt.Errorf("SignerEntry tuple: got %d elements, want 2", len(parts))
	}

	var sig StmSig
	if err := json.Unmarshal(parts[0], &sig); err != nil {
		return fmt.Errorf("SignerEntry.Sig: %w", err)
	}
	var party RegParty
	if err := party.UnmarshalJSON(parts[1]); err != nil {
		return fmt.Errorf("SignerEntry.RegParty: %w", err)
	}

	s.Sig, s.RegParty = sig, party
	return nil
}
// UnmarshalJSON decodes the [vk_bytes, stake] tuple form of a RegParty.
//
// The input must be a JSON array with exactly two elements. The arity is
// checked explicitly: decoding into a fixed-size Go array would silently
// drop any elements beyond the second, accepting input that a strict tuple
// decoder rejects. The first element is decoded via ByteArray.UnmarshalJSON
// and the second as an unsigned 64-bit integer. The receiver is only
// modified when the whole tuple decodes successfully.
func (r *RegParty) UnmarshalJSON(b []byte) error {
	var parts []json.RawMessage
	if err := json.Unmarshal(b, &parts); err != nil {
		return fmt.Errorf("RegParty tuple: %w", err)
	}
	if len(parts) != 2 {
		return fmt.Errorf("RegParty tuple: got %d elements, want 2", len(parts))
	}

	var vk ByteArray
	if err := vk.UnmarshalJSON(parts[0]); err != nil {
		return fmt.Errorf("RegParty.VK: %w", err)
	}
	var stake uint64
	if err := json.Unmarshal(parts[1], &stake); err != nil {
		return fmt.Errorf("RegParty.Stake: %w", err)
	}

	r.VK, r.Stake = vk, stake
	return nil
}
// DecodeMultiSig parses the multi_signature field of a Mithril certificate
// into a MultiSig. The field is hex-encoded UTF-8 JSON.
//
// rawJSON may be either the json.RawMessage form (still surrounded by
// double quotes) or the bare hex string; a leading and trailing quote pair
// is removed before decoding. An error is returned when the hex is invalid
// or when the decoded bytes are not valid JSON for a MultiSig.
func DecodeMultiSig(rawJSON []byte) (*MultiSig, error) {
	// Strip the JSON string quotes only when both are present.
	payload := rawJSON
	if n := len(payload); n >= 2 && payload[0] == '"' && payload[n-1] == '"' {
		payload = payload[1 : n-1]
	}

	decoded, err := hex.DecodeString(string(payload))
	if err != nil {
		return nil, fmt.Errorf("hex decode: %w", err)
	}

	sig := new(MultiSig)
	if err = json.Unmarshal(decoded, sig); err != nil {
		return nil, fmt.Errorf("json decode: %w", err)
	}
	return sig, nil
}
// TotalWins returns the sum of the lengths of every signer's Indexes list.
// Indices claimed by more than one signer are counted once per claim; use
// DistinctWins for the de-duplicated set.
func (m *MultiSig) TotalWins() int {
	total := 0
	for i := range m.Signatures {
		total += len(m.Signatures[i].Sig.Indexes)
	}
	return total
}
// DistinctWins returns the distinct lottery indices claimed across all
// signers. The Mithril STM spec requires total DISTINCT indices >= k for a
// valid aggregate.
//
// Each index appears exactly once in the result, in the order it is first
// encountered while walking Signatures and each signer's Indexes. The
// order is therefore deterministic for a given MultiSig, rather than
// following Go's randomized map iteration. The returned slice is never
// nil, even when no indices are claimed.
func (m *MultiSig) DistinctWins() []uint64 {
	seen := make(map[uint64]struct{})
	out := make([]uint64, 0)
	for i := range m.Signatures {
		for _, ix := range m.Signatures[i].Sig.Indexes {
			if _, dup := seen[ix]; dup {
				continue
			}
			seen[ix] = struct{}{}
			out = append(out, ix)
		}
	}
	return out
}