192 lines
7.6 KiB
Rust
192 lines
7.6 KiB
Rust
//! Standalone reproducer for the large-bytestring reference-script
|
|
//! corruption observed in pallas-txbuilder.
|
|
//!
|
|
//! Usage:
|
|
//! ALDABRA_REPRO_HEX=/path/to/governorValidator.rawhex \
|
|
//! cargo run --example repro_script_corruption -p aldabra-dao --release
|
|
//!
|
|
//! The reproducer:
|
|
//! 1. Reads a hex-encoded Plutus V2 script (rawHex) from a file.
|
|
//! 2. Builds a minimal Conway tx with one output that carries the
|
|
//! script as an inline reference script.
|
|
//! 3. Calls `build_conway_raw()` to produce the tx body bytes.
|
|
//! 4. Searches the tx body for the input script bytes verbatim. If
|
|
//! it finds them: pallas's encode is byte-clean (bug is downstream
|
|
//! — chain transport, Koios, MCP transport, etc). If it doesn't:
|
|
//! pallas mutated the bytes during encoding, prints the diff.
|
|
//!
|
|
//! No chain query, no MCP, no JSON-RPC. Pure local serialization.
|
|
|
|
use std::env;
|
|
use std::fs;
|
|
|
|
use pallas_addresses::Address;
|
|
use pallas_txbuilder::{BuildConway, Input, Output as TxOutput, ScriptKind, StagingTransaction};
|
|
use pallas_crypto::hash::Hash;
|
|
|
|
/// Decodes a hex string into raw bytes.
///
/// Leading and trailing whitespace (e.g. the newline at the end of a file)
/// is trimmed before decoding. Both upper- and lower-case digits are
/// accepted. An empty (or all-whitespace) input decodes to an empty vector.
///
/// # Panics
///
/// * if the trimmed input has an odd number of characters — a dangling
///   nibble means the input is truncated or malformed, and silently dropping
///   it would make every later byte-for-byte comparison meaningless;
/// * if any character is not a hexadecimal digit.
fn hex_to_bytes(s: &str) -> Vec<u8> {
    let digits = s.trim().as_bytes();
    assert!(
        digits.len() % 2 == 0,
        "odd number of hex digits ({}): input is truncated or malformed",
        digits.len()
    );

    digits
        .chunks_exact(2)
        .map(|pair| {
            // Bytes >= 0x80 become non-ASCII chars here, which `to_digit`
            // rejects, so multi-byte UTF-8 input also ends in a panic.
            let hi = (pair[0] as char).to_digit(16).expect("invalid hex hi") as u8;
            let lo = (pair[1] as char).to_digit(16).expect("invalid hex lo") as u8;
            (hi << 4) | lo
        })
        .collect()
}
|
|
|
|
/// Locates the first occurrence of `needle` inside `haystack`.
///
/// Returns the byte offset at which the match starts, or `None` when there
/// is no match. An empty `needle`, or one longer than `haystack`, never
/// matches.
fn find_subseq(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let width = needle.len();
    if width == 0 || haystack.len() < width {
        return None;
    }

    // Last offset at which a full-width match could still begin.
    let last_start = haystack.len() - width;
    (0..=last_start).find(|&offset| haystack[offset..].starts_with(needle))
}
|
|
|
|
fn main() {
|
|
let path = env::var("ALDABRA_REPRO_HEX")
|
|
.expect("set ALDABRA_REPRO_HEX to a file containing the script hex");
|
|
let hex = fs::read_to_string(&path).expect("read hex file");
|
|
let script_bytes = hex_to_bytes(&hex);
|
|
println!(
|
|
"input script: {} bytes ({} hex chars)",
|
|
script_bytes.len(),
|
|
hex.trim().len()
|
|
);
|
|
|
|
let dummy_tx_hash: Hash<32> = Hash::new([0u8; 32]);
|
|
let input = Input::new(dummy_tx_hash, 0);
|
|
|
|
// A throwaway preprod testnet enterprise script address (just for
|
|
// shape — no funds, no real chain interaction).
|
|
let dest_addr = Address::from_bech32(
|
|
"addr_test1wptadvtl64h74jmhwuda595j40ss3rgh0p9jam0ejwgz6mcnzvusa",
|
|
)
|
|
.expect("decode addr");
|
|
|
|
let mut output = TxOutput::new(dest_addr, 5_000_000);
|
|
output = output.set_inline_script(ScriptKind::PlutusV2, script_bytes.clone());
|
|
|
|
let staging = StagingTransaction::new()
|
|
.input(input)
|
|
.output(output)
|
|
.fee(2_000_000)
|
|
.network_id(0);
|
|
|
|
let built = staging
|
|
.build_conway_raw()
|
|
.expect("build_conway_raw failed");
|
|
|
|
let tx_bytes = built.tx_bytes.0;
|
|
println!("built tx body: {} bytes", tx_bytes.len());
|
|
|
|
// Sanity: the script bytes should appear somewhere inside the tx
|
|
// body. The output's script_ref encodes as `tag(24) bytes(...)`
|
|
// wrapping the inner array `[2, bytes]`. The actual script bytes
|
|
// are then nested inside that. Search for them verbatim.
|
|
if let Some(pos) = find_subseq(&tx_bytes, &script_bytes) {
|
|
println!("✅ FOUND input script bytes verbatim at tx-body offset {}", pos);
|
|
println!(" pallas-txbuilder serialized them clean.");
|
|
|
|
// BUT: check the bytes-header that precedes them. In CBOR, a
|
|
// bytestring of length N has a leader byte of 0x40+N for N<24,
|
|
// 0x58 + 1 length byte for N<=255, 0x59 + 2 length bytes for
|
|
// N<=65535. For 7213, header = 0x59 0x1c 0x2d. If the header
|
|
// claims a different length, encoding is inconsistent.
|
|
if pos >= 3 {
|
|
let h = &tx_bytes[pos - 3..pos];
|
|
println!(
|
|
" bytes-header preceding script: {:02x} {:02x} {:02x}",
|
|
h[0], h[1], h[2]
|
|
);
|
|
if h[0] == 0x59 {
|
|
let claimed_len = u16::from_be_bytes([h[1], h[2]]) as usize;
|
|
if claimed_len == script_bytes.len() {
|
|
println!(
|
|
" ✅ header length {} == input length {} — consistent.",
|
|
claimed_len,
|
|
script_bytes.len()
|
|
);
|
|
} else {
|
|
println!(
|
|
" ❌ header length {} != input length {} — encoder is OFF BY {}.",
|
|
claimed_len,
|
|
script_bytes.len(),
|
|
script_bytes.len() as i64 - claimed_len as i64
|
|
);
|
|
}
|
|
} else {
|
|
println!(
|
|
" ⚠️ preceding byte not 0x59 (uint16 bytes header) — different size class?"
|
|
);
|
|
}
|
|
}
|
|
|
|
// Print the very first 100 bytes of tx body for inspection
|
|
let preview_len = 100.min(tx_bytes.len());
|
|
let preview: String = tx_bytes[..preview_len]
|
|
.iter()
|
|
.map(|b| format!("{:02x}", b))
|
|
.collect();
|
|
println!(" tx body first {} bytes: {}", preview_len, preview);
|
|
} else {
|
|
println!("❌ DID NOT find input script bytes verbatim in tx body.");
|
|
println!(" pallas-txbuilder mutated the bytes during encoding.");
|
|
// Try to locate the ApproxRegion that contains them. Search
|
|
// for the first 64 bytes of input — if THAT prefix is found,
|
|
// the bytes start there but corrupt later. If not, the start
|
|
// is also mutated.
|
|
let prefix = &script_bytes[..64.min(script_bytes.len())];
|
|
match find_subseq(&tx_bytes, prefix) {
|
|
Some(start) => {
|
|
println!(
|
|
" Found {} -byte prefix at tx-body offset {} — mutation is later in the bytestring",
|
|
prefix.len(),
|
|
start
|
|
);
|
|
let region = &tx_bytes[start..(start + script_bytes.len()).min(tx_bytes.len())];
|
|
let mut diffs = 0usize;
|
|
let mut first_diff = None;
|
|
for (i, (a, b)) in script_bytes.iter().zip(region.iter()).enumerate() {
|
|
if a != b {
|
|
diffs += 1;
|
|
if first_diff.is_none() {
|
|
first_diff = Some(i);
|
|
}
|
|
}
|
|
}
|
|
println!(
|
|
" {} byte-positions differ; first diff at byte {} of script",
|
|
diffs,
|
|
first_diff.map(|x| x as i32).unwrap_or(-1)
|
|
);
|
|
}
|
|
None => {
|
|
println!(" Even the first 64 bytes don't match — corruption starts at offset 0.");
|
|
}
|
|
}
|
|
}
|
|
|
|
// Also search for the known on-chain corrupt fingerprint: at
|
|
// bytes 2390..=2424 the on-chain version has the two 9-byte
|
|
// blocks SWAPPED relative to input. Build the swapped version
|
|
// and check if THAT appears in the tx body.
|
|
if script_bytes.len() >= 2425 {
|
|
let mut corrupted = script_bytes.clone();
|
|
let block_a = corrupted[2390..2399].to_vec();
|
|
let block_b = corrupted[2416..2425].to_vec();
|
|
corrupted[2390..2399].copy_from_slice(&block_b);
|
|
corrupted[2416..2425].copy_from_slice(&block_a);
|
|
|
|
if find_subseq(&tx_bytes, &corrupted).is_some() {
|
|
println!("⚠️ found CORRUPTED variant (block-swap @ 2390↔2416) in tx body.");
|
|
println!(" pallas-txbuilder is producing the same corruption we see on chain.");
|
|
} else {
|
|
println!(" block-swap variant NOT found in tx body either.");
|
|
}
|
|
}
|
|
}
|