aldabra/crates/aldabra-dao/examples/repro_script_corruption.rs

150 lines
5.9 KiB
Rust

//! Standalone reproducer for the large-bytestring reference-script
//! corruption observed in pallas-txbuilder.
//!
//! Usage:
//! ALDABRA_REPRO_HEX=/path/to/governorValidator.rawhex \
//! cargo run --example repro_script_corruption -p aldabra-dao --release
//!
//! The reproducer:
//! 1. Reads a hex-encoded Plutus V2 script (rawHex) from a file.
//! 2. Builds a minimal Conway tx with one output that carries the
//! script as an inline reference script.
//! 3. Calls `build_conway_raw()` to produce the tx body bytes.
//! 4. Searches the tx body for the input script bytes verbatim. If
//! it finds them, pallas's encoding is byte-clean (the bug is downstream
//! — chain transport, Koios, MCP transport, etc.). If it doesn't,
//! pallas mutated the bytes during encoding, and a diff summary is printed.
//!
//! No chain query, no MCP, no JSON-RPC. Pure local serialization.
use std::env;
use std::fs;
use pallas_addresses::Address;
use pallas_txbuilder::{BuildConway, Input, Output as TxOutput, ScriptKind, StagingTransaction};
use pallas_crypto::hash::Hash;
/// Decodes a hex string into raw bytes.
///
/// Leading and trailing whitespace (e.g. a final newline in the input
/// file) is trimmed before decoding. Both upper- and lower-case hex digits
/// are accepted.
///
/// # Panics
///
/// Panics if the trimmed input has an odd number of characters or contains
/// a character that is not a hex digit. An unpaired trailing digit is
/// rejected rather than dropped: silently shortening the script would
/// defeat the purpose of a byte-exactness reproducer.
fn hex_to_bytes(s: &str) -> Vec<u8> {
    let digits = s.trim().as_bytes();
    assert!(
        digits.len() % 2 == 0,
        "invalid hex: odd number of digits ({})",
        digits.len()
    );
    digits
        .chunks_exact(2)
        .map(|pair| {
            // Each pair is (high nibble, low nibble) of one output byte.
            let hi = (pair[0] as char).to_digit(16).expect("invalid hex hi") as u8;
            let lo = (pair[1] as char).to_digit(16).expect("invalid hex lo") as u8;
            (hi << 4) | lo
        })
        .collect()
}
/// Returns the offset of the first occurrence of `needle` in `haystack`.
///
/// Yields `None` when `needle` is empty, when it is longer than
/// `haystack`, or when it does not occur at all.
fn find_subseq(haystack: &[u8], needle: &[u8]) -> Option<usize> {
    let n = needle.len();
    if n == 0 {
        return None;
    }
    // `checked_sub` fails exactly when the needle cannot fit in the haystack.
    let last_start = haystack.len().checked_sub(n)?;
    (0..=last_start).find(|&start| &haystack[start..start + n] == needle)
}
/// Builds a minimal transaction that carries the script read from the file
/// named by `ALDABRA_REPRO_HEX` as an inline reference script, then reports
/// whether the script bytes appear unchanged in the serialized output.
///
/// # Panics
///
/// Panics if `ALDABRA_REPRO_HEX` is unset, the file cannot be read, the hex
/// is invalid or decodes to zero bytes, the hard-coded address fails to
/// decode, or `build_conway_raw()` returns an error.
fn main() {
    let path = env::var("ALDABRA_REPRO_HEX")
        .expect("set ALDABRA_REPRO_HEX to a file containing the script hex");
    let hex = fs::read_to_string(&path).expect("read hex file");
    let script_bytes = hex_to_bytes(&hex);
    // `find_subseq` never matches an empty needle, so without this guard an
    // empty input file would be misreported as corruption by pallas.
    assert!(
        !script_bytes.is_empty(),
        "script hex file {} decoded to zero bytes",
        path
    );
    println!(
        "input script: {} bytes ({} hex chars)",
        script_bytes.len(),
        hex.trim().len()
    );

    let dummy_tx_hash: Hash<32> = Hash::new([0u8; 32]);
    let input = Input::new(dummy_tx_hash, 0);
    // A throwaway preprod testnet enterprise script address (just for
    // shape — no funds, no real chain interaction).
    let dest_addr = Address::from_bech32(
        "addr_test1wptadvtl64h74jmhwuda595j40ss3rgh0p9jam0ejwgz6mcnzvusa",
    )
    .expect("decode addr");
    // The clone is required: `script_bytes` is compared against the
    // serialized output below.
    let output = TxOutput::new(dest_addr, 5_000_000)
        .set_inline_script(ScriptKind::PlutusV2, script_bytes.clone());
    let staging = StagingTransaction::new()
        .input(input)
        .output(output)
        .fee(2_000_000)
        .network_id(0);
    let built = staging
        .build_conway_raw()
        .expect("build_conway_raw failed");
    let tx_bytes = built.tx_bytes.0;
    println!("built tx body: {} bytes", tx_bytes.len());

    // Sanity: the script bytes should appear somewhere inside the tx
    // body. The output's script_ref encodes as `tag(24) bytes(...)`
    // wrapping the inner array `[2, bytes]`. The actual script bytes
    // are then nested inside that. Search for them verbatim.
    if let Some(pos) = find_subseq(&tx_bytes, &script_bytes) {
        println!("✅ FOUND input script bytes verbatim at tx-body offset {}", pos);
        println!(" pallas-txbuilder serialized them clean.");
    } else {
        println!("❌ DID NOT find input script bytes verbatim in tx body.");
        println!(" pallas-txbuilder mutated the bytes during encoding.");
        // Try to locate the approximate region that contains them. Search
        // for the first (up to) 64 bytes of input — if THAT prefix is
        // found, the bytes start there but corrupt later. If not, the
        // start is also mutated.
        let prefix = &script_bytes[..script_bytes.len().min(64)];
        match find_subseq(&tx_bytes, prefix) {
            Some(start) => {
                println!(
                    " Found {}-byte prefix at tx-body offset {} — mutation is later in the bytestring",
                    prefix.len(),
                    start
                );
                // The region may be shorter than the script if the
                // serialized bytes end early; that case is reported below.
                let region_end = (start + script_bytes.len()).min(tx_bytes.len());
                let region = &tx_bytes[start..region_end];
                let diffs = script_bytes
                    .iter()
                    .zip(region)
                    .filter(|(a, b)| a != b)
                    .count();
                let first_diff = script_bytes.iter().zip(region).position(|(a, b)| a != b);
                match first_diff {
                    Some(idx) => println!(
                        " {} byte-positions differ; first diff at byte {} of script",
                        diffs, idx
                    ),
                    None => println!(
                        " 0 byte-positions differ within the {} bytes compared",
                        region.len()
                    ),
                }
                // `region` is never longer than the script, so this cannot
                // underflow.
                let missing = script_bytes.len() - region.len();
                if missing > 0 {
                    println!(
                        " tx body ends {} bytes before the script does — tail is missing",
                        missing
                    );
                }
            }
            None => {
                println!(
                    " Even the first {} bytes don't match — corruption starts at offset 0.",
                    prefix.len()
                );
            }
        }
    }

    // Also search for the known on-chain corrupt fingerprint: at
    // bytes 2390..=2424 the on-chain version has the two 9-byte
    // blocks SWAPPED relative to input. Build the swapped version
    // and check if THAT appears in the tx body.
    const BLOCK_LEN: usize = 9;
    const BLOCK_A: usize = 2390;
    const BLOCK_B: usize = 2416;
    if script_bytes.len() >= BLOCK_B + BLOCK_LEN {
        let mut corrupted = script_bytes.clone();
        // Split between the two blocks so both can be borrowed mutably
        // at once and exchanged in place.
        let (head, tail) = corrupted.split_at_mut(BLOCK_B);
        head[BLOCK_A..BLOCK_A + BLOCK_LEN].swap_with_slice(&mut tail[..BLOCK_LEN]);
        if find_subseq(&tx_bytes, &corrupted).is_some() {
            println!("⚠️ found CORRUPTED variant (block-swap @ 2390↔2416) in tx body.");
            println!(" pallas-txbuilder is producing the same corruption we see on chain.");
        } else {
            println!(" block-swap variant NOT found in tx body either.");
        }
    }
}