From 340a4ee408e9fce168184c7beff39fcbedc88142 Mon Sep 17 00:00:00 2001 From: Kayos Date: Thu, 7 May 2026 16:09:41 -0700 Subject: [PATCH] diag: standalone reproducer for large-bytestring ref-script corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit cargo run --example repro_script_corruption -p aldabra-dao --release Reads a hex-encoded Plutus V2 script, builds a minimal Conway tx with that script as inline reference, calls build_conway_raw, then searches the tx body for the input bytes verbatim. Also tests the known on-chain block-swap corruption fingerprint (bytes 2390-2398 swapped with bytes 2416-2424) to determine whether pallas reproduces the corruption locally. If verbatim found: pallas is byte-clean, bug is downstream (transport / Koios / chain submit). If swapped variant found: pallas itself produces the corruption. No chain query, no MCP, no JSON-RPC — pure local serialization. --- .../examples/repro_script_corruption.rs | 154 ++++++++++++++++++ 1 file changed, 154 insertions(+) create mode 100644 crates/aldabra-dao/examples/repro_script_corruption.rs diff --git a/crates/aldabra-dao/examples/repro_script_corruption.rs b/crates/aldabra-dao/examples/repro_script_corruption.rs new file mode 100644 index 0000000..2f08db1 --- /dev/null +++ b/crates/aldabra-dao/examples/repro_script_corruption.rs @@ -0,0 +1,154 @@ +//! Standalone reproducer for the large-bytestring reference-script +//! corruption observed in pallas-txbuilder. +//! +//! Usage: +//! ALDABRA_REPRO_HEX=/path/to/governorValidator.rawhex \ +//! cargo run --example repro_script_corruption -p aldabra-dao --release +//! +//! The reproducer: +//! 1. Reads a hex-encoded Plutus V2 script (rawHex) from a file. +//! 2. Builds a minimal Conway tx with one output that carries the +//! script as an inline reference script. +//! 3. Calls `build_conway_raw()` to produce the tx body bytes. +//! 4. Searches the tx body for the input script bytes verbatim. If +//! it finds them: pallas's encode is byte-clean (bug is downstream +//! — chain transport, Koios, MCP transport, etc). If it doesn't: +//! pallas mutated the bytes during encoding, prints the diff. +//! +//! No chain query, no MCP, no JSON-RPC. Pure local serialization. + +use std::env; +use std::fs; + +use pallas_addresses::Address; +use pallas_codec::utils::Bytes; +use pallas_txbuilder::{ + BuildConway, Output as TxOutput, ScriptKind, StagingTransaction, + TransactionInput as Input, +}; +use pallas_crypto::hash::Hash; + +fn hex_to_bytes(s: &str) -> Vec { + let s = s.trim(); + let mut v = Vec::with_capacity(s.len() / 2); + let bytes = s.as_bytes(); + let mut i = 0; + while i + 1 < bytes.len() { + let hi = (bytes[i] as char).to_digit(16).expect("invalid hex hi") as u8; + let lo = (bytes[i + 1] as char).to_digit(16).expect("invalid hex lo") as u8; + v.push((hi << 4) | lo); + i += 2; + } + v +} + +fn find_subseq(haystack: &[u8], needle: &[u8]) -> Option { + if needle.is_empty() || needle.len() > haystack.len() { + return None; + } + haystack + .windows(needle.len()) + .position(|w| w == needle) +} + +fn main() { + let path = env::var("ALDABRA_REPRO_HEX") + .expect("set ALDABRA_REPRO_HEX to a file containing the script hex"); + let hex = fs::read_to_string(&path).expect("read hex file"); + let script_bytes = hex_to_bytes(&hex); + println!( + "input script: {} bytes ({} hex chars)", + script_bytes.len(), + hex.trim().len() + ); + + let dummy_tx_hash: Hash<32> = Hash::new([0u8; 32]); + let input = Input::new(dummy_tx_hash, 0); + + // A throwaway preprod testnet enterprise script address (just for + // shape — no funds, no real chain interaction). + let dest_addr = Address::from_bech32( + "addr_test1wptadvtl64h74jmhwuda595j40ss3rgh0p9jam0ejwgz6mcnzvusa", + ) + .expect("decode addr"); + + let mut output = TxOutput::new(dest_addr, 5_000_000); + output = output.set_inline_script(ScriptKind::PlutusV2, script_bytes.clone()); + + let staging = StagingTransaction::new() + .input(input) + .output(output) + .fee(2_000_000) + .network_id(0); + + let built = staging + .build_conway_raw() + .expect("build_conway_raw failed"); + + let tx_bytes = built.tx_bytes.0; + println!("built tx body: {} bytes", tx_bytes.len()); + + // Sanity: the script bytes should appear somewhere inside the tx + // body. The output's script_ref encodes as `tag(24) bytes(...)` + // wrapping the inner array `[2, bytes]`. The actual script bytes + // are then nested inside that. Search for them verbatim. + if let Some(pos) = find_subseq(&tx_bytes, &script_bytes) { + println!("✅ FOUND input script bytes verbatim at tx-body offset {}", pos); + println!(" pallas-txbuilder serialized them clean."); + } else { + println!("❌ DID NOT find input script bytes verbatim in tx body."); + println!(" pallas-txbuilder mutated the bytes during encoding."); + // Try to locate the ApproxRegion that contains them. Search + // for the first 64 bytes of input — if THAT prefix is found, + // the bytes start there but corrupt later. If not, the start + // is also mutated. + let prefix = &script_bytes[..64.min(script_bytes.len())]; + match find_subseq(&tx_bytes, prefix) { + Some(start) => { + println!( + " Found {} -byte prefix at tx-body offset {} — mutation is later in the bytestring", + prefix.len(), + start + ); + let region = &tx_bytes[start..(start + script_bytes.len()).min(tx_bytes.len())]; + let mut diffs = 0usize; + let mut first_diff = None; + for (i, (a, b)) in script_bytes.iter().zip(region.iter()).enumerate() { + if a != b { + diffs += 1; + if first_diff.is_none() { + first_diff = Some(i); + } + } + } + println!( + " {} byte-positions differ; first diff at byte {} of script", + diffs, + first_diff.map(|x| x as i32).unwrap_or(-1) + ); + } + None => { + println!(" Even the first 64 bytes don't match — corruption starts at offset 0."); + } + } + } + + // Also search for the known on-chain corrupt fingerprint: at + // bytes 2390..=2424 the on-chain version has the two 9-byte + // blocks SWAPPED relative to input. Build the swapped version + // and check if THAT appears in the tx body. + if script_bytes.len() >= 2425 { + let mut corrupted = script_bytes.clone(); + let block_a = corrupted[2390..2399].to_vec(); + let block_b = corrupted[2416..2425].to_vec(); + corrupted[2390..2399].copy_from_slice(&block_b); + corrupted[2416..2425].copy_from_slice(&block_a); + + if find_subseq(&tx_bytes, &corrupted).is_some() { + println!("⚠️ found CORRUPTED variant (block-swap @ 2390↔2416) in tx body."); + println!(" pallas-txbuilder is producing the same corruption we see on chain."); + } else { + println!(" block-swap variant NOT found in tx body either."); + } + } +}