diff --git a/Cargo.toml b/Cargo.toml index cfa25e5..355ca7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ members = [ "pallas-traverse", "pallas-txbuilder", "pallas-utxorpc", + "pallas-hardano", "pallas-wallet", "pallas", "examples/block-download", diff --git a/pallas-hardano/Cargo.toml b/pallas-hardano/Cargo.toml new file mode 100644 index 0000000..d97121b --- /dev/null +++ b/pallas-hardano/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "pallas-hardano" +description = "Pallas interoperability with the Haskel Cardano node implementation" +version = "0.20.0" +edition = "2021" +repository = "https://github.com/txpipe/pallas" +homepage = "https://github.com/txpipe/pallas" +documentation = "https://docs.rs/pallas-hardano" +license = "Apache-2.0" +readme = "README.md" +authors = ["Santiago Carmuega "] + +[dependencies] +binary-layout = "3.2.0" +tap = "1.0.1" +tracing = "0.1.40" + +[dev-dependencies] +tracing-subscriber = "0.3.17" +hex = "0.4.3" +pallas-traverse = { version = "0.20.0", path = "../pallas-traverse" } diff --git a/pallas-hardano/src/lib.rs b/pallas-hardano/src/lib.rs new file mode 100644 index 0000000..30f61eb --- /dev/null +++ b/pallas-hardano/src/lib.rs @@ -0,0 +1 @@ +pub mod storage; diff --git a/pallas-hardano/src/storage/immutable/chunk.rs b/pallas-hardano/src/storage/immutable/chunk.rs new file mode 100644 index 0000000..0363da8 --- /dev/null +++ b/pallas-hardano/src/storage/immutable/chunk.rs @@ -0,0 +1,121 @@ +use std::{ + fs::File, + io::{BufReader, Read, Seek}, + path::Path, +}; + +use immutable::secondary; +use tracing::trace; + +use crate::storage::immutable; + +pub type SecondaryIndex = super::secondary::Reader; + +pub struct Reader { + inner: BufReader, + index: SecondaryIndex, + current: Option, + next: Option, +} + +impl Reader { + fn open(mut index: SecondaryIndex, chunks: File) -> Result { + let inner = BufReader::new(chunks); + + let current = match index.next() { + Some(x) => Some(x?), + None => None, + }; + + let next = match index.next() { + Some(x) => Some(x?), + None => None, + }; + + Ok(Self { + inner, + index, + current, + next, + }) + } + + fn read_middle_block( + file: &mut BufReader, + next_offset: u64, + ) -> Result, std::io::Error> { + let start = file.stream_position().unwrap(); + let delta = next_offset - start; + trace!(start, delta, "reading chunk middle block"); + + let mut buf = vec![0u8; delta as usize]; + file.read_exact(&mut buf)?; + + Ok(buf) + } + + fn read_last_block(file: &mut BufReader) -> Result, std::io::Error> { + let start = file.stream_position().unwrap(); + trace!(start, "reading chunk last block"); + + let mut buf = vec![]; + file.read_to_end(&mut buf)?; + + Ok(buf) + } +} + +impl Iterator for Reader { + type Item = Result, std::io::Error>; + + fn next(&mut self) -> Option { + match (self.current.take(), self.next.take()) { + (None, _) => None, + (Some(_), Some(next)) => { + let block = Self::read_middle_block(&mut self.inner, next.block_offset); + + self.current = Some(next); + self.next = self.index.next().map(|x| x.unwrap()); + + Some(block) + } + (Some(_), None) => { + let block = Self::read_last_block(&mut self.inner); + + self.current = None; + self.next = None; + + Some(block) + } + } + } +} + +pub fn read_blocks(dir: &Path, name: &str) -> Result { + let primary = dir.join(name).with_extension("primary"); + let primary = std::fs::File::open(primary)?; + let primary = immutable::primary::Reader::open(primary)?; + + let secondary = dir.join(name).with_extension("secondary"); + let secondary = std::fs::File::open(secondary)?; + let secondary = secondary::Reader::open(primary, secondary)?; + + let chunk = dir.join(name).with_extension("chunk"); + let chunk = std::fs::File::open(chunk)?; + Reader::open(secondary, chunk) +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + #[test] + fn it_can_decode_all_blocks() { + let chunk = super::read_blocks(Path::new("../test_data"), "01285").unwrap(); + + for block in chunk { + let block = block.unwrap(); + pallas_traverse::MultiEraBlock::decode(&block).unwrap(); + } + } +} diff --git a/pallas-hardano/src/storage/immutable/mod.rs b/pallas-hardano/src/storage/immutable/mod.rs new file mode 100644 index 0000000..3432c37 --- /dev/null +++ b/pallas-hardano/src/storage/immutable/mod.rs @@ -0,0 +1,138 @@ +use std::{ + ffi::OsStr, + path::{Path, PathBuf}, +}; + +use tap::Tap; +use tracing::debug; + +pub mod chunk; +pub mod primary; +pub mod secondary; + +fn build_stack_of_chunk_names(dir: &Path) -> Result { + let mut chunks = std::fs::read_dir(dir)? + .map_while(|e| e.ok()) + .filter(|e| { + e.path() + .extension() + .map(|e| e.to_string_lossy() == "chunk") + .unwrap_or_default() + }) + .filter_map(|e| e.path().file_stem().map(OsStr::to_owned)) + .map(|s| s.to_string_lossy().to_string()) + .collect::>(); + + chunks.sort(); + chunks.reverse(); + + Ok(chunks) +} + +pub type Block = Vec; + +pub type ChunkName = String; +pub type ChunkNameSack = Vec; + +pub struct ChunkReaders(PathBuf, ChunkNameSack); + +impl Iterator for ChunkReaders { + type Item = Result; + + fn next(&mut self) -> Option { + self.1 + .pop() + .tap(|name| debug!(name, "switched to new chunk")) + .map(|name| chunk::read_blocks(&self.0, &name)) + } +} + +pub type FallibleBlock = Result; + +pub fn read_blocks(dir: &Path) -> Result, std::io::Error> { + let names = build_stack_of_chunk_names(dir)?; + + let iter = ChunkReaders(dir.to_owned(), names) + .map_while(Result::ok) + .flatten(); + + Ok(iter) +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use pallas_traverse::MultiEraBlock; + use tracing::trace; + + #[test] + fn can_read_multiple_chunks_from_folder() { + let reader = super::read_blocks(Path::new("../test_data")).unwrap(); + + let mut count = 0; + let mut last_slot = None; + + for block in reader { + let block = block.unwrap(); + let block = MultiEraBlock::decode(&block).unwrap(); + + if let Some(last_slot) = last_slot { + assert!(last_slot < block.slot()); + } + + last_slot = Some(block.slot()); + count += 1; + } + + assert_eq!(count, 1778); + } + + #[test] + #[ignore] + fn can_read_whole_mithril_snapshot() { + tracing::subscriber::set_global_default( + tracing_subscriber::FmtSubscriber::builder() + .with_max_level(tracing::Level::DEBUG) + .finish(), + ) + .unwrap(); + + let path = option_env!("PALLAS_MITHRIL_SNAPSHOT_PATH").unwrap(); + let reader = super::read_blocks(Path::new(path)).unwrap(); + + let mut count = 0; + let mut last_slot = None; + let mut last_height = None; + let mut last_hash = None; + + for block in reader.take_while(Result::is_ok) { + let block = block.unwrap(); + let block = MultiEraBlock::decode(&block).unwrap(); + + trace!("{}", block.hash()); + + if let Some(last_slot) = last_slot { + assert!(last_slot < block.slot()); + } + + if let Some(last_height) = last_height { + assert_eq!(last_height + 1, block.number()); + } + + if let Some(last_hash) = last_hash { + if let Some(expected) = block.header().previous_hash() { + assert_eq!(last_hash, expected) + } + } + + last_slot = Some(block.slot()); + last_height = Some(block.number()); + last_hash = Some(block.hash()); + + count += 1; + } + + assert_eq!(count, 1_563_646); + } +} diff --git a/pallas-hardano/src/storage/immutable/primary.rs b/pallas-hardano/src/storage/immutable/primary.rs new file mode 100644 index 0000000..ed2546a --- /dev/null +++ b/pallas-hardano/src/storage/immutable/primary.rs @@ -0,0 +1,187 @@ +use std::{ + fs::File, + io::{BufReader, Read}, +}; + +use binary_layout::prelude::*; + +// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2 +define_layout!(layout, BigEndian, { + secondary_offset: u32, +}); + +pub type RelativeSlot = u32; +pub type SecondaryOffset = u32; + +#[derive(Debug)] +pub enum Entry { + Empty(RelativeSlot), + Occupied(RelativeSlot, SecondaryOffset), +} + +impl Entry { + pub fn offset(&self) -> Option { + match self { + Entry::Empty(_) => None, + Entry::Occupied(_, x) => Some(*x), + } + } +} + +pub struct Reader { + inner: BufReader, + version: u8, + last_slot: Option, + last_offset: Option, + next_offset: Option, +} + +impl Reader { + fn read_version(inner: &mut BufReader) -> Result { + let mut buf = vec![0u8; 1]; + inner.read_exact(&mut buf)?; + let version = buf.first().unwrap(); + + Ok(*version) + } + + pub fn open(file: File) -> Result { + let mut inner = BufReader::new(file); + let version = Reader::read_version(&mut inner)?; + + let last_offset = match Self::read_offset(&mut inner) { + Some(offset) => Some(offset?), + None => None, + }; + + let next_offset = match Self::read_offset(&mut inner) { + Some(offset) => Some(offset?), + None => None, + }; + + Ok(Self { + inner, + version, + last_slot: None, + last_offset, + next_offset, + }) + } + + pub fn version(&self) -> u8 { + self.version + } + + pub fn next_occupied(&mut self) -> Option> { + loop { + let next = self.next(); + + match next { + None => break None, + Some(Err(err)) => break Some(Err(err)), + Some(Ok(entry)) => match &entry { + Entry::Occupied(..) => break Some(Ok(entry)), + Entry::Empty(_) => continue, + }, + } + } + } + + fn read_offset(file: &mut BufReader) -> Option> { + let mut buf = vec![0u8; layout::SIZE.unwrap()]; + + match file.read_exact(&mut buf) { + Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => None, + Err(err) => Some(Err(err)), + Ok(_) => { + let view = layout::View::new(&buf); + let offset = view.secondary_offset().read(); + Some(Ok(offset)) + } + } + } +} + +impl Iterator for Reader { + type Item = Result; + + fn next(&mut self) -> Option { + match (self.last_offset, self.next_offset) { + (None, _) => None, + (Some(_), None) => None, + (Some(last), Some(next)) => { + let slot = self.last_slot.map(|x| x + 1).unwrap_or_default(); + + let entry = if next > last { + Entry::Occupied(slot, last) + } else { + Entry::Empty(slot) + }; + + self.last_slot = Some(slot); + self.last_offset = Some(next); + self.next_offset = Self::read_offset(&mut self.inner).map(|x| x.unwrap()); + + Some(Ok(entry)) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn can_parse_a_real_world_file() { + let file = std::fs::File::open("../test_data/01836.primary").unwrap(); + let reader = super::Reader::open(file).unwrap(); + + assert_eq!(reader.version(), 1); + + let mut last_slot = None; + let mut last_offset = None; + + for entry in reader { + let entry = entry.unwrap(); + + match entry { + Entry::Occupied(slot, offset) => { + if let Some(last_slot) = last_slot { + assert!(slot > last_slot); + } + + if let Some(last_offset) = last_offset { + assert!(offset > last_offset); + } + + last_slot = Some(slot); + last_offset = Some(offset); + } + Entry::Empty(slot) => { + if let Some(last_slot) = last_slot { + assert!(slot > last_slot); + } + + last_slot = Some(slot); + } + } + } + } + + #[test] + fn yield_occupied_correctly() { + let file = std::fs::File::open("../test_data/01836.primary").unwrap(); + + let mut count = 0; + let mut reader = super::Reader::open(file).unwrap(); + + while let Some(entry) = reader.next_occupied() { + // make sure that it has an offset since it's occupied + entry.unwrap().offset().unwrap(); + count += 1; + } + + assert_eq!(count, 913); + } +} diff --git a/pallas-hardano/src/storage/immutable/secondary.rs b/pallas-hardano/src/storage/immutable/secondary.rs new file mode 100644 index 0000000..11e6760 --- /dev/null +++ b/pallas-hardano/src/storage/immutable/secondary.rs @@ -0,0 +1,129 @@ +use std::{ + fs::File, + io::{BufReader, Read, Seek}, + path::Path, +}; + +pub type PrimaryIndex = super::primary::Reader; + +use binary_layout::prelude::*; + +use crate::storage::immutable::{primary, secondary}; + +// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2 +define_layout!(layout, BigEndian, { + block_offset: u64, + header_offset: u16, + header_size: u16, + checksum: u32, + header_hash: [u8; 32], + block_or_ebb: [u8; 8], +}); + +#[derive(Debug)] +pub struct Entry { + pub block_offset: u64, + pub header_offset: u16, + pub header_size: u16, + pub checksum: u32, + pub header_hash: [u8; 32], + pub block_or_ebb: [u8; 8], +} + +impl Entry { + fn from(view: layout::View) -> Self + where + S: AsRef<[u8]>, + { + Self { + block_offset: view.block_offset().read(), + header_offset: view.header_offset().read(), + header_size: view.header_size().read(), + checksum: view.checksum().read(), + header_hash: *view.header_hash(), + block_or_ebb: *view.block_or_ebb(), + } + } +} + +pub type SecondaryOffset = u32; + +pub struct Reader { + inner: BufReader, + index: PrimaryIndex, + current: Option, +} + +impl Reader { + pub fn open(mut index: PrimaryIndex, file: File) -> Result { + let inner = BufReader::new(file); + + match index.next_occupied() { + Some(result) => Ok(Self { + inner, + index, + current: result?.offset(), + }), + None => Ok(Self { + inner, + index, + current: None, + }), + } + } +} + +impl Iterator for Reader { + type Item = Result; + + fn next(&mut self) -> Option { + let current = self.current?; + + let start = self.inner.stream_position().unwrap(); + let delta = current as u64 - start; + self.inner.seek_relative(delta as i64).unwrap(); + + let mut buf = vec![0u8; layout::SIZE.unwrap()]; + + match self.inner.read_exact(&mut buf) { + Err(err) => Some(Err(err)), + Ok(_) => { + let view = layout::View::new(&buf); + let entry = Entry::from(view); + + self.current = self + .index + .next_occupied() + .map(|x| x.unwrap()) + .and_then(|x| x.offset()); + + Some(Ok(entry)) + } + } + } +} + +pub fn read_entries(dir: &Path, name: &str) -> Result { + let primary = dir.join(name).with_extension("primary"); + let primary = std::fs::File::open(primary)?; + let primary = primary::Reader::open(primary)?; + + let secondary = dir.join(name).with_extension("secondary"); + let secondary = std::fs::File::open(secondary)?; + + secondary::Reader::open(primary, secondary) +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + #[test] + fn can_parse_all_entries() { + let reader = super::read_entries(Path::new("../test_data"), "01836").unwrap(); + + for entry in reader { + entry.unwrap(); + } + } +} diff --git a/pallas-hardano/src/storage/mod.rs b/pallas-hardano/src/storage/mod.rs new file mode 100644 index 0000000..1c31b5e --- /dev/null +++ b/pallas-hardano/src/storage/mod.rs @@ -0,0 +1,3 @@ +//! Storage compatible with the Haskell Cardano node implementation + +pub mod immutable; diff --git a/pallas/Cargo.toml b/pallas/Cargo.toml index d40b1d0..ab299cd 100644 --- a/pallas/Cargo.toml +++ b/pallas/Cargo.toml @@ -22,7 +22,8 @@ pallas-utxorpc = { version = "=0.20.0", path = "../pallas-utxorpc/" } pallas-configs = { version = "=0.20.0", path = "../pallas-configs/" } pallas-rolldb = { version = "=0.20.0", path = "../pallas-rolldb/", optional = true } pallas-wallet = { version = "=0.20.0", path = "../pallas-wallet/", optional = true } -pallas-txbuilder = { version = "=0.20.0", path = "../pallas-txbuilder/" } +pallas-hardano = { version = "=0.20.0", path = "../pallas-hardano/", optional = true } +pallas-txbuilder = { version = "=0.20.0", path = "../pallas-txbuilder/", optional = true } [features] -unstable = ["pallas-rolldb", "pallas-wallet"] +unstable = ["pallas-rolldb", "pallas-wallet", "pallas-hardano"] diff --git a/pallas/src/lib.rs b/pallas/src/lib.rs index 4980552..345d1b4 100644 --- a/pallas/src/lib.rs +++ b/pallas/src/lib.rs @@ -47,6 +47,10 @@ pub mod storage { #[cfg(feature = "unstable")] #[doc(inline)] pub use pallas_rolldb as rolldb; + + #[cfg(feature = "unstable")] + #[doc(inline)] + pub use pallas_hardano::storage as hardano; } #[doc(inline)] diff --git a/test_data/01285.chunk b/test_data/01285.chunk new file mode 100644 index 0000000..a5a70ae Binary files /dev/null and b/test_data/01285.chunk differ diff --git a/test_data/01285.primary b/test_data/01285.primary new file mode 100644 index 0000000..fa35c80 Binary files /dev/null and b/test_data/01285.primary differ diff --git a/test_data/01285.secondary b/test_data/01285.secondary new file mode 100644 index 0000000..d044324 Binary files /dev/null and b/test_data/01285.secondary differ diff --git a/test_data/01836.chunk b/test_data/01836.chunk new file mode 100644 index 0000000..e9f7851 Binary files /dev/null and b/test_data/01836.chunk differ diff --git a/test_data/01836.primary b/test_data/01836.primary new file mode 100644 index 0000000..b68c86d Binary files /dev/null and b/test_data/01836.primary differ diff --git a/test_data/01836.secondary b/test_data/01836.secondary new file mode 100644 index 0000000..f5b8e97 Binary files /dev/null and b/test_data/01836.secondary differ diff --git a/test_data/02019.chunk b/test_data/02019.chunk new file mode 100644 index 0000000..0c173c2 Binary files /dev/null and b/test_data/02019.chunk differ diff --git a/test_data/02019.primary b/test_data/02019.primary new file mode 100644 index 0000000..633f0cb Binary files /dev/null and b/test_data/02019.primary differ diff --git a/test_data/02019.secondary b/test_data/02019.secondary new file mode 100644 index 0000000..cd38884 Binary files /dev/null and b/test_data/02019.secondary differ