feat(hardano): implement immutable db chunk parsing (#328)

This commit is contained in:
Santiago Carmuega 2023-12-10 11:28:27 -03:00 committed by GitHub
parent fcbaf4ed97
commit fa913e4799
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
19 changed files with 608 additions and 2 deletions

View file

@ -12,6 +12,7 @@ members = [
"pallas-traverse", "pallas-traverse",
"pallas-txbuilder", "pallas-txbuilder",
"pallas-utxorpc", "pallas-utxorpc",
"pallas-hardano",
"pallas-wallet", "pallas-wallet",
"pallas", "pallas",
"examples/block-download", "examples/block-download",

21
pallas-hardano/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "pallas-hardano"
description = "Pallas interoperability with the Haskell Cardano node implementation"
version = "0.20.0"
edition = "2021"
repository = "https://github.com/txpipe/pallas"
homepage = "https://github.com/txpipe/pallas"
documentation = "https://docs.rs/pallas-hardano"
license = "Apache-2.0"
readme = "README.md"
authors = ["Santiago Carmuega <santiago@carmuega.me>"]
[dependencies]
binary-layout = "3.2.0"
tap = "1.0.1"
tracing = "0.1.40"
[dev-dependencies]
tracing-subscriber = "0.3.17"
hex = "0.4.3"
pallas-traverse = { version = "0.20.0", path = "../pallas-traverse" }

View file

@ -0,0 +1 @@
pub mod storage;

View file

@ -0,0 +1,121 @@
use std::{
fs::File,
io::{BufReader, Read, Seek},
path::Path,
};
use immutable::secondary;
use tracing::trace;
use crate::storage::immutable;
/// The secondary index reader, used here as the source of block offsets.
pub type SecondaryIndex = super::secondary::Reader;
/// Iterator over the raw blocks stored in a single chunk file.
///
/// Block boundaries are not stored in the chunk file itself; they are taken
/// from the `block_offset` of consecutive secondary index entries.
pub struct Reader {
/// Buffered handle over the chunk file; blocks are read sequentially from it.
inner: BufReader<File>,
/// Secondary index entries, consumed one step ahead of the blocks.
index: SecondaryIndex,
/// Index entry of the block that the next call to `next()` will yield
/// (`None` once the chunk is exhausted).
current: Option<super::secondary::Entry>,
/// Index entry following `current`; its `block_offset` marks where the
/// current block ends. `None` means `current` is the last block of the chunk.
next: Option<super::secondary::Entry>,
}
impl Reader {
    /// Builds a chunk reader from an already opened secondary index and chunk file.
    ///
    /// The first two index entries are pre-fetched so that the iterator always
    /// knows where the current block ends.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while reading the first index entries.
    fn open(mut index: SecondaryIndex, chunks: File) -> Result<Self, std::io::Error> {
        let inner = BufReader::new(chunks);

        let current = index.next().transpose()?;
        let next = index.next().transpose()?;

        Ok(Self {
            inner,
            index,
            current,
            next,
        })
    }

    /// Reads the block that spans from the current file position up to
    /// (but not including) `next_offset`.
    ///
    /// # Errors
    ///
    /// * any I/O error from querying the position or reading the bytes;
    /// * `InvalidData` if `next_offset` lies before the current position or
    ///   the resulting length does not fit in `usize` (corrupt index).
    fn read_middle_block(
        file: &mut BufReader<File>,
        next_offset: u64,
    ) -> Result<Vec<u8>, std::io::Error> {
        let start = file.stream_position()?;

        // A plain subtraction would panic (debug) or wrap into a gigantic
        // allocation (release) when the index offsets are not monotonic.
        let delta = next_offset.checked_sub(start).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                format!(
                    "next block offset {} precedes current chunk position {}",
                    next_offset, start
                ),
            )
        })?;

        let len = usize::try_from(delta)
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;

        trace!(start, delta, "reading chunk middle block");

        let mut buf = vec![0u8; len];
        file.read_exact(&mut buf)?;

        Ok(buf)
    }

    /// Reads the final block of the chunk: everything from the current file
    /// position to the end of the file.
    ///
    /// # Errors
    ///
    /// Returns any I/O error from querying the position or reading the bytes.
    fn read_last_block(file: &mut BufReader<File>) -> Result<Vec<u8>, std::io::Error> {
        let start = file.stream_position()?;

        trace!(start, "reading chunk last block");

        let mut buf = vec![];
        file.read_to_end(&mut buf)?;

        Ok(buf)
    }
}
impl Iterator for Reader {
    type Item = Result<Vec<u8>, std::io::Error>;

    /// Yields the raw bytes of the next block in the chunk.
    ///
    /// Any I/O error (while reading a block or while advancing the secondary
    /// index) is yielded as `Some(Err(_))` instead of panicking, and the
    /// iterator is exhausted afterwards: once a read has failed, the file
    /// position can no longer be trusted to sit on a block boundary.
    fn next(&mut self) -> Option<Self::Item> {
        // `take()` leaves both slots empty, so every early return below
        // terminates the iteration on the following call.
        match (self.current.take(), self.next.take()) {
            (None, _) => None,
            (Some(_), Some(next)) => {
                let block = match Self::read_middle_block(&mut self.inner, next.block_offset) {
                    Ok(block) => block,
                    Err(err) => return Some(Err(err)),
                };

                // Surface index read failures to the caller instead of unwrapping.
                let upcoming = match self.index.next().transpose() {
                    Ok(entry) => entry,
                    Err(err) => return Some(Err(err)),
                };

                self.current = Some(next);
                self.next = upcoming;

                Some(Ok(block))
            }
            // Last block: no following entry, so read until end of file.
            (Some(_), None) => Some(Self::read_last_block(&mut self.inner)),
        }
    }
}
/// Opens the `<name>.primary`, `<name>.secondary` and `<name>.chunk` files
/// found in `dir` and returns an iterator over the blocks of that chunk.
///
/// # Errors
///
/// Returns the first I/O error hit while opening any of the three files or
/// while initialising the index readers.
pub fn read_blocks(dir: &Path, name: &str) -> Result<Reader, std::io::Error> {
    // All three files share the same stem and only differ by extension.
    let open_sibling = |ext: &str| std::fs::File::open(dir.join(name).with_extension(ext));

    let primary_index = immutable::primary::Reader::open(open_sibling("primary")?)?;
    let secondary_index = secondary::Reader::open(primary_index, open_sibling("secondary")?)?;

    Reader::open(secondary_index, open_sibling("chunk")?)
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    /// Every block of the sample chunk must decode as a multi-era block.
    #[test]
    fn it_can_decode_all_blocks() {
        let dir = Path::new("../test_data");
        let blocks = super::read_blocks(dir, "01285").unwrap();

        blocks.for_each(|raw| {
            let bytes = raw.unwrap();
            pallas_traverse::MultiEraBlock::decode(&bytes).unwrap();
        });
    }
}

View file

@ -0,0 +1,138 @@
use std::{
ffi::OsStr,
path::{Path, PathBuf},
};
use tap::Tap;
use tracing::debug;
pub mod chunk;
pub mod primary;
pub mod secondary;
/// Collects the stems of every `*.chunk` file in `dir`, sorted in descending
/// order so that `Vec::pop` hands them out in ascending order.
///
/// # Errors
///
/// Returns the I/O error raised while opening the directory or while reading
/// any of its entries. A failing entry is reported rather than silently
/// truncating the list of chunks.
fn build_stack_of_chunk_names(dir: &Path) -> Result<ChunkNameSack, std::io::Error> {
    let mut chunks = Vec::new();

    for entry in std::fs::read_dir(dir)? {
        let path = entry?.path();

        if path.extension() != Some(OsStr::new("chunk")) {
            continue;
        }

        if let Some(stem) = path.file_stem() {
            chunks.push(stem.to_string_lossy().into_owned());
        }
    }

    // Descending order: the lowest-numbered chunk ends up on top of the stack.
    chunks.sort_unstable_by(|a, b| b.cmp(a));

    Ok(chunks)
}
/// Raw, undecoded bytes of a single block.
pub type Block = Vec<u8>;
/// File stem shared by the `.chunk`, `.primary` and `.secondary` files of a chunk.
pub type ChunkName = String;
/// Stack of chunk names; the name to process next sits at the end of the vector.
pub type ChunkNameSack = Vec<ChunkName>;
/// Iterator that opens one chunk reader per chunk name.
///
/// Field `0` is the directory holding the chunk files, field `1` the stack of
/// chunk names still to be opened.
pub struct ChunkReaders(PathBuf, ChunkNameSack);
impl Iterator for ChunkReaders {
    type Item = Result<chunk::Reader, std::io::Error>;

    /// Pops the next chunk name off the stack and opens a block reader for it.
    ///
    /// Returns `None` once the stack is empty; opening failures are yielded
    /// as `Some(Err(_))`.
    fn next(&mut self) -> Option<Self::Item> {
        let name = self.1.pop();

        // Logged on every call, including the final one where `name` is `None`.
        debug!(name, "switched to new chunk");

        let name = name?;
        Some(chunk::read_blocks(&self.0, &name))
    }
}
pub type FallibleBlock = Result<Block, std::io::Error>;
pub fn read_blocks(dir: &Path) -> Result<impl Iterator<Item = FallibleBlock>, std::io::Error> {
let names = build_stack_of_chunk_names(dir)?;
let iter = ChunkReaders(dir.to_owned(), names)
.map_while(Result::ok)
.flatten();
Ok(iter)
}
#[cfg(test)]
mod tests {
use std::path::Path;
use pallas_traverse::MultiEraBlock;
use tracing::trace;
// Reads every chunk found in the shared test data folder and checks that the
// decoded blocks come out in strictly increasing slot order.
#[test]
fn can_read_multiple_chunks_from_folder() {
let reader = super::read_blocks(Path::new("../test_data")).unwrap();
let mut count = 0;
let mut last_slot = None;
for block in reader {
let block = block.unwrap();
let block = MultiEraBlock::decode(&block).unwrap();
if let Some(last_slot) = last_slot {
assert!(last_slot < block.slot());
}
last_slot = Some(block.slot());
count += 1;
}
// Total number of blocks expected across the chunk files in the test data folder.
assert_eq!(count, 1778);
}
// Ignored by default: needs a full snapshot on disk. Note that `option_env!`
// is evaluated at compile time, so PALLAS_MITHRIL_SNAPSHOT_PATH must be set
// when the test binary is built, not when it is run.
#[test]
#[ignore]
fn can_read_whole_mithril_snapshot() {
tracing::subscriber::set_global_default(
tracing_subscriber::FmtSubscriber::builder()
.with_max_level(tracing::Level::DEBUG)
.finish(),
)
.unwrap();
let path = option_env!("PALLAS_MITHRIL_SNAPSHOT_PATH").unwrap();
let reader = super::read_blocks(Path::new(path)).unwrap();
let mut count = 0;
let mut last_slot = None;
let mut last_height = None;
let mut last_hash = None;
// Stops at the first read error instead of failing on it.
for block in reader.take_while(Result::is_ok) {
let block = block.unwrap();
let block = MultiEraBlock::decode(&block).unwrap();
trace!("{}", block.hash());
// Slots must strictly increase from one block to the next.
if let Some(last_slot) = last_slot {
assert!(last_slot < block.slot());
}
// Block numbers must be consecutive.
if let Some(last_height) = last_height {
assert_eq!(last_height + 1, block.number());
}
// Each block must point back at the hash of the block read just before it
// (only checked when the header carries a previous hash).
if let Some(last_hash) = last_hash {
if let Some(expected) = block.header().previous_hash() {
assert_eq!(last_hash, expected)
}
}
last_slot = Some(block.slot());
last_height = Some(block.number());
last_hash = Some(block.hash());
count += 1;
}
// Block count expected for the snapshot this test was written against.
assert_eq!(count, 1_563_646);
}
}

View file

@ -0,0 +1,187 @@
use std::{
fs::File,
io::{BufReader, Read},
};
use binary_layout::prelude::*;
// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2
//
// Binary layout of a single primary index record: one big-endian `u32`
// holding an offset into the secondary index. The reader below consumes a
// one-byte version first and then a sequence of these records.
define_layout!(layout, BigEndian, {
secondary_offset: u32,
});
/// Slot position counted from the start of the index (the first entry is slot 0).
pub type RelativeSlot = u32;
/// Offset value stored in a primary index record.
pub type SecondaryOffset = u32;

/// One slot of the primary index.
#[derive(Debug)]
pub enum Entry {
    /// The slot has no associated offset.
    Empty(RelativeSlot),
    /// The slot is filled and carries the offset of its secondary index record.
    Occupied(RelativeSlot, SecondaryOffset),
}

impl Entry {
    /// Returns the secondary offset for occupied slots and `None` for empty ones.
    pub fn offset(&self) -> Option<u32> {
        if let Self::Occupied(_, offset) = self {
            Some(*offset)
        } else {
            None
        }
    }
}
/// Streaming reader over a primary index file, yielding one `Entry` per slot.
pub struct Reader {
/// Buffered handle over the primary index file.
inner: BufReader<File>,
/// Version byte read from the start of the file.
version: u8,
/// Relative slot of the entry yielded most recently (`None` before the first one).
last_slot: Option<RelativeSlot>,
/// Offset belonging to the slot that will be yielded next.
last_offset: Option<SecondaryOffset>,
/// Look-ahead offset; compared against `last_offset` to decide whether the
/// slot is occupied.
next_offset: Option<SecondaryOffset>,
}
impl Reader {
    /// Consumes the single leading version byte of the index file.
    fn read_version(inner: &mut BufReader<File>) -> Result<u8, std::io::Error> {
        let mut byte = [0u8; 1];
        inner.read_exact(&mut byte)?;

        Ok(byte[0])
    }

    /// Wraps `file` in a reader, consuming the version byte and pre-fetching
    /// the first two offsets so that iteration can compare neighbours.
    pub fn open(file: File) -> Result<Self, std::io::Error> {
        let mut inner = BufReader::new(file);

        let version = Self::read_version(&mut inner)?;
        let last_offset = Self::read_offset(&mut inner).transpose()?;
        let next_offset = Self::read_offset(&mut inner).transpose()?;

        Ok(Self {
            inner,
            version,
            last_slot: None,
            last_offset,
            next_offset,
        })
    }

    /// Version byte found at the start of the index file.
    pub fn version(&self) -> u8 {
        self.version
    }

    /// Advances past empty slots and returns the next occupied entry, the
    /// first error encountered, or `None` when the index is exhausted.
    pub fn next_occupied(&mut self) -> Option<Result<Entry, std::io::Error>> {
        self.find(|item| !matches!(item, Ok(Entry::Empty(_))))
    }

    /// Reads the next offset record.
    ///
    /// Hitting end-of-file is reported as `None` (no more records); any other
    /// I/O failure is returned as `Some(Err(_))`.
    fn read_offset(file: &mut BufReader<File>) -> Option<Result<SecondaryOffset, std::io::Error>> {
        let mut raw = vec![0u8; layout::SIZE.unwrap()];

        if let Err(err) = file.read_exact(&mut raw) {
            return match err.kind() {
                std::io::ErrorKind::UnexpectedEof => None,
                _ => Some(Err(err)),
            };
        }

        let record = layout::View::new(&raw);
        Some(Ok(record.secondary_offset().read()))
    }
}
impl Iterator for Reader {
    type Item = Result<Entry, std::io::Error>;

    /// Yields the entry for the next relative slot.
    ///
    /// A slot is occupied when the following offset is strictly greater than
    /// its own offset; otherwise it is empty. Iteration ends when there is no
    /// look-ahead offset left to compare against.
    ///
    /// An I/O error while fetching the look-ahead offset is yielded as
    /// `Some(Err(_))` (instead of panicking) and ends the iteration.
    fn next(&mut self) -> Option<Self::Item> {
        let last = self.last_offset?;
        let next = self.next_offset?;

        let slot = self.last_slot.map(|x| x + 1).unwrap_or_default();

        let entry = if next > last {
            Entry::Occupied(slot, last)
        } else {
            Entry::Empty(slot)
        };

        self.last_slot = Some(slot);
        self.last_offset = Some(next);

        match Self::read_offset(&mut self.inner).transpose() {
            Ok(upcoming) => {
                self.next_offset = upcoming;
                Some(Ok(entry))
            }
            Err(err) => {
                // Without a look-ahead offset the next call returns `None`.
                self.next_offset = None;
                Some(Err(err))
            }
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Sample primary index shared by the tests in this module.
    const SAMPLE: &str = "../test_data/01836.primary";

    /// Slots must strictly increase across all entries, and offsets must
    /// strictly increase across occupied entries.
    #[test]
    fn can_parse_a_real_world_file() {
        let reader = super::Reader::open(std::fs::File::open(SAMPLE).unwrap()).unwrap();

        assert_eq!(reader.version(), 1);

        let mut prev_slot = None;
        let mut prev_offset = None;

        for entry in reader {
            let (slot, offset) = match entry.unwrap() {
                Entry::Occupied(slot, offset) => (slot, Some(offset)),
                Entry::Empty(slot) => (slot, None),
            };

            if let Some(prev) = prev_slot {
                assert!(slot > prev);
            }

            if let Some(offset) = offset {
                if let Some(prev) = prev_offset {
                    assert!(offset > prev);
                }
                prev_offset = Some(offset);
            }

            prev_slot = Some(slot);
        }
    }

    /// `next_occupied` must only ever hand out entries that carry an offset.
    #[test]
    fn yield_occupied_correctly() {
        let mut reader = super::Reader::open(std::fs::File::open(SAMPLE).unwrap()).unwrap();

        let count = std::iter::from_fn(|| reader.next_occupied())
            // make sure that it has an offset since it's occupied
            .map(|entry| entry.unwrap().offset().unwrap())
            .count();

        assert_eq!(count, 913);
    }
}

View file

@ -0,0 +1,129 @@
use std::{
fs::File,
io::{BufReader, Read, Seek},
path::Path,
};
pub type PrimaryIndex = super::primary::Reader;
use binary_layout::prelude::*;
use crate::storage::immutable::{primary, secondary};
// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2
//
// Binary layout of a single, fixed-size secondary index record. All integer
// fields are big-endian. `block_offset` is the byte position of the block
// inside the matching chunk file; see the referenced report for the meaning
// of the remaining fields.
define_layout!(layout, BigEndian, {
block_offset: u64,
header_offset: u16,
header_size: u16,
checksum: u32,
header_hash: [u8; 32],
block_or_ebb: [u8; 8],
});
/// Owned copy of one secondary index record.
#[derive(Debug)]
pub struct Entry {
    /// Byte offset of the block inside the chunk file.
    pub block_offset: u64,
    /// Raw `header_offset` field of the record.
    pub header_offset: u16,
    /// Raw `header_size` field of the record.
    pub header_size: u16,
    /// Raw `checksum` field of the record.
    pub checksum: u32,
    /// Raw `header_hash` field of the record.
    pub header_hash: [u8; 32],
    /// Raw `block_or_ebb` field of the record.
    pub block_or_ebb: [u8; 8],
}

impl Entry {
    /// Copies every field out of a layout view into an owned `Entry`.
    fn from<S>(view: layout::View<S>) -> Self
    where
        S: AsRef<[u8]>,
    {
        let block_offset = view.block_offset().read();
        let header_offset = view.header_offset().read();
        let header_size = view.header_size().read();
        let checksum = view.checksum().read();
        let header_hash = *view.header_hash();
        let block_or_ebb = *view.block_or_ebb();

        Self {
            block_offset,
            header_offset,
            header_size,
            checksum,
            header_hash,
            block_or_ebb,
        }
    }
}
/// Byte offset of a record inside the secondary index file.
pub type SecondaryOffset = u32;
/// Streaming reader over a secondary index file, driven by the occupied
/// entries of the matching primary index.
pub struct Reader {
/// Buffered handle over the secondary index file.
inner: BufReader<File>,
/// Primary index; supplies the offsets of the records to read.
index: PrimaryIndex,
/// Offset of the record the next call to `next()` will read
/// (`None` once the primary index has no occupied entries left).
current: Option<SecondaryOffset>,
}
impl Reader {
    /// Builds a secondary index reader positioned on the first occupied
    /// entry of the given primary index.
    ///
    /// # Errors
    ///
    /// Returns the I/O error raised while looking up that first entry.
    pub fn open(mut index: PrimaryIndex, file: File) -> Result<Self, std::io::Error> {
        let current = match index.next_occupied() {
            Some(first) => first?.offset(),
            None => None,
        };

        Ok(Self {
            inner: BufReader::new(file),
            index,
            current,
        })
    }
}
impl Iterator for Reader {
type Item = Result<Entry, std::io::Error>;
fn next(&mut self) -> Option<Self::Item> {
let current = self.current?;
let start = self.inner.stream_position().unwrap();
let delta = current as u64 - start;
self.inner.seek_relative(delta as i64).unwrap();
let mut buf = vec![0u8; layout::SIZE.unwrap()];
match self.inner.read_exact(&mut buf) {
Err(err) => Some(Err(err)),
Ok(_) => {
let view = layout::View::new(&buf);
let entry = Entry::from(view);
self.current = self
.index
.next_occupied()
.map(|x| x.unwrap())
.and_then(|x| x.offset());
Some(Ok(entry))
}
}
}
}
/// Opens `<name>.primary` and `<name>.secondary` inside `dir` and returns an
/// iterator over the secondary index entries.
///
/// # Errors
///
/// Returns the first I/O error hit while opening either file or while
/// initialising the readers.
pub fn read_entries(dir: &Path, name: &str) -> Result<Reader, std::io::Error> {
    let base = dir.join(name);

    let primary_file = std::fs::File::open(base.with_extension("primary"))?;
    let primary_index = primary::Reader::open(primary_file)?;

    let secondary_file = std::fs::File::open(base.with_extension("secondary"))?;
    secondary::Reader::open(primary_index, secondary_file)
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    /// Every entry of the sample secondary index must be readable.
    #[test]
    fn can_parse_all_entries() {
        super::read_entries(Path::new("../test_data"), "01836")
            .unwrap()
            .for_each(|entry| {
                entry.unwrap();
            });
    }
}

View file

@ -0,0 +1,3 @@
//! Storage compatible with the Haskell Cardano node implementation
pub mod immutable;

View file

@ -22,7 +22,8 @@ pallas-utxorpc = { version = "=0.20.0", path = "../pallas-utxorpc/" }
pallas-configs = { version = "=0.20.0", path = "../pallas-configs/" } pallas-configs = { version = "=0.20.0", path = "../pallas-configs/" }
pallas-rolldb = { version = "=0.20.0", path = "../pallas-rolldb/", optional = true } pallas-rolldb = { version = "=0.20.0", path = "../pallas-rolldb/", optional = true }
pallas-wallet = { version = "=0.20.0", path = "../pallas-wallet/", optional = true } pallas-wallet = { version = "=0.20.0", path = "../pallas-wallet/", optional = true }
pallas-txbuilder = { version = "=0.20.0", path = "../pallas-txbuilder/" } pallas-hardano = { version = "=0.20.0", path = "../pallas-hardano/", optional = true }
pallas-txbuilder = { version = "=0.20.0", path = "../pallas-txbuilder/", optional = true }
[features] [features]
unstable = ["pallas-rolldb", "pallas-wallet"] unstable = ["pallas-rolldb", "pallas-wallet", "pallas-hardano"]

View file

@ -47,6 +47,10 @@ pub mod storage {
#[cfg(feature = "unstable")] #[cfg(feature = "unstable")]
#[doc(inline)] #[doc(inline)]
pub use pallas_rolldb as rolldb; pub use pallas_rolldb as rolldb;
#[cfg(feature = "unstable")]
#[doc(inline)]
pub use pallas_hardano::storage as hardano;
} }
#[doc(inline)] #[doc(inline)]

BIN
test_data/01285.chunk Normal file

Binary file not shown.

BIN
test_data/01285.primary Normal file

Binary file not shown.

BIN
test_data/01285.secondary Normal file

Binary file not shown.

BIN
test_data/01836.chunk Normal file

Binary file not shown.

BIN
test_data/01836.primary Normal file

Binary file not shown.

BIN
test_data/01836.secondary Normal file

Binary file not shown.

BIN
test_data/02019.chunk Normal file

Binary file not shown.

BIN
test_data/02019.primary Normal file

Binary file not shown.

BIN
test_data/02019.secondary Normal file

Binary file not shown.