feat(hardano): implement immutable db chunk parsing (#328)
This commit is contained in:
parent
fcbaf4ed97
commit
fa913e4799
19 changed files with 608 additions and 2 deletions
21
pallas-hardano/Cargo.toml
Normal file
21
pallas-hardano/Cargo.toml
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
[package]
|
||||
name = "pallas-hardano"
|
||||
description = "Pallas interoperability with the Haskel Cardano node implementation"
|
||||
version = "0.20.0"
|
||||
edition = "2021"
|
||||
repository = "https://github.com/txpipe/pallas"
|
||||
homepage = "https://github.com/txpipe/pallas"
|
||||
documentation = "https://docs.rs/pallas-hardano"
|
||||
license = "Apache-2.0"
|
||||
readme = "README.md"
|
||||
authors = ["Santiago Carmuega <santiago@carmuega.me>"]
|
||||
|
||||
[dependencies]
|
||||
binary-layout = "3.2.0"
|
||||
tap = "1.0.1"
|
||||
tracing = "0.1.40"
|
||||
|
||||
[dev-dependencies]
|
||||
tracing-subscriber = "0.3.17"
|
||||
hex = "0.4.3"
|
||||
pallas-traverse = { version = "0.20.0", path = "../pallas-traverse" }
|
||||
1
pallas-hardano/src/lib.rs
Normal file
1
pallas-hardano/src/lib.rs
Normal file
|
|
@ -0,0 +1 @@
|
|||
pub mod storage;
|
||||
121
pallas-hardano/src/storage/immutable/chunk.rs
Normal file
121
pallas-hardano/src/storage/immutable/chunk.rs
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{BufReader, Read, Seek},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
use immutable::secondary;
|
||||
use tracing::trace;
|
||||
|
||||
use crate::storage::immutable;
|
||||
|
||||
pub type SecondaryIndex = super::secondary::Reader;
|
||||
|
||||
pub struct Reader {
|
||||
inner: BufReader<File>,
|
||||
index: SecondaryIndex,
|
||||
current: Option<super::secondary::Entry>,
|
||||
next: Option<super::secondary::Entry>,
|
||||
}
|
||||
|
||||
impl Reader {
|
||||
fn open(mut index: SecondaryIndex, chunks: File) -> Result<Self, std::io::Error> {
|
||||
let inner = BufReader::new(chunks);
|
||||
|
||||
let current = match index.next() {
|
||||
Some(x) => Some(x?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let next = match index.next() {
|
||||
Some(x) => Some(x?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
index,
|
||||
current,
|
||||
next,
|
||||
})
|
||||
}
|
||||
|
||||
fn read_middle_block(
|
||||
file: &mut BufReader<File>,
|
||||
next_offset: u64,
|
||||
) -> Result<Vec<u8>, std::io::Error> {
|
||||
let start = file.stream_position().unwrap();
|
||||
let delta = next_offset - start;
|
||||
trace!(start, delta, "reading chunk middle block");
|
||||
|
||||
let mut buf = vec![0u8; delta as usize];
|
||||
file.read_exact(&mut buf)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
|
||||
fn read_last_block(file: &mut BufReader<File>) -> Result<Vec<u8>, std::io::Error> {
|
||||
let start = file.stream_position().unwrap();
|
||||
trace!(start, "reading chunk last block");
|
||||
|
||||
let mut buf = vec![];
|
||||
file.read_to_end(&mut buf)?;
|
||||
|
||||
Ok(buf)
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Reader {
|
||||
type Item = Result<Vec<u8>, std::io::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match (self.current.take(), self.next.take()) {
|
||||
(None, _) => None,
|
||||
(Some(_), Some(next)) => {
|
||||
let block = Self::read_middle_block(&mut self.inner, next.block_offset);
|
||||
|
||||
self.current = Some(next);
|
||||
self.next = self.index.next().map(|x| x.unwrap());
|
||||
|
||||
Some(block)
|
||||
}
|
||||
(Some(_), None) => {
|
||||
let block = Self::read_last_block(&mut self.inner);
|
||||
|
||||
self.current = None;
|
||||
self.next = None;
|
||||
|
||||
Some(block)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_blocks(dir: &Path, name: &str) -> Result<Reader, std::io::Error> {
|
||||
let primary = dir.join(name).with_extension("primary");
|
||||
let primary = std::fs::File::open(primary)?;
|
||||
let primary = immutable::primary::Reader::open(primary)?;
|
||||
|
||||
let secondary = dir.join(name).with_extension("secondary");
|
||||
let secondary = std::fs::File::open(secondary)?;
|
||||
let secondary = secondary::Reader::open(primary, secondary)?;
|
||||
|
||||
let chunk = dir.join(name).with_extension("chunk");
|
||||
let chunk = std::fs::File::open(chunk)?;
|
||||
Reader::open(secondary, chunk)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
#[test]
|
||||
fn it_can_decode_all_blocks() {
|
||||
let chunk = super::read_blocks(Path::new("../test_data"), "01285").unwrap();
|
||||
|
||||
for block in chunk {
|
||||
let block = block.unwrap();
|
||||
pallas_traverse::MultiEraBlock::decode(&block).unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
138
pallas-hardano/src/storage/immutable/mod.rs
Normal file
138
pallas-hardano/src/storage/immutable/mod.rs
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
use std::{
|
||||
ffi::OsStr,
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use tap::Tap;
|
||||
use tracing::debug;
|
||||
|
||||
pub mod chunk;
|
||||
pub mod primary;
|
||||
pub mod secondary;
|
||||
|
||||
fn build_stack_of_chunk_names(dir: &Path) -> Result<ChunkNameSack, std::io::Error> {
|
||||
let mut chunks = std::fs::read_dir(dir)?
|
||||
.map_while(|e| e.ok())
|
||||
.filter(|e| {
|
||||
e.path()
|
||||
.extension()
|
||||
.map(|e| e.to_string_lossy() == "chunk")
|
||||
.unwrap_or_default()
|
||||
})
|
||||
.filter_map(|e| e.path().file_stem().map(OsStr::to_owned))
|
||||
.map(|s| s.to_string_lossy().to_string())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
chunks.sort();
|
||||
chunks.reverse();
|
||||
|
||||
Ok(chunks)
|
||||
}
|
||||
|
||||
pub type Block = Vec<u8>;
|
||||
|
||||
pub type ChunkName = String;
|
||||
pub type ChunkNameSack = Vec<ChunkName>;
|
||||
|
||||
pub struct ChunkReaders(PathBuf, ChunkNameSack);
|
||||
|
||||
impl Iterator for ChunkReaders {
|
||||
type Item = Result<chunk::Reader, std::io::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.1
|
||||
.pop()
|
||||
.tap(|name| debug!(name, "switched to new chunk"))
|
||||
.map(|name| chunk::read_blocks(&self.0, &name))
|
||||
}
|
||||
}
|
||||
|
||||
pub type FallibleBlock = Result<Block, std::io::Error>;
|
||||
|
||||
pub fn read_blocks(dir: &Path) -> Result<impl Iterator<Item = FallibleBlock>, std::io::Error> {
|
||||
let names = build_stack_of_chunk_names(dir)?;
|
||||
|
||||
let iter = ChunkReaders(dir.to_owned(), names)
|
||||
.map_while(Result::ok)
|
||||
.flatten();
|
||||
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use pallas_traverse::MultiEraBlock;
|
||||
use tracing::trace;
|
||||
|
||||
#[test]
|
||||
fn can_read_multiple_chunks_from_folder() {
|
||||
let reader = super::read_blocks(Path::new("../test_data")).unwrap();
|
||||
|
||||
let mut count = 0;
|
||||
let mut last_slot = None;
|
||||
|
||||
for block in reader {
|
||||
let block = block.unwrap();
|
||||
let block = MultiEraBlock::decode(&block).unwrap();
|
||||
|
||||
if let Some(last_slot) = last_slot {
|
||||
assert!(last_slot < block.slot());
|
||||
}
|
||||
|
||||
last_slot = Some(block.slot());
|
||||
count += 1;
|
||||
}
|
||||
|
||||
assert_eq!(count, 1778);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn can_read_whole_mithril_snapshot() {
|
||||
tracing::subscriber::set_global_default(
|
||||
tracing_subscriber::FmtSubscriber::builder()
|
||||
.with_max_level(tracing::Level::DEBUG)
|
||||
.finish(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let path = option_env!("PALLAS_MITHRIL_SNAPSHOT_PATH").unwrap();
|
||||
let reader = super::read_blocks(Path::new(path)).unwrap();
|
||||
|
||||
let mut count = 0;
|
||||
let mut last_slot = None;
|
||||
let mut last_height = None;
|
||||
let mut last_hash = None;
|
||||
|
||||
for block in reader.take_while(Result::is_ok) {
|
||||
let block = block.unwrap();
|
||||
let block = MultiEraBlock::decode(&block).unwrap();
|
||||
|
||||
trace!("{}", block.hash());
|
||||
|
||||
if let Some(last_slot) = last_slot {
|
||||
assert!(last_slot < block.slot());
|
||||
}
|
||||
|
||||
if let Some(last_height) = last_height {
|
||||
assert_eq!(last_height + 1, block.number());
|
||||
}
|
||||
|
||||
if let Some(last_hash) = last_hash {
|
||||
if let Some(expected) = block.header().previous_hash() {
|
||||
assert_eq!(last_hash, expected)
|
||||
}
|
||||
}
|
||||
|
||||
last_slot = Some(block.slot());
|
||||
last_height = Some(block.number());
|
||||
last_hash = Some(block.hash());
|
||||
|
||||
count += 1;
|
||||
}
|
||||
|
||||
assert_eq!(count, 1_563_646);
|
||||
}
|
||||
}
|
||||
187
pallas-hardano/src/storage/immutable/primary.rs
Normal file
187
pallas-hardano/src/storage/immutable/primary.rs
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{BufReader, Read},
|
||||
};
|
||||
|
||||
use binary_layout::prelude::*;
|
||||
|
||||
// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2
|
||||
define_layout!(layout, BigEndian, {
|
||||
secondary_offset: u32,
|
||||
});
|
||||
|
||||
pub type RelativeSlot = u32;
|
||||
pub type SecondaryOffset = u32;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Entry {
|
||||
Empty(RelativeSlot),
|
||||
Occupied(RelativeSlot, SecondaryOffset),
|
||||
}
|
||||
|
||||
impl Entry {
|
||||
pub fn offset(&self) -> Option<u32> {
|
||||
match self {
|
||||
Entry::Empty(_) => None,
|
||||
Entry::Occupied(_, x) => Some(*x),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Reader {
|
||||
inner: BufReader<File>,
|
||||
version: u8,
|
||||
last_slot: Option<RelativeSlot>,
|
||||
last_offset: Option<SecondaryOffset>,
|
||||
next_offset: Option<SecondaryOffset>,
|
||||
}
|
||||
|
||||
impl Reader {
|
||||
fn read_version(inner: &mut BufReader<File>) -> Result<u8, std::io::Error> {
|
||||
let mut buf = vec![0u8; 1];
|
||||
inner.read_exact(&mut buf)?;
|
||||
let version = buf.first().unwrap();
|
||||
|
||||
Ok(*version)
|
||||
}
|
||||
|
||||
pub fn open(file: File) -> Result<Self, std::io::Error> {
|
||||
let mut inner = BufReader::new(file);
|
||||
let version = Reader::read_version(&mut inner)?;
|
||||
|
||||
let last_offset = match Self::read_offset(&mut inner) {
|
||||
Some(offset) => Some(offset?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let next_offset = match Self::read_offset(&mut inner) {
|
||||
Some(offset) => Some(offset?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(Self {
|
||||
inner,
|
||||
version,
|
||||
last_slot: None,
|
||||
last_offset,
|
||||
next_offset,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn version(&self) -> u8 {
|
||||
self.version
|
||||
}
|
||||
|
||||
pub fn next_occupied(&mut self) -> Option<Result<Entry, std::io::Error>> {
|
||||
loop {
|
||||
let next = self.next();
|
||||
|
||||
match next {
|
||||
None => break None,
|
||||
Some(Err(err)) => break Some(Err(err)),
|
||||
Some(Ok(entry)) => match &entry {
|
||||
Entry::Occupied(..) => break Some(Ok(entry)),
|
||||
Entry::Empty(_) => continue,
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn read_offset(file: &mut BufReader<File>) -> Option<Result<SecondaryOffset, std::io::Error>> {
|
||||
let mut buf = vec![0u8; layout::SIZE.unwrap()];
|
||||
|
||||
match file.read_exact(&mut buf) {
|
||||
Err(err) if err.kind() == std::io::ErrorKind::UnexpectedEof => None,
|
||||
Err(err) => Some(Err(err)),
|
||||
Ok(_) => {
|
||||
let view = layout::View::new(&buf);
|
||||
let offset = view.secondary_offset().read();
|
||||
Some(Ok(offset))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Reader {
|
||||
type Item = Result<Entry, std::io::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match (self.last_offset, self.next_offset) {
|
||||
(None, _) => None,
|
||||
(Some(_), None) => None,
|
||||
(Some(last), Some(next)) => {
|
||||
let slot = self.last_slot.map(|x| x + 1).unwrap_or_default();
|
||||
|
||||
let entry = if next > last {
|
||||
Entry::Occupied(slot, last)
|
||||
} else {
|
||||
Entry::Empty(slot)
|
||||
};
|
||||
|
||||
self.last_slot = Some(slot);
|
||||
self.last_offset = Some(next);
|
||||
self.next_offset = Self::read_offset(&mut self.inner).map(|x| x.unwrap());
|
||||
|
||||
Some(Ok(entry))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn can_parse_a_real_world_file() {
|
||||
let file = std::fs::File::open("../test_data/01836.primary").unwrap();
|
||||
let reader = super::Reader::open(file).unwrap();
|
||||
|
||||
assert_eq!(reader.version(), 1);
|
||||
|
||||
let mut last_slot = None;
|
||||
let mut last_offset = None;
|
||||
|
||||
for entry in reader {
|
||||
let entry = entry.unwrap();
|
||||
|
||||
match entry {
|
||||
Entry::Occupied(slot, offset) => {
|
||||
if let Some(last_slot) = last_slot {
|
||||
assert!(slot > last_slot);
|
||||
}
|
||||
|
||||
if let Some(last_offset) = last_offset {
|
||||
assert!(offset > last_offset);
|
||||
}
|
||||
|
||||
last_slot = Some(slot);
|
||||
last_offset = Some(offset);
|
||||
}
|
||||
Entry::Empty(slot) => {
|
||||
if let Some(last_slot) = last_slot {
|
||||
assert!(slot > last_slot);
|
||||
}
|
||||
|
||||
last_slot = Some(slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn yield_occupied_correctly() {
|
||||
let file = std::fs::File::open("../test_data/01836.primary").unwrap();
|
||||
|
||||
let mut count = 0;
|
||||
let mut reader = super::Reader::open(file).unwrap();
|
||||
|
||||
while let Some(entry) = reader.next_occupied() {
|
||||
// make sure that it has an offset since it's occupied
|
||||
entry.unwrap().offset().unwrap();
|
||||
count += 1;
|
||||
}
|
||||
|
||||
assert_eq!(count, 913);
|
||||
}
|
||||
}
|
||||
129
pallas-hardano/src/storage/immutable/secondary.rs
Normal file
129
pallas-hardano/src/storage/immutable/secondary.rs
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
use std::{
|
||||
fs::File,
|
||||
io::{BufReader, Read, Seek},
|
||||
path::Path,
|
||||
};
|
||||
|
||||
pub type PrimaryIndex = super::primary::Reader;
|
||||
|
||||
use binary_layout::prelude::*;
|
||||
|
||||
use crate::storage::immutable::{primary, secondary};
|
||||
|
||||
// See https://input-output-hk.github.io/ouroboros-consensus/pdfs/report.pdf, section 8.2.2
|
||||
define_layout!(layout, BigEndian, {
|
||||
block_offset: u64,
|
||||
header_offset: u16,
|
||||
header_size: u16,
|
||||
checksum: u32,
|
||||
header_hash: [u8; 32],
|
||||
block_or_ebb: [u8; 8],
|
||||
});
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Entry {
|
||||
pub block_offset: u64,
|
||||
pub header_offset: u16,
|
||||
pub header_size: u16,
|
||||
pub checksum: u32,
|
||||
pub header_hash: [u8; 32],
|
||||
pub block_or_ebb: [u8; 8],
|
||||
}
|
||||
|
||||
impl Entry {
|
||||
fn from<S>(view: layout::View<S>) -> Self
|
||||
where
|
||||
S: AsRef<[u8]>,
|
||||
{
|
||||
Self {
|
||||
block_offset: view.block_offset().read(),
|
||||
header_offset: view.header_offset().read(),
|
||||
header_size: view.header_size().read(),
|
||||
checksum: view.checksum().read(),
|
||||
header_hash: *view.header_hash(),
|
||||
block_or_ebb: *view.block_or_ebb(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub type SecondaryOffset = u32;
|
||||
|
||||
pub struct Reader {
|
||||
inner: BufReader<File>,
|
||||
index: PrimaryIndex,
|
||||
current: Option<SecondaryOffset>,
|
||||
}
|
||||
|
||||
impl Reader {
|
||||
pub fn open(mut index: PrimaryIndex, file: File) -> Result<Self, std::io::Error> {
|
||||
let inner = BufReader::new(file);
|
||||
|
||||
match index.next_occupied() {
|
||||
Some(result) => Ok(Self {
|
||||
inner,
|
||||
index,
|
||||
current: result?.offset(),
|
||||
}),
|
||||
None => Ok(Self {
|
||||
inner,
|
||||
index,
|
||||
current: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Reader {
|
||||
type Item = Result<Entry, std::io::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let current = self.current?;
|
||||
|
||||
let start = self.inner.stream_position().unwrap();
|
||||
let delta = current as u64 - start;
|
||||
self.inner.seek_relative(delta as i64).unwrap();
|
||||
|
||||
let mut buf = vec![0u8; layout::SIZE.unwrap()];
|
||||
|
||||
match self.inner.read_exact(&mut buf) {
|
||||
Err(err) => Some(Err(err)),
|
||||
Ok(_) => {
|
||||
let view = layout::View::new(&buf);
|
||||
let entry = Entry::from(view);
|
||||
|
||||
self.current = self
|
||||
.index
|
||||
.next_occupied()
|
||||
.map(|x| x.unwrap())
|
||||
.and_then(|x| x.offset());
|
||||
|
||||
Some(Ok(entry))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_entries(dir: &Path, name: &str) -> Result<Reader, std::io::Error> {
|
||||
let primary = dir.join(name).with_extension("primary");
|
||||
let primary = std::fs::File::open(primary)?;
|
||||
let primary = primary::Reader::open(primary)?;
|
||||
|
||||
let secondary = dir.join(name).with_extension("secondary");
|
||||
let secondary = std::fs::File::open(secondary)?;
|
||||
|
||||
secondary::Reader::open(primary, secondary)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
#[test]
|
||||
fn can_parse_all_entries() {
|
||||
let reader = super::read_entries(Path::new("../test_data"), "01836").unwrap();
|
||||
|
||||
for entry in reader {
|
||||
entry.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
3
pallas-hardano/src/storage/mod.rs
Normal file
3
pallas-hardano/src/storage/mod.rs
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
//! Storage compatible with the Haskell Cardano node implementation
|
||||
|
||||
pub mod immutable;
|
||||
Loading…
Add table
Add a link
Reference in a new issue