diff --git a/Cargo.toml b/Cargo.toml index 284bcde..2fb20d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,17 @@ [workspace] - +resolver = "2" members = [ - "pallas-codec", - "pallas-addresses", - "pallas-network", - "pallas-crypto", - "pallas-configs", - "pallas-primitives", - "pallas-traverse", - "pallas-utxorpc", - "pallas", - "examples/block-download", - "examples/block-decode", - "examples/n2n-miniprotocols", - "examples/n2c-miniprotocols", + "pallas-codec", + "pallas-addresses", + "pallas-network", + "pallas-crypto", + "pallas-configs", + "pallas-primitives", + "pallas-traverse", + "pallas-utxorpc", + "pallas", + "examples/block-download", + "examples/block-decode", + "examples/n2n-miniprotocols", + "examples/n2c-miniprotocols", ] diff --git a/pallas-codec/Cargo.toml b/pallas-codec/Cargo.toml index 8b728f2..0bccfa9 100644 --- a/pallas-codec/Cargo.toml +++ b/pallas-codec/Cargo.toml @@ -8,9 +8,17 @@ homepage = "https://github.com/txpipe/pallas" documentation = "https://docs.rs/pallas-codec" license = "Apache-2.0" readme = "README.md" -authors = ["Santiago Carmuega "] +authors = [ + "Santiago Carmuega ", + "Lucas Rosa ", + "Kasey White ", +] [dependencies] hex = "0.4.3" minicbor = { version = "0.19", features = ["std", "half", "derive"] } serde = { version = "1.0.143", features = ["derive"] } +thiserror = "1.0.39" + +[dev-dependencies] +proptest = "1.1.0" diff --git a/pallas-codec/README.md b/pallas-codec/README.md index 6f6cb36..4e83364 100644 --- a/pallas-codec/README.md +++ b/pallas-codec/README.md @@ -1,2 +1,5 @@ # Pallas Codec +## Flat + +A Rust port of the [Haskell reference implementation](https://github.com/Quid2/flat). 
diff --git a/pallas-codec/src/flat/decode/decoder.rs b/pallas-codec/src/flat/decode/decoder.rs
new file mode 100644
index 0000000..c79b15b
--- /dev/null
+++ b/pallas-codec/src/flat/decode/decoder.rs
@@ -0,0 +1,336 @@
+use super::Decode;
+use crate::flat::zigzag;
+
+use super::Error;
+
+/// Bit-level reader over a borrowed, flat-encoded byte slice.
+#[derive(Debug)]
+pub struct Decoder<'b> {
+    /// The bytes being decoded.
+    pub buffer: &'b [u8],
+    /// Number of bits already consumed from the byte at `pos`.
+    pub used_bits: i64,
+    /// Index into `buffer` of the byte currently being read.
+    pub pos: usize,
+}
+
+impl<'b> Decoder<'b> {
+    /// Create a decoder positioned on the first bit of `bytes`.
+    pub fn new(bytes: &'b [u8]) -> Decoder<'b> {
+        Decoder {
+            buffer: bytes,
+            pos: 0,
+            used_bits: 0,
+        }
+    }
+
+    /// Decode any type that implements [`Decode`].
+    pub fn decode<T: Decode<'b>>(&mut self) -> Result<T, Error> {
+        T::decode(self)
+    }
+
+    /// Decode a signed integer.
+    /// This is byte alignment agnostic.
+    /// The value is read as a variable length word (see [`Decoder::word`]):
+    /// 8 bits at a time, where the 7 least significant bits are payload and
+    /// the most significant bit tells whether another group follows.
+    /// Finally we use zigzag to convert the unsigned integer back to a signed integer.
+    pub fn integer(&mut self) -> Result<isize, Error> {
+        Ok(zigzag::to_isize(self.word()?))
+    }
+
+    /// Decode a signed integer of up to 128 bits.
+    /// This is byte alignment agnostic.
+    /// The value is read as a variable length word (see [`Decoder::big_word`]):
+    /// 8 bits at a time, where the 7 least significant bits are payload and
+    /// the most significant bit tells whether another group follows.
+    /// Finally we use zigzag to convert the unsigned integer back to a signed integer.
+    pub fn big_integer(&mut self) -> Result<i128, Error> {
+        Ok(zigzag::to_i128(self.big_word()?))
+    }
+
+    /// Decode a single bit of the buffer to get a bool.
+    /// A 0 bit is `false` and a 1 bit is `true`.
+    /// Returns an `EndOfBuffer` error instead of panicking when no bits are left.
+    pub fn bool(&mut self) -> Result<bool, Error> {
+        self.bit()
+    }
+
+    /// Decode a byte from the buffer.
+    /// This is byte alignment agnostic.
+    /// We use the next 8 bits in the buffer and return the resulting byte.
+    pub fn u8(&mut self) -> Result<u8, Error> {
+        self.bits8(8)
+    }
+
+    /// Decode a byte array.
+    /// Decodes a filler to byte align the buffer, then reads length-prefixed
+    /// blocks: one length byte (up to 255) followed by that many bytes.
+    /// Blocks are concatenated until a length byte of 0 is read.
+    /// If the first length byte is 0 an empty array is returned.
+    pub fn bytes(&mut self) -> Result<Vec<u8>, Error> {
+        self.filler()?;
+        self.byte_array()
+    }
+
+    /// Decode a char.
+    /// This is byte alignment agnostic.
+    /// The code point is read as a variable length word (see [`Decoder::word`]).
+    /// Fails if the decoded word does not fit in 32 bits or is not a valid `char`.
+    pub fn char(&mut self) -> Result<char, Error> {
+        let word = self.word()?;
+
+        // Reject values that would otherwise be silently truncated by the cast below.
+        if word > u32::MAX as usize {
+            return Err(Error::Message(format!(
+                "char code point {} does not fit in 32 bits",
+                word
+            )));
+        }
+
+        let character = word as u32;
+
+        char::from_u32(character).ok_or(Error::DecodeChar(character))
+    }
+
+    // TODO: Do we need this?
+    /// Decode a string stored as a list of chars:
+    /// every char is preceded by a 1 bit and a 0 bit ends the list.
+    pub fn string(&mut self) -> Result<String, Error> {
+        let mut s = String::new();
+        while self.bit()? {
+            s.push(self.char()?);
+        }
+        Ok(s)
+    }
+
+    /// Decode a UTF-8 string.
+    /// The string is stored as a byte array, so this decodes a byte array
+    /// (filler, then length-prefixed blocks terminated by a 0 length byte)
+    /// and validates the bytes as UTF-8.
+    pub fn utf8(&mut self) -> Result<String, Error> {
+        // TODO: Better Error Handling
+        String::from_utf8(Vec::<u8>::decode(self)?).map_err(Error::from)
+    }
+
+    /// Decodes a filler.
+    /// Consumes bits until we hit a bit that is 1.
+    /// Expects that the 1 is at the end of the current byte in the buffer;
+    /// this is not checked here.
+    pub fn filler(&mut self) -> Result<(), Error> {
+        while self.zero()? {}
+        Ok(())
+    }
+
+    /// Decode a variable length unsigned word.
+    /// This is byte alignment agnostic.
+    /// We decode 8 bits at a time. The 7 least significant bits of each group
+    /// fill in the next 7 least significant bits of the result.
+    /// If the most significant bit of the group is 1 another group follows,
+    /// if it is 0 we stop decoding.
+    pub fn word(&mut self) -> Result<usize, Error> {
+        let mut final_word: usize = 0;
+        let mut shl: u32 = 0;
+
+        loop {
+            let word8 = self.bits8(8)?;
+            let word7 = (word8 & 127) as usize;
+
+            // Reject encodings that do not fit in the target type instead of
+            // overflowing the shift amount or silently dropping high bits.
+            if shl >= usize::BITS || (word7 << shl) >> shl != word7 {
+                return Err(Error::Message(
+                    "variable length word does not fit in usize".to_string(),
+                ));
+            }
+
+            final_word |= word7 << shl;
+            shl += 7;
+
+            // The lead bit (128) is set when another group follows.
+            if word8 & 128 == 0 {
+                return Ok(final_word);
+            }
+        }
+    }
+
+    /// Decode a variable length unsigned word of up to 128 bits.
+    /// This is byte alignment agnostic.
+    /// Same encoding as [`Decoder::word`]: groups of 8 bits, 7 payload bits
+    /// each, least significant group first, lead bit set while more follow.
+    pub fn big_word(&mut self) -> Result<u128, Error> {
+        let mut final_word: u128 = 0;
+        let mut shl: u32 = 0;
+
+        loop {
+            let word8 = self.bits8(8)?;
+            let word7 = (word8 & 127) as u128;
+
+            // Reject encodings that do not fit in the target type instead of
+            // overflowing the shift amount or silently dropping high bits.
+            if shl >= u128::BITS || (word7 << shl) >> shl != word7 {
+                return Err(Error::Message(
+                    "variable length word does not fit in u128".to_string(),
+                ));
+            }
+
+            final_word |= word7 << shl;
+            shl += 7;
+
+            // The lead bit (128) is set when another group follows.
+            if word8 & 128 == 0 {
+                return Ok(final_word);
+            }
+        }
+    }
+
+    /// Decode a list of items with a decoder function.
+    /// This is byte alignment agnostic.
+    /// Decode a bit from the buffer.
+    /// If 0 then stop.
+    /// Otherwise we decode an item in the list with the decoder function passed in.
+    /// Then decode the next bit in the buffer and repeat above.
+    /// Returns a list of items decoded with the decoder function.
+    pub fn decode_list_with<T, F>(&mut self, decoder_func: F) -> Result<Vec<T>, Error>
+    where
+        F: Copy + FnOnce(&mut Decoder) -> Result<T, Error>,
+    {
+        let mut vec_array: Vec<T> = Vec::new();
+        while self.bit()? {
+            vec_array.push(decoder_func(self)?)
+        }
+        Ok(vec_array)
+    }
+
+    /// Same as [`Decoder::decode_list_with`], but `state_log` is handed to
+    /// every call of the decoder function.
+    pub fn decode_list_with_debug<T, F>(
+        &mut self,
+        decoder_func: F,
+        state_log: &mut Vec<String>,
+    ) -> Result<Vec<T>, Error>
+    where
+        F: Copy + FnOnce(&mut Decoder, &mut Vec<String>) -> Result<T, Error>,
+    {
+        let mut vec_array: Vec<T> = Vec::new();
+        while self.bit()? {
+            vec_array.push(decoder_func(self, state_log)?)
+        }
+        Ok(vec_array)
+    }
+
+    /// Decode the next bit in the buffer.
+    /// If the bit was 0 then return true.
+    /// Otherwise return false.
+    /// Returns an EndOfBuffer error if used at the end of the buffer.
+    fn zero(&mut self) -> Result<bool, Error> {
+        let current_bit = self.bit()?;
+
+        Ok(!current_bit)
+    }
+
+    /// Decode the next bit in the buffer.
+    /// If the bit was 1 then return true.
+    /// Otherwise return false.
+    /// Returns an EndOfBuffer error if used at the end of the buffer.
+    fn bit(&mut self) -> Result<bool, Error> {
+        if self.pos >= self.buffer.len() {
+            return Err(Error::EndOfBuffer);
+        }
+
+        // Bits are consumed from the most significant bit downwards.
+        let b = self.buffer[self.pos] & (128 >> self.used_bits) > 0;
+
+        self.increment_buffer_by_bit();
+
+        Ok(b)
+    }
+
+    /// Decode a byte array.
+    /// Returns a BufferNotByteAligned error if the buffer is not byte aligned.
+    /// Decodes the next byte to get the block length up to a max of 255,
+    /// then copies that many bytes into the result.
+    /// This repeats until a block length of 0 is read.
+    /// If the first block length is 0 we return an empty array.
+    fn byte_array(&mut self) -> Result<Vec<u8>, Error> {
+        if self.used_bits != 0 {
+            return Err(Error::BufferNotByteAligned);
+        }
+
+        self.ensure_bytes(1)?;
+
+        let mut blk_len = self.buffer[self.pos] as usize;
+
+        self.pos += 1;
+
+        let mut blk_array: Vec<u8> = Vec::new();
+
+        while blk_len != 0 {
+            // The block itself plus the length byte of the block that follows it.
+            self.ensure_bytes(blk_len + 1)?;
+
+            blk_array.extend_from_slice(&self.buffer[self.pos..self.pos + blk_len]);
+
+            self.pos += blk_len;
+
+            blk_len = self.buffer[self.pos] as usize;
+
+            self.pos += 1;
+        }
+
+        Ok(blk_array)
+    }
+
+    /// Decode up to 8 bits.
+    /// This is byte alignment agnostic.
+    /// If num_bits is greater than 8 we return an IncorrectNumBits error.
+    /// If num_bits is 0 nothing is consumed and 0 is returned.
+    /// If there are fewer unused bits in the current byte than num_bits,
+    /// the remaining bits are taken from the most significant bits of the next byte.
+    /// Returns the decoded value, right aligned in the returned byte.
+    pub fn bits8(&mut self, num_bits: usize) -> Result<u8, Error> {
+        if num_bits > 8 {
+            return Err(Error::IncorrectNumBits);
+        }
+
+        // Nothing to read. This also avoids shifting a `u8` by 8 bits below,
+        // which is an arithmetic overflow.
+        if num_bits == 0 {
+            return Ok(0);
+        }
+
+        self.ensure_bits(num_bits)?;
+
+        let unused_bits = 8 - self.used_bits as usize;
+        let leading_zeroes = 8 - num_bits;
+        // Drop the already consumed bits, then right align the requested ones.
+        let r = (self.buffer[self.pos] << self.used_bits as usize) >> leading_zeroes;
+
+        let x = if num_bits > unused_bits {
+            // `ensure_bits` guarantees the next byte exists in this case.
+            r | (self.buffer[self.pos + 1] >> (unused_bits + leading_zeroes))
+        } else {
+            r
+        };
+
+        self.drop_bits(num_bits);
+
+        Ok(x)
+    }
+
+    /// Ensures the buffer has the required bytes passed in by required_bytes.
+    /// Returns a NotEnoughBytes error if there are fewer bytes remaining in the buffer than required_bytes.
+    fn ensure_bytes(&mut self, required_bytes: usize) -> Result<(), Error> {
+        let remaining_bytes = self.buffer.len() as isize - self.pos as isize;
+
+        if required_bytes as isize <= remaining_bytes {
+            return Ok(());
+        }
+
+        Err(Error::NotEnoughBytes(required_bytes))
+    }
+
+    /// Checks that at least `required_bits` bits are left to read, counting
+    /// from the current byte position and the bits already used in that byte.
+    /// Returns a NotEnoughBits error otherwise.
+    fn ensure_bits(&mut self, required_bits: usize) -> Result<(), Error> {
+        let remaining_bytes = self.buffer.len() as isize - self.pos as isize;
+        let remaining_bits = remaining_bytes * 8 - self.used_bits as isize;
+
+        if required_bits as isize <= remaining_bits {
+            return Ok(());
+        }
+
+        Err(Error::NotEnoughBits(required_bits))
+    }
+
+    /// Advance the read position by `num_bits` bits.
+    /// The byte position moves forward by (num_bits + used bits) / 8 and the
+    /// remainder becomes the new amount of used bits.
+    fn drop_bits(&mut self, num_bits: usize) {
+        let total_bits = num_bits as i64 + self.used_bits;
+
+        self.pos += total_bits as usize / 8;
+        self.used_bits = total_bits % 8;
+    }
+
+    /// Advance the read position by a single bit.
+    /// Once all 8 bits of the current byte are used, move on to the next byte.
+    fn increment_buffer_by_bit(&mut self) {
+        if self.used_bits == 7 {
+            self.pos += 1;
+            self.used_bits = 0;
+        } else {
+            self.used_bits += 1;
+        }
+    }
+}
diff --git a/pallas-codec/src/flat/decode/error.rs b/pallas-codec/src/flat/decode/error.rs
new file mode 100644
index 0000000..59b8c1f
--- /dev/null
+++ b/pallas-codec/src/flat/decode/error.rs
@@ -0,0 +1,23 @@
+use thiserror::Error;
+
+/// Errors that can occur while decoding flat-encoded data.
+#[derive(Error, Debug)]
+pub enum Error {
+    #[error("Reached end of buffer")]
+    EndOfBuffer,
+    #[error("Buffer is not byte aligned")]
+    BufferNotByteAligned,
+    #[error("Incorrect value of num_bits, must be less than 9")]
+    IncorrectNumBits,
+    #[error("Not enough data available, required {0} bytes")]
+    NotEnoughBytes(usize),
+    #[error("Not enough data available, required {0} bits")]
+    NotEnoughBits(usize),
+    #[error(transparent)]
+    DecodeUtf8(#[from] std::string::FromUtf8Error),
+    #[error("Decoding u32 to char {0}")]
+    DecodeChar(u32),
+    #[error("{0}")]
+    Message(String),
+    #[error("Unknown term constructor tag: {0}.\n\nHere are the buffer bytes ({1} preceding) {2}\n\nBuffer position is {3} and buffer length is {4}")]
+    UnknownTermConstructor(u8, usize, String, usize, usize),
+}
diff --git a/pallas-codec/src/flat/decode/mod.rs b/pallas-codec/src/flat/decode/mod.rs
new file mode 100644
index 0000000..431c88a
--- /dev/null
+++ b/pallas-codec/src/flat/decode/mod.rs
@@ -0,0 +1,67 @@
+mod decoder;
+mod error;
+
+use crate::flat::filler::Filler;
+
+pub use decoder::Decoder;
+pub use error::Error;
+
+/// A type that can be read from a flat [`Decoder`].
+pub trait Decode<'b>: Sized {
+    fn decode(d: &mut Decoder) -> Result<Self, Error>;
+}
+
+impl Decode<'_> for Filler {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.filler()?;
+
+        Ok(Filler::FillerEnd)
+    }
+}
+
+impl Decode<'_> for Vec<u8> {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.bytes()
+    }
+}
+
+impl Decode<'_> for u8 {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.u8()
+    }
+}
+
+impl Decode<'_> for isize {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.integer()
+    }
+}
+
+impl Decode<'_> for i128 {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.big_integer()
+    }
+}
+
+impl Decode<'_> for usize {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.word()
+    }
+}
+
+impl Decode<'_> for char {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.char()
+    }
+}
+
+impl Decode<'_> for String {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.utf8()
+    }
+}
+
+impl Decode<'_> for bool {
+    fn decode(d: &mut Decoder) -> Result<Self, Error> {
+        d.bool()
+    }
+}
diff --git a/pallas-codec/src/flat/encode/encoder.rs b/pallas-codec/src/flat/encode/encoder.rs
new file mode 100644
index 0000000..844ed08
--- /dev/null
+++ b/pallas-codec/src/flat/encode/encoder.rs
@@ -0,0 +1,323 @@
+use super::Encode;
+use crate::flat::zigzag;
+
+use super::Error;
+
+/// Bit-level writer producing flat-encoded bytes.
+pub struct Encoder {
+    /// Completed bytes written so far.
+    pub buffer: Vec<u8>,
+    /// Number of bits already written into `current_byte`.
+    used_bits: i64,
+    /// Byte currently being assembled; it is pushed onto `buffer` by `next_word`.
+    current_byte: u8,
+}
+
+impl Default for Encoder {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl Encoder {
+    /// Create an encoder with an empty buffer.
+    pub fn new() -> Encoder {
+        Encoder {
+            buffer: Vec::new(),
+            used_bits: 0,
+            current_byte: 0,
+        }
+    }
+
+    /// Encode any type that implements [`Encode`].
+    pub fn encode<T: Encode>(&mut self, x: T) -> Result<&mut Self, Error> {
+        x.encode(self)?;
+
+        Ok(self)
+    }
+
+    /// Encode 1 unsigned byte.
+    /// Uses the next 8 bits in the buffer, can be byte aligned or byte unaligned
+    pub fn u8(&mut self, x: u8) -> Result<&mut Self, Error> {
+        if self.used_bits == 0 {
+            self.current_byte = x;
+            self.next_word();
+        } else {
+            self.byte_unaligned(x);
+        }
+
+        Ok(self)
+    }
+
+    /// Encode a `bool` value. This is byte alignment agnostic.
+    /// Uses the next unused bit in the current byte to encode this information.
+    /// One for true and Zero for false
+    pub fn bool(&mut self, x: bool) -> &mut Self {
+        if x {
+            self.one();
+        } else {
+            self.zero();
+        }
+
+        self
+    }
+
+    /// Encode a byte array.
+    /// Uses filler to byte align the buffer, then writes byte array length up to 255.
+    /// Following that it writes the next 255 bytes from the array.
+    /// This repeats for every chunk of up to 255 bytes until the end of the byte array.
+    /// A terminating 0 byte is always written last; an empty array is just that 0 byte.
+    pub fn bytes(&mut self, x: &[u8]) -> Result<&mut Self, Error> {
+        // Flush the current byte through the filler first so that the
+        // used bits are reset and the buffer is byte aligned.
+        self.filler();
+        self.byte_array(x)
+    }
+
+    /// Encode a byte array into a buffer that is already byte aligned.
+    /// Returns a BufferNotByteAligned error if any bits of the current byte were used.
+    /// Writes the array in chunks of up to 255 bytes, each preceded by its length,
+    /// followed by a terminating 0 byte. An empty array is just that 0 byte.
+    pub fn byte_array(&mut self, arr: &[u8]) -> Result<&mut Self, Error> {
+        if self.used_bits == 0 {
+            self.write_blk(arr);
+            Ok(self)
+        } else {
+            Err(Error::BufferNotByteAligned)
+        }
+    }
+
+    /// Encode a signed integer.
+    /// This is byte alignment agnostic.
+    /// The value is first zigzag converted to an unsigned integer and then
+    /// written as a variable length word: 7 bits at a time, least significant
+    /// bits first, with a leading 1 on every group that is followed by another.
+    pub fn integer(&mut self, i: isize) -> &mut Self {
+        let unsigned = zigzag::to_usize(i);
+
+        self.word(unsigned)
+    }
+
+    /// Encode a signed integer of 128 bits size.
+    /// This is byte alignment agnostic.
+    /// The value is first zigzag converted to an unsigned integer and then
+    /// written as a variable length word: 7 bits at a time, least significant
+    /// bits first, with a leading 1 on every group that is followed by another.
+    pub fn big_integer(&mut self, i: i128) -> &mut Self {
+        let unsigned = zigzag::to_u128(i);
+
+        self.big_word(unsigned)
+    }
+
+    /// Encode a char.
+    /// This is byte alignment agnostic.
+    /// The code point is written as a variable length word, see [`Encoder::word`].
+    pub fn char(&mut self, c: char) -> &mut Self {
+        let code_point = c as usize;
+
+        self.word(code_point)
+    }
+
+    // TODO: Do we need this?
+    /// Encode a string as a list of chars:
+    /// a 1 bit before every char and a 0 bit after the last one.
+    pub fn string(&mut self, s: &str) -> &mut Self {
+        s.chars().for_each(|c| {
+            self.one();
+            self.char(c);
+        });
+
+        self.zero();
+
+        self
+    }
+
+    /// Encode a string as its UTF-8 bytes.
+    /// The bytes are written with the byte array encoding, see [`Encoder::bytes`].
+    pub fn utf8(&mut self, s: &str) -> Result<&mut Self, Error> {
+        let raw = s.as_bytes();
+
+        self.bytes(raw)
+    }
+
+    /// Encode an unsigned integer as a variable length word.
+    /// This is byte alignment agnostic.
+    /// We write the 7 least significant bits of the value in a group of 8 bits.
+    /// While more non-zero bits remain, the group gets a leading 1 and the
+    /// next 7 bits are written the same way.
+    pub fn word(&mut self, c: usize) -> &mut Self {
+        let mut rest = c;
+
+        loop {
+            let low = (rest & 127) as u8;
+            rest >>= 7;
+
+            if rest == 0 {
+                // Last group: the leading bit stays 0.
+                self.bits(8, low);
+                break;
+            }
+
+            self.bits(8, low | 128);
+        }
+
+        self
+    }
+
+    /// Encode an unsigned integer of 128 bits size as a variable length word.
+    /// This is byte alignment agnostic.
+    /// We write the 7 least significant bits of the value in a group of 8 bits.
+    /// While more non-zero bits remain, the group gets a leading 1 and the
+    /// next 7 bits are written the same way.
+    pub fn big_word(&mut self, c: u128) -> &mut Self {
+        let mut d = c;
+        loop {
+            let mut w = (d & 127) as u8;
+            d >>= 7;
+
+            // More groups follow: flag it with the leading bit.
+            if d != 0 {
+                w |= 128;
+            }
+            self.bits(8, w);
+
+            if d == 0 {
+                break;
+            }
+        }
+
+        self
+    }
+
+    /// Encode a list of items with an encoder function.
+    /// This is byte alignment agnostic.
+    /// For every item we write a 1 bit followed by the function's encoding of the item.
+    /// After the last item we write a 0 bit. If the list is empty only the 0 bit is written.
+    pub fn encode_list_with<T>(
+        &mut self,
+        list: &[T],
+        encoder_func: for<'r> fn(&T, &'r mut Encoder) -> Result<(), Error>,
+    ) -> Result<&mut Self, Error> {
+        for item in list {
+            self.one();
+            encoder_func(item, self)?;
+        }
+
+        self.zero();
+
+        Ok(self)
+    }
+
+    /// Encodes up to 8 bits of information and is byte alignment agnostic.
+    /// Uses unused bits in the current byte to write out the passed in byte value.
+    /// Overflows to the most significant bits of the next byte if the number of
+    /// bits to use is greater than the unused bits.
+    /// Expects that `val` fits in `num_bits` bits; this is not checked.
+    /// The param num_bits is i64 to match the used_bits type.
+    pub fn bits(&mut self, num_bits: i64, val: u8) -> &mut Self {
+        match (num_bits, val) {
+            (1, 0) => self.zero(),
+            (1, 1) => self.one(),
+            (2, 0) => {
+                self.zero();
+                self.zero();
+            }
+            (2, 1) => {
+                self.zero();
+                self.one();
+            }
+            (2, 2) => {
+                self.one();
+                self.zero();
+            }
+            (2, 3) => {
+                self.one();
+                self.one();
+            }
+            (_, _) => {
+                self.used_bits += num_bits;
+                let unused_bits = 8 - self.used_bits;
+
+                if unused_bits > 0 {
+                    // The value fits and leaves room in the current byte.
+                    self.current_byte |= val << unused_bits;
+                } else if unused_bits == 0 {
+                    // The value exactly fills the current byte.
+                    self.current_byte |= val;
+                    self.next_word();
+                } else {
+                    // The value spills over into the next byte.
+                    let used = -unused_bits;
+                    self.current_byte |= val >> used;
+                    self.next_word();
+                    self.current_byte = val << (8 - used);
+                    self.used_bits = used;
+                }
+            }
+        }
+
+        self
+    }
+
+    /// Write a filler: the remaining bits of the current byte are left as they are and the last bit is set to 1.
+    /// This byte aligns the buffer: the current byte is flushed with its last bit set.
+    pub(crate) fn filler(&mut self) -> &mut Self {
+        self.current_byte |= 1;
+        self.next_word();
+
+        self
+    }
+
+    /// Write a 0 bit into the current byte.
+    /// Flushes the byte to the buffer if this was its last unused bit.
+    fn zero(&mut self) {
+        match self.used_bits {
+            7 => self.next_word(),
+            _ => self.used_bits += 1,
+        }
+    }
+
+    /// Write a 1 bit into the current byte.
+    /// Flushes the byte to the buffer if this was its last unused bit.
+    fn one(&mut self) {
+        match self.used_bits {
+            7 => {
+                self.current_byte |= 1;
+                self.next_word();
+            }
+            _ => {
+                self.current_byte |= 128 >> self.used_bits;
+                self.used_bits += 1;
+            }
+        }
+    }
+    /// Write out a byte when the buffer is not byte aligned.
+    /// The most significant bits of `x` fill the unused bits of the current byte,
+    /// which is pushed to the buffer; the remaining bits of `x` start the next byte.
+    /// The number of used bits does not change.
+    fn byte_unaligned(&mut self, x: u8) {
+        let completed = self.current_byte | (x >> self.used_bits);
+        let carry = x << (8 - self.used_bits);
+
+        self.buffer.push(completed);
+        self.current_byte = carry;
+    }
+
+    /// Push the current byte onto the buffer and start a fresh one:
+    /// the current byte and the used bits are both reset to 0.
+    fn next_word(&mut self) {
+        let finished = self.current_byte;
+
+        self.buffer.push(finished);
+        self.used_bits = 0;
+        self.current_byte = 0;
+    }
+
+    /// Writes the array in chunks of up to 255 bytes, each preceded by its length.
+    /// After the last chunk a 0 byte is written; an empty array is just that 0 byte.
+    /// Bytes are pushed straight onto the buffer, bypassing the current byte.
+    fn write_blk(&mut self, arr: &[u8]) {
+        for chunk in arr.chunks(255) {
+            // `chunks(255)` never yields more than 255 items, so the length fits in a byte.
+            self.buffer.push(chunk.len() as u8);
+            self.buffer.extend_from_slice(chunk);
+        }
+        self.buffer.push(0_u8);
+    }
+}
diff --git a/pallas-codec/src/flat/encode/error.rs b/pallas-codec/src/flat/encode/error.rs
new file mode 100644
index 0000000..97b2cd8
--- /dev/null
+++ b/pallas-codec/src/flat/encode/error.rs
@@ -0,0 +1,9 @@
+use thiserror::Error;
+
+/// Errors that can occur while flat-encoding data.
+#[derive(Error, Debug)]
+pub enum Error {
+    #[error("Buffer is not byte aligned")]
+    BufferNotByteAligned,
+    #[error("{0}")]
+    Message(String),
+}
diff --git a/pallas-codec/src/flat/encode/mod.rs b/pallas-codec/src/flat/encode/mod.rs
new file mode 100644
index 0000000..8b30bc9
--- /dev/null
+++ b/pallas-codec/src/flat/encode/mod.rs
@@ -0,0 +1,107 @@
+mod encoder;
+mod error;
+
+use crate::flat::filler::Filler;
+
+pub use encoder::Encoder;
+pub use error::Error;
+
+/// A type that can be written to a flat [`Encoder`].
+pub trait Encode {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error>;
+}
+
+impl Encode for bool {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.bool(*self);
+
+        Ok(())
+    }
+}
+
+impl Encode for u8 {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.u8(*self)?;
+
+        Ok(())
+    }
+}
+
+impl Encode for i128 {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.big_integer(*self);
+
+        Ok(())
+    }
+}
+
+impl Encode for isize {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.integer(*self);
+
+        Ok(())
+    }
+}
+
+impl Encode for usize {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.word(*self);
+
+        Ok(())
+    }
+}
+
+impl Encode for char {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.char(*self);
+
+        Ok(())
+    }
+}
+
+impl Encode for &str {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.utf8(self)?;
+
+        Ok(())
+    }
+}
+
+impl Encode for String {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.utf8(self)?;
+
+        Ok(())
+    }
+}
+
+impl Encode for Vec<u8> {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.bytes(self)?;
+
+        Ok(())
+    }
+}
+
+impl Encode for &[u8] {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.bytes(self)?;
+
+        Ok(())
+    }
+}
+
+impl<T: Encode> Encode for Box<T> {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        self.as_ref().encode(e)?;
+
+        Ok(())
+    }
+}
+
+impl Encode for Filler {
+    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
+        e.filler();
+
+        Ok(())
+    }
+}
diff --git a/pallas-codec/src/flat/filler.rs b/pallas-codec/src/flat/filler.rs
new file mode 100644
index 0000000..0efe8db
--- /dev/null
+++ b/pallas-codec/src/flat/filler.rs
@@ -0,0 +1,13 @@
+/// Padding used to byte align flat-encoded data.
+pub enum Filler {
+    FillerStart(Box<Filler>),
+    FillerEnd,
+}
+
+impl Filler {
+    /// Number of nested `Filler` values, counting the final `FillerEnd`.
+    pub fn length(&self) -> usize {
+        match self {
+            Filler::FillerStart(f) => f.length() + 1,
+            Filler::FillerEnd => 1,
+        }
+    }
+}
diff --git a/pallas-codec/src/flat/mod.rs b/pallas-codec/src/flat/mod.rs
new file mode 100644
index 0000000..dbaecdf
--- /dev/null
+++ b/pallas-codec/src/flat/mod.rs
@@ -0,0 +1,47 @@
+mod decode;
+mod encode;
+pub mod filler;
+pub mod zigzag;
+
+pub mod en {
+    pub use super::encode::*;
+}
+
+pub mod de {
+    pub use super::decode::*;
+}
+
+/// Convenience trait for types that can be both flat-encoded and flat-decoded.
+pub trait Flat<'b>: en::Encode + de::Decode<'b> {
+    fn flat(&self) -> Result<Vec<u8>, en::Error> {
+        encode(self)
+    }
+
+    fn unflat(bytes: &'b [u8]) -> Result<Self, de::Error> {
+        decode(bytes)
+    }
+}
+
+/// Encode `value` and append a filler so the output ends on a byte boundary.
+pub fn encode<T>(value: &T) -> Result<Vec<u8>, en::Error>
+where
+    T: en::Encode,
+{
+    let mut e = en::Encoder::new();
+
+    value.encode(&mut e)?;
+    e.encode(filler::Filler::FillerEnd)?;
+
+    Ok(e.buffer)
+}
+
+/// Decode a value from `bytes`, then consume the filler that follows it.
+pub fn decode<'b, T>(bytes: &'b [u8]) -> Result<T, de::Error>
+where
+    T: de::Decode<'b>,
+{
+    let mut d = de::Decoder::new(bytes);
+
+    let value = d.decode()?;
+
+    d.decode::<filler::Filler>()?;
+
+    Ok(value)
+}
diff --git a/pallas-codec/src/flat/zigzag.rs b/pallas-codec/src/flat/zigzag.rs
new file mode 100644
index 0000000..4b9f4c4
--- /dev/null
+++ b/pallas-codec/src/flat/zigzag.rs
@@ -0,0 +1,27 @@
+/// Zigzag encode a signed integer: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
+///
+/// Uses the shift/xor form so that no negation is involved: negating the
+/// doubled value overflows for inputs such as `isize::MIN / 2`.
+pub fn to_usize(x: isize) -> usize {
+    // `x >> (BITS - 1)` is all ones for negative values and zero otherwise.
+    ((x << 1) ^ (x >> (isize::BITS - 1))) as usize
+}
+
+/// Inverse of [`to_usize`].
+pub fn to_isize(u: usize) -> isize {
+    ((u >> 1) as isize) ^ (-((u & 1) as isize))
+}
+
+/// Zigzag encode a 128 bit signed integer: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
+///
+/// Uses the shift/xor form so that no negation is involved: negating the
+/// doubled value overflows for inputs such as `i128::MIN / 2`.
+pub fn to_u128(x: i128) -> u128 {
+    // `x >> (BITS - 1)` is all ones for negative values and zero otherwise.
+    ((x << 1) ^ (x >> (i128::BITS - 1))) as u128
+}
+
+/// Inverse of [`to_u128`].
+pub fn to_i128(u: u128) -> i128 {
+    ((u >> 1) as i128) ^ (-((u & 1) as i128))
+}
diff --git a/pallas-codec/src/lib.rs b/pallas-codec/src/lib.rs
index 11787c4..41776b2 100644
--- a/pallas-codec/src/lib.rs
+++ b/pallas-codec/src/lib.rs
@@ -1,3 +1,6 @@
+/// Flat encoding/decoding for Plutus Core
+pub mod flat;
+
 /// Shared re-export of minicbor lib across all Pallas
 pub use minicbor;
 
diff --git a/pallas-codec/tests/flat.rs b/pallas-codec/tests/flat.rs
new file mode 100644
index 0000000..9d503b6
--- /dev/null
+++ b/pallas-codec/tests/flat.rs
@@ -0,0 +1,123 @@
+use pallas_codec::flat::filler::Filler;
+use pallas_codec::flat::{decode, encode};
+use proptest::prelude::*;
+
+prop_compose! {
+    fn arb_big_vec()(size in 255..300, element in any::<u8>()) -> Vec<u8> {
+        (0..size).map(|_| element).collect()
+    }
+}
+
+#[test]
+fn encode_bool() {
+    let bytes = encode(&true).unwrap();
+
+    assert_eq!(bytes, vec![0b10000001]);
+
+    let decoded: bool = decode(bytes.as_slice()).unwrap();
+
+    assert!(decoded);
+
+    let bytes = encode(&false).unwrap();
+
+    assert_eq!(bytes, vec![0b00000001]);
+
+    let decoded: bool = decode(bytes.as_slice()).unwrap();
+
+    assert!(!decoded);
+}
+
+#[test]
+fn encode_u8() {
+    let bytes = encode(&3_u8).unwrap();
+
+    assert_eq!(bytes, vec![0b00000011, 0b00000001]);
+
+    let decoded: u8 = decode(bytes.as_slice()).unwrap();
+
+    assert_eq!(decoded, 3_u8);
+}
+
+proptest! {
+    #[test]
+    fn encode_isize(x: isize) {
+        let bytes = encode(&x).unwrap();
+        let decoded: isize = decode(&bytes).unwrap();
+        assert_eq!(decoded, x);
+    }
+
+    #[test]
+    fn encode_usize(x: usize) {
+        let bytes = encode(&x).unwrap();
+        let decoded: usize = decode(&bytes).unwrap();
+        assert_eq!(decoded, x);
+    }
+
+    #[test]
+    fn encode_char(c: char) {
+        let bytes = encode(&c).unwrap();
+        let decoded: char = decode(&bytes).unwrap();
+        assert_eq!(decoded, c);
+    }
+
+    #[test]
+    fn encode_string(str: String) {
+        let bytes = encode(&str).unwrap();
+        let decoded: String = decode(&bytes).unwrap();
+        assert_eq!(decoded, str);
+    }
+
+    #[test]
+    fn encode_vec_u8(xs: Vec<u8>) {
+        let bytes = encode(&xs).unwrap();
+        let decoded: Vec<u8> = decode(&bytes).unwrap();
+        assert_eq!(decoded, xs);
+    }
+
+    #[test]
+    fn encode_big_vec_u8(xs in arb_big_vec()) {
+        let bytes = encode(&xs).unwrap();
+        let decoded: Vec<u8> = decode(&bytes).unwrap();
+        assert_eq!(decoded, xs);
+    }
+
+    #[test]
+    fn encode_arr_u8(xs: Vec<u8>) {
+        let bytes = encode(&xs.as_slice()).unwrap();
+        let decoded: Vec<u8> = decode(&bytes).unwrap();
+        assert_eq!(decoded, xs);
+    }
+
+    #[test]
+    fn encode_big_arr_u8(xs in arb_big_vec()) {
+        let bytes = encode(&xs.as_slice()).unwrap();
+        let decoded: Vec<u8> = decode(&bytes).unwrap();
+        assert_eq!(decoded, xs);
+    }
+
+    #[test]
+    fn encode_boxed(c: char) {
+        let boxed = Box::new(c);
+        let bytes = encode(&boxed).unwrap();
+        let decoded: char = decode(&bytes).unwrap();
+        assert_eq!(decoded, c);
+    }
+}
+
+#[test]
+fn encode_filler() {
+    let bytes = encode(&Filler::FillerEnd).unwrap();
+
+    assert_eq!(bytes, vec![0b0000001, 0b00000001]);
+
+    let bytes = encode(&Filler::FillerStart(Box::new(Filler::FillerEnd))).unwrap();
+
+    assert_eq!(bytes, vec![0b0000001, 0b00000001]);
+
+    let bytes = encode(&Filler::FillerStart(Box::new(Filler::FillerStart(
+        Box::new(Filler::FillerEnd),
+    ))))
+    .unwrap();
+
+    assert_eq!(bytes, vec![0b0000001, 0b00000001]);
+}
diff --git a/pallas-codec/tests/zigzag.rs b/pallas-codec/tests/zigzag.rs
new file mode 100644
index 0000000..2901b74
--- /dev/null
+++ b/pallas-codec/tests/zigzag.rs
@@ -0,0 +1,18 @@
+use pallas_codec::flat::zigzag::{to_isize, to_usize};
+use proptest::prelude::*;
+
+// Regression test: values whose doubling equals `isize::MIN` used to
+// overflow on negation.
+#[test]
+fn zigzag_edge_values() {
+    assert_eq!(to_usize(isize::MIN), usize::MAX);
+    assert_eq!(to_usize(isize::MIN / 2), usize::MAX / 2);
+    assert_eq!(to_isize(usize::MAX / 2), isize::MIN / 2);
+}
+
+proptest! {
+    #[test]
+    fn zigzag(i: isize) {
+        let u = to_usize(i);
+        let converted_i = to_isize(u);
+        assert_eq!(converted_i, i);
+    }
+
+    #[test]
+    fn zagzig(u: usize) {
+        let i = to_isize(u);
+        let converted_u = to_usize(i);
+        assert_eq!(converted_u, u);
+    }
+}