From 7cb1ffe100befaa6236f14c14081961c44d6ad21 Mon Sep 17 00:00:00 2001 From: Matthias Benkort <5680256+KtorZ@users.noreply.github.com> Date: Sat, 13 Jan 2024 14:09:16 +0100 Subject: [PATCH] fix(codec): Fix flat encoding and decoding of arbitrarily size integers (#378) This commits fixes the flat encoding and decoding (and consequently, the zigzag) for large integers in the following ways: - It removes support for encoding and decoding i128 values. - It optionally (feature = "num-bigint") introduces encoding and decoding of large sized integers through the num-bigint::BigInt type. Without the feature enabled, it is still possible to encode and decode isize values; but the use of i128 is now prohibited (as it would overflow on boundaries) in favor of arbitrarily sized integers. The commit also introduces a missing property roundtrip for encoding and decoding large integers, which was missing and thus, failed to identify the overflow problem. See related issue: https://github.com/aiken-lang/aiken/issues/796 --- pallas-codec/Cargo.toml | 4 ++ pallas-codec/src/flat/decode/decoder.rs | 26 ++++++----- pallas-codec/src/flat/decode/mod.rs | 8 +++- pallas-codec/src/flat/encode/encoder.rs | 40 ++++++++++------- pallas-codec/src/flat/encode/mod.rs | 8 +++- pallas-codec/src/flat/zigzag.rs | 60 +++++++++++++++++-------- pallas-codec/tests/flat.rs | 31 +++++++++++++ pallas-codec/tests/zigzag.rs | 10 ++--- 8 files changed, 133 insertions(+), 54 deletions(-) diff --git a/pallas-codec/Cargo.toml b/pallas-codec/Cargo.toml index 0b70455..419d22b 100644 --- a/pallas-codec/Cargo.toml +++ b/pallas-codec/Cargo.toml @@ -14,9 +14,13 @@ authors = [ "Kasey White ", ] +[features] +default = [] + [dependencies] hex = "0.4.3" minicbor = { version = "0.20", features = ["std", "half", "derive"] } +num-bigint = { version = "0.4.4", optional = true } serde = { version = "1.0.143", features = ["derive"] } thiserror = "1.0.39" diff --git a/pallas-codec/src/flat/decode/decoder.rs b/pallas-codec/src/flat/decode/decoder.rs index 63dc8fd..24ef006 100644 --- a/pallas-codec/src/flat/decode/decoder.rs +++ b/pallas-codec/src/flat/decode/decoder.rs @@ -1,7 +1,9 @@ use super::Decode; -use crate::flat::zigzag; - use super::Error; +use crate::flat::zigzag::ZigZag; + +#[cfg(feature = "num-bigint")] +use num_bigint::{BigInt, BigUint}; #[derive(Debug)] pub struct Decoder<'b> { @@ -24,7 +26,8 @@ impl<'b> Decoder<'b> { T::decode(self) } - /// Decode an integer of any size. + /// Decode an isize integer. + /// /// This is byte alignment agnostic. /// First we decode the next 8 bits of the buffer. /// We take the 7 least significant bits as the 7 least significant bits of @@ -35,10 +38,11 @@ impl<'b> Decoder<'b> { /// any more bits. Finally we use zigzag to convert the unsigned integer /// back to a signed integer. pub fn integer(&mut self) -> Result { - Ok(zigzag::to_isize(self.word()?)) + Ok(self.word()?.zigzag()) } - /// Decode an integer of 128 bits size. + /// Decode an integer of an arbitrary size.. + /// /// This is byte alignment agnostic. /// First we decode the next 8 bits of the buffer. /// We take the 7 least significant bits as the 7 least significant bits of @@ -48,8 +52,9 @@ impl<'b> Decoder<'b> { /// so on. If the most significant bit was instead 0 we stop decoding /// any more bits. Finally we use zigzag to convert the unsigned integer /// back to a signed integer. - pub fn big_integer(&mut self) -> Result { - Ok(zigzag::to_i128(self.big_word()?)) + #[cfg(feature = "num-bigint")] + pub fn big_integer(&mut self) -> Result { + Ok(self.big_word()?.zigzag()) } /// Decode a single bit of the buffer to get a bool. @@ -162,15 +167,16 @@ impl<'b> Decoder<'b> { /// filling in the next 7 least significant bits of the unsigned integer and /// so on. If the most significant bit was instead 0 we stop decoding /// any more bits. - pub fn big_word(&mut self) -> Result { + #[cfg(feature = "num-bigint")] + pub fn big_word(&mut self) -> Result { let mut leading_bit = 1; - let mut final_word: u128 = 0; + let mut final_word: BigUint = (0 as u8).into(); let mut shl: u128 = 0; // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit while leading_bit > 0 { let word8 = self.bits8(8)?; let word7 = word8 & 127; - final_word |= (word7 as u128) << shl; + final_word |= >::into(word7) << shl; shl += 7; leading_bit = word8 & 128; } diff --git a/pallas-codec/src/flat/decode/mod.rs b/pallas-codec/src/flat/decode/mod.rs index 431c88a..4e004ca 100644 --- a/pallas-codec/src/flat/decode/mod.rs +++ b/pallas-codec/src/flat/decode/mod.rs @@ -3,6 +3,9 @@ mod error; use crate::flat::filler::Filler; +#[cfg(feature = "num-bigint")] +use num_bigint::BigInt; + pub use decoder::Decoder; pub use error::Error; @@ -36,9 +39,10 @@ impl Decode<'_> for isize { } } -impl Decode<'_> for i128 { +#[cfg(feature = "num-bigint")] +impl Decode<'_> for BigInt { fn decode(d: &mut Decoder) -> Result { - d.big_integer() + Ok(d.big_integer()?.into()) } } diff --git a/pallas-codec/src/flat/encode/encoder.rs b/pallas-codec/src/flat/encode/encoder.rs index d17525b..8a95238 100644 --- a/pallas-codec/src/flat/encode/encoder.rs +++ b/pallas-codec/src/flat/encode/encoder.rs @@ -1,7 +1,9 @@ use super::Encode; -use crate::flat::zigzag; - use super::Error; +use crate::flat::zigzag::ZigZag; + +#[cfg(feature = "num-bigint")] +use num_bigint::{BigInt, BigUint}; pub struct Encoder { pub buffer: Vec, @@ -89,7 +91,8 @@ impl Encoder { Ok(self) } - /// Encode an integer of any size. + /// Encode an isize integer. + /// /// This is byte alignment agnostic. /// First we use zigzag once to double the number and encode the negative /// sign as the least significant bit. Next we encode the 7 least @@ -97,25 +100,21 @@ impl Encoder { /// 127 we encode a leading 1 followed by repeating the encoding above for /// the next 7 bits and so on. pub fn integer(&mut self, i: isize) -> &mut Self { - let i = zigzag::to_usize(i); - - self.word(i); - + self.word(i.zigzag()); self } - /// Encode an integer of 128 bits size. + /// Encode an arbitrarily sized integer. + /// /// This is byte alignment agnostic. /// First we use zigzag once to double the number and encode the negative /// sign as the least significant bit. Next we encode the 7 least /// significant bits of the unsigned integer. If the number is greater than /// 127 we encode a leading 1 followed by repeating the encoding above for /// the next 7 bits and so on. - pub fn big_integer(&mut self, i: i128) -> &mut Self { - let i = zigzag::to_u128(i); - - self.big_word(i); - + #[cfg(feature = "num-bigint")] + pub fn big_integer(&mut self, i: BigInt) -> &mut Self { + self.big_word(i.zigzag()); self } @@ -181,18 +180,25 @@ impl Encoder { /// We encode the 7 least significant bits of the unsigned byte. If the char /// value is greater than 127 we encode a leading 1 followed by /// repeating the above for the next 7 bits and so on. - pub fn big_word(&mut self, c: u128) -> &mut Self { + #[cfg(feature = "num-bigint")] + pub fn big_word(&mut self, c: BigUint) -> &mut Self { let mut d = c; + let zero = (0 as u8).into(); loop { - let mut w = (d & 127) as u8; + let m: usize = 127; + let mut w = (d.clone() & >::into(m)) + .to_bytes_be() + .pop() + .unwrap(); + d >>= 7; - if d != 0 { + if d != zero { w |= 128; } self.bits(8, w); - if d == 0 { + if d == zero { break; } } diff --git a/pallas-codec/src/flat/encode/mod.rs b/pallas-codec/src/flat/encode/mod.rs index 8b30bc9..7d81a5f 100644 --- a/pallas-codec/src/flat/encode/mod.rs +++ b/pallas-codec/src/flat/encode/mod.rs @@ -3,6 +3,9 @@ mod error; use crate::flat::filler::Filler; +#[cfg(feature = "num-bigint")] +use num_bigint::BigInt; + pub use encoder::Encoder; pub use error::Error; @@ -26,9 +29,10 @@ impl Encode for u8 { } } -impl Encode for i128 { +#[cfg(feature = "num-bigint")] +impl Encode for BigInt { fn encode(&self, e: &mut Encoder) -> Result<(), Error> { - e.big_integer(*self); + e.big_integer(self.clone()); Ok(()) } diff --git a/pallas-codec/src/flat/zigzag.rs b/pallas-codec/src/flat/zigzag.rs index 4b9f4c4..9631e29 100644 --- a/pallas-codec/src/flat/zigzag.rs +++ b/pallas-codec/src/flat/zigzag.rs @@ -1,27 +1,51 @@ -pub fn to_usize(x: isize) -> usize { - let double_x = x << 1; +#[cfg(feature = "num-bigint")] +use num_bigint::{BigInt, BigUint, ToBigInt}; - if x.is_positive() || x == 0 { - double_x as usize - } else { - (-double_x - 1) as usize +pub trait ZigZag { + type Zag; + fn zigzag(self) -> Self::Zag; +} + +#[cfg(feature = "num-bigint")] +impl ZigZag for BigInt { + type Zag = BigUint; + + fn zigzag(self) -> Self::Zag where { + if self >= 0.into() { + self << 1 + } else { + let double: BigInt = self << 1; + -double - >::into(1) + } + .to_biguint() + .expect("number is positive") } } -pub fn to_isize(u: usize) -> isize { - ((u >> 1) as isize) ^ (-((u & 1) as isize)) -} +impl ZigZag for isize { + type Zag = usize; -pub fn to_u128(x: i128) -> u128 { - let double_x = x << 1; - - if x.is_positive() || x == 0 { - double_x as u128 - } else { - (-double_x - 1) as u128 + fn zigzag(self) -> Self::Zag where { + let bits = isize::BITS as i128; + let i = self as i128; + ((i << 1) ^ (i >> (bits - 1))) as usize } } -pub fn to_i128(u: u128) -> i128 { - ((u >> 1) as i128) ^ (-((u & 1) as i128)) +#[cfg(feature = "num-bigint")] +impl ZigZag for BigUint { + type Zag = BigInt; + + fn zigzag(self) -> Self::Zag where { + let i = self.to_bigint().expect("always possible"); + (i.clone() >> 1) ^ -(i & >::into(1)) + } +} + +impl ZigZag for usize { + type Zag = isize; + + fn zigzag(self) -> Self::Zag where { + ((self >> 1) as isize) ^ -((self & 1) as isize) + } } diff --git a/pallas-codec/tests/flat.rs b/pallas-codec/tests/flat.rs index 9d503b6..e63724f 100644 --- a/pallas-codec/tests/flat.rs +++ b/pallas-codec/tests/flat.rs @@ -8,6 +8,37 @@ prop_compose! { } } +#[cfg(feature = "num-bigint")] +mod bigint { + use super::arb_big_vec; + use num_bigint::{BigInt, Sign}; + use pallas_codec::flat::{decode, encode}; + use proptest::prelude::*; + + prop_compose! { + fn arb_isize()(i: isize) -> BigInt { + i.into() + } + } + + fn arb_bigint() -> impl Strategy { + prop_oneof![ + arb_isize(), + arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs)), + arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs)) + ] + } + + proptest! { + #[test] + fn encode_bigint(x in arb_bigint()) { + let bytes = encode(&x).unwrap(); + let decoded: BigInt = decode(&bytes).unwrap(); + assert_eq!(decoded, x); + } + } +} + #[test] fn encode_bool() { let bytes = encode(&true).unwrap(); diff --git a/pallas-codec/tests/zigzag.rs b/pallas-codec/tests/zigzag.rs index 2901b74..f859a01 100644 --- a/pallas-codec/tests/zigzag.rs +++ b/pallas-codec/tests/zigzag.rs @@ -1,18 +1,18 @@ -use pallas_codec::flat::zigzag::{to_isize, to_usize}; +use pallas_codec::flat::zigzag::ZigZag; use proptest::prelude::*; proptest! { #[test] fn zigzag(i: isize) { - let u = to_usize(i); - let converted_i = to_isize(u); + let u = i.zigzag(); + let converted_i = u.zigzag(); assert_eq!(converted_i, i); } #[test] fn zagzig(u: usize) { - let i = to_isize(u); - let converted_u = to_usize(i); + let i = u.zigzag(); + let converted_u = i.zigzag(); assert_eq!(converted_u, u); } }