fix(codec): Fix flat encoding and decoding of arbitrarily sized integers (#378)

This commit fixes the flat encoding and decoding (and consequently, the zigzag) for large integers in the following ways: - It removes support for encoding and decoding i128 values. - It optionally (feature = "num-bigint") introduces encoding and decoding of arbitrarily sized integers through the num_bigint::BigInt type. Without the feature enabled, it is still possible to encode and decode isize values; but the use of i128 is now prohibited (as it would overflow on boundaries) in favor of arbitrarily sized integers. The commit also introduces a round-trip property test for encoding and decoding large integers; its absence is why the overflow problem previously went undetected. See related issue: https://github.com/aiken-lang/aiken/issues/796
2024-01-13 14:09:16 +01:00 · 2024-01-13 14:09:16 +01:00 · 7cb1ffe100
commit 7cb1ffe100
parent 14e0809ea2
8 changed files with 133 additions and 54 deletions
--- a/pallas-codec/Cargo.toml
+++ b/pallas-codec/Cargo.toml
@ -14,9 +14,13 @@ authors = [
  "Kasey White <kwhitemsg@gmail.com>",
 ]
 [features]
 default = []
 [dependencies]
 hex = "0.4.3"
 minicbor = { version = "0.20", features = ["std", "half", "derive"] }
 num-bigint = { version = "0.4.4", optional = true }
 serde = { version = "1.0.143", features = ["derive"] }
 thiserror = "1.0.39"
--- a/pallas-codec/src/flat/decode/decoder.rs
+++ b/pallas-codec/src/flat/decode/decoder.rs
@ -1,7 +1,9 @@
 use super::Decode;
 use crate::flat::zigzag;
 use super::Error;
 use crate::flat::zigzag::ZigZag;
 #[cfg(feature = "num-bigint")]
 use num_bigint::{BigInt, BigUint};
 #[derive(Debug)]
 pub struct Decoder<'b> {
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
        T::decode(self)
    }
-    /// Decode an integer of any size.
+    /// Decode an isize integer.
    ///
    /// This is byte alignment agnostic.
    /// First we decode the next 8 bits of the buffer.
    /// We take the 7 least significant bits as the 7 least significant bits of
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
    /// any more bits. Finally we use zigzag to convert the unsigned integer
    /// back to a signed integer.
    pub fn integer(&mut self) -> Result<isize, Error> {
-        Ok(zigzag::to_isize(self.word()?))
+        Ok(self.word()?.zigzag())
    }
-    /// Decode an integer of 128 bits size.
+    /// Decode an integer of an arbitrary size.
    ///
    /// This is byte alignment agnostic.
    /// First we decode the next 8 bits of the buffer.
    /// We take the 7 least significant bits as the 7 least significant bits of
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
    /// so on. If the most significant bit was instead 0 we stop decoding
    /// any more bits. Finally we use zigzag to convert the unsigned integer
    /// back to a signed integer.
-    pub fn big_integer(&mut self) -> Result<i128, Error> {
+    #[cfg(feature = "num-bigint")]
-        Ok(zigzag::to_i128(self.big_word()?))
+    pub fn big_integer(&mut self) -> Result<BigInt, Error> {
        Ok(self.big_word()?.zigzag())
    }
    /// Decode a single bit of the buffer to get a bool.
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
    /// filling in the next 7 least significant bits of the unsigned integer and
    /// so on. If the most significant bit was instead 0 we stop decoding
    /// any more bits.
-    pub fn big_word(&mut self) -> Result<u128, Error> {
+    #[cfg(feature = "num-bigint")]
    pub fn big_word(&mut self) -> Result<BigUint, Error> {
        let mut leading_bit = 1;
-        let mut final_word: u128 = 0;
+        let mut final_word: BigUint = (0 as u8).into();
        let mut shl: u128 = 0;
        // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
        while leading_bit > 0 {
            let word8 = self.bits8(8)?;
            let word7 = word8 & 127;
-            final_word |= (word7 as u128) << shl;
+            final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
            shl += 7;
            leading_bit = word8 & 128;
        }
--- a/pallas-codec/src/flat/decode/mod.rs
+++ b/pallas-codec/src/flat/decode/mod.rs
@ -3,6 +3,9 @@ mod error;
 use crate::flat::filler::Filler;
 #[cfg(feature = "num-bigint")]
 use num_bigint::BigInt;
 pub use decoder::Decoder;
 pub use error::Error;
@ -36,9 +39,10 @@ impl Decode<'_> for isize {
    }
 }
-impl Decode<'_> for i128 {
+#[cfg(feature = "num-bigint")]
 impl Decode<'_> for BigInt {
    fn decode(d: &mut Decoder) -> Result<Self, Error> {
-        d.big_integer()
+        Ok(d.big_integer()?.into())
    }
 }
--- a/pallas-codec/src/flat/encode/encoder.rs
+++ b/pallas-codec/src/flat/encode/encoder.rs
@ -1,7 +1,9 @@
 use super::Encode;
 use crate::flat::zigzag;
 use super::Error;
 use crate::flat::zigzag::ZigZag;
 #[cfg(feature = "num-bigint")]
 use num_bigint::{BigInt, BigUint};
 pub struct Encoder {
    pub buffer: Vec<u8>,
@ -89,7 +91,8 @@ impl Encoder {
        Ok(self)
    }
-    /// Encode an integer of any size.
+    /// Encode an isize integer.
    ///
    /// This is byte alignment agnostic.
    /// First we use zigzag once to double the number and encode the negative
    /// sign as the least significant bit. Next we encode the 7 least
@ -97,25 +100,21 @@ impl Encoder {
    /// 127 we encode a leading 1 followed by repeating the encoding above for
    /// the next 7 bits and so on.
    pub fn integer(&mut self, i: isize) -> &mut Self {
-        let i = zigzag::to_usize(i);
+        self.word(i.zigzag());
        self.word(i);
        self
    }
-    /// Encode an integer of 128 bits size.
+    /// Encode an arbitrarily sized integer.
    ///
    /// This is byte alignment agnostic.
    /// First we use zigzag once to double the number and encode the negative
    /// sign as the least significant bit. Next we encode the 7 least
    /// significant bits of the unsigned integer. If the number is greater than
    /// 127 we encode a leading 1 followed by repeating the encoding above for
    /// the next 7 bits and so on.
-    pub fn big_integer(&mut self, i: i128) -> &mut Self {
+    #[cfg(feature = "num-bigint")]
-        let i = zigzag::to_u128(i);
+    pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
-
+        self.big_word(i.zigzag());
        self.big_word(i);
        self
    }
@ -181,18 +180,25 @@ impl Encoder {
    /// We encode the 7 least significant bits of the unsigned integer. If the
    /// value is greater than 127 we encode a leading 1 followed by
    /// repeating the above for the next 7 bits and so on.
-    pub fn big_word(&mut self, c: u128) -> &mut Self {
+    #[cfg(feature = "num-bigint")]
    pub fn big_word(&mut self, c: BigUint) -> &mut Self {
        let mut d = c;
        let zero = (0 as u8).into();
        loop {
-            let mut w = (d & 127) as u8;
+            let m: usize = 127;
            let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
                .to_bytes_be()
                .pop()
                .unwrap();
            d >>= 7;
-            if d != 0 {
+            if d != zero {
                w |= 128;
            }
            self.bits(8, w);
-            if d == 0 {
+            if d == zero {
                break;
            }
        }
--- a/pallas-codec/src/flat/encode/mod.rs
+++ b/pallas-codec/src/flat/encode/mod.rs
@ -3,6 +3,9 @@ mod error;
 use crate::flat::filler::Filler;
 #[cfg(feature = "num-bigint")]
 use num_bigint::BigInt;
 pub use encoder::Encoder;
 pub use error::Error;
@ -26,9 +29,10 @@ impl Encode for u8 {
    }
 }
-impl Encode for i128 {
+#[cfg(feature = "num-bigint")]
 impl Encode for BigInt {
    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
-        e.big_integer(*self);
+        e.big_integer(self.clone());
        Ok(())
    }
--- a/pallas-codec/src/flat/zigzag.rs
+++ b/pallas-codec/src/flat/zigzag.rs
@ -1,27 +1,51 @@
-pub fn to_usize(x: isize) -> usize {
+#[cfg(feature = "num-bigint")]
-    let double_x = x << 1;
+use num_bigint::{BigInt, BigUint, ToBigInt};
-    if x.is_positive() || x == 0 {
+pub trait ZigZag {
-        double_x as usize
+    type Zag;
-    } else {
+    fn zigzag(self) -> Self::Zag;
-        (-double_x - 1) as usize
+}
 #[cfg(feature = "num-bigint")]
 impl ZigZag for BigInt {
    type Zag = BigUint;
    /// Folds a signed big integer onto an unsigned one: a non-negative `n`
    /// becomes `2n`, a negative `n` becomes `-2n - 1`.
    fn zigzag(self) -> Self::Zag {
        // The sign must be read before `self` is consumed by the shift.
        let is_negative = self < 0.into();
        let doubled: BigInt = self << 1;
        let folded = if is_negative {
            -doubled - <u8 as Into<BigInt>>::into(1)
        } else {
            doubled
        };
        // Both branches yield a value >= 0, so the conversion cannot fail.
        folded.to_biguint().expect("number is positive")
    }
 }
-pub fn to_isize(u: usize) -> isize {
+impl ZigZag for isize {
-    ((u >> 1) as isize) ^ (-((u & 1) as isize))
+    type Zag = usize;
 }
-pub fn to_u128(x: i128) -> u128 {
+    fn zigzag(self) -> Self::Zag where {
-    let double_x = x << 1;
+        let bits = isize::BITS as i128;
-
+        let i = self as i128;
-    if x.is_positive() || x == 0 {
+        ((i << 1) ^ (i >> (bits - 1))) as usize
        double_x as u128
    } else {
        (-double_x - 1) as u128
    }
 }
-pub fn to_i128(u: u128) -> i128 {
+#[cfg(feature = "num-bigint")]
-    ((u >> 1) as i128) ^ (-((u & 1) as i128))
+impl ZigZag for BigUint {
    type Zag = BigInt;
    fn zigzag(self) -> Self::Zag where {
        let i = self.to_bigint().expect("always possible");
        (i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
    }
 }
 impl ZigZag for usize {
    type Zag = isize;
    /// Undoes the zigzag folding: even inputs map to `n / 2`, odd inputs map
    /// to `-(n + 1) / 2`.
    fn zigzag(self) -> Self::Zag {
        // Everything above the lowest bit carries the magnitude.
        let magnitude = (self >> 1) as isize;
        // The lowest bit is the sign flag: 0 stays 0, 1 becomes all ones (-1).
        let sign_mask = -((self & 1) as isize);
        magnitude ^ sign_mask
    }
 }
--- a/pallas-codec/tests/flat.rs
+++ b/pallas-codec/tests/flat.rs
@ -8,6 +8,37 @@ prop_compose! {
    }
 }
 // Round-trip property tests for the flat encoding of `BigInt` values.
 // Only compiled when the `num-bigint` feature is enabled.
 #[cfg(feature = "num-bigint")]
 mod bigint {
    use super::arb_big_vec;
    use num_bigint::{BigInt, Sign};
    use pallas_codec::flat::{decode, encode};
    use proptest::prelude::*;
    // Strategy: an arbitrary `isize`, converted into a `BigInt`.
    prop_compose! {
        fn arb_isize()(i: isize) -> BigInt {
            i.into()
        }
    }
    // Strategy choosing between three sources of `BigInt` values:
    //   - values converted from an arbitrary `isize`,
    //   - non-negative values built from big-endian bytes,
    //   - negative-signed values built from big-endian bytes.
    // NOTE(review): `arb_big_vec` is defined outside this module; presumably it
    // yields byte vectors that can exceed the machine word size — confirm.
    fn arb_bigint() -> impl Strategy<Value = BigInt> {
        prop_oneof![
            arb_isize(),
            arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs)),
            arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs))
        ]
    }
    proptest! {
        // Encoding a `BigInt` and decoding the resulting bytes must give back
        // the original value.
        #[test]
        fn encode_bigint(x in arb_bigint()) {
            let bytes = encode(&x).unwrap();
            let decoded: BigInt = decode(&bytes).unwrap();
            assert_eq!(decoded, x);
        }
    }
 }
 #[test]
 fn encode_bool() {
    let bytes = encode(&true).unwrap();
--- a/pallas-codec/tests/zigzag.rs
+++ b/pallas-codec/tests/zigzag.rs
@ -1,18 +1,18 @@
-use pallas_codec::flat::zigzag::{to_isize, to_usize};
+use pallas_codec::flat::zigzag::ZigZag;
 use proptest::prelude::*;
 proptest! {
    #[test]
    fn zigzag(i: isize) {
-        let u = to_usize(i);
+        let u = i.zigzag();
-        let converted_i = to_isize(u);
+        let converted_i = u.zigzag();
        assert_eq!(converted_i, i);
    }
    #[test]
    fn zagzig(u: usize) {
-        let i = to_isize(u);
+        let i = u.zigzag();
-        let converted_u = to_usize(i);
+        let converted_u = i.zigzag();
        assert_eq!(converted_u, u);
    }
 }