fix(codec): Fix flat encoding and decoding of arbitrarily sized integers (#378)

This commit fixes the flat encoding and decoding (and consequently, the zigzag conversion) of large integers in the following ways: - It removes support for encoding and decoding i128 values. - It optionally (feature = "num-bigint") introduces encoding and decoding of arbitrarily sized integers through the num_bigint::BigInt type. Without the feature enabled, it is still possible to encode and decode isize values, but the use of i128 is now prohibited (as it would overflow at the boundaries) in favor of arbitrarily sized integers. The commit also adds a round-trip property test for encoding and decoding large integers; this test was previously missing, which is why the overflow problem went undetected. See related issue: https://github.com/aiken-lang/aiken/issues/796
2024-01-13 14:09:16 +01:00 · 2024-01-13 14:09:16 +01:00 · 7cb1ffe100
commit 7cb1ffe100
parent 14e0809ea2
8 changed files with 133 additions and 54 deletions
--- a/pallas-codec/Cargo.toml
+++ b/pallas-codec/Cargo.toml
@ -14,9 +14,13 @@ authors = [
  "Kasey White <kwhitemsg@gmail.com>",
 ]

+[features]
+default = []
+
 [dependencies]
 hex = "0.4.3"
 minicbor = { version = "0.20", features = ["std", "half", "derive"] }
+num-bigint = { version = "0.4.4", optional = true }
 serde = { version = "1.0.143", features = ["derive"] }
 thiserror = "1.0.39"

--- a/pallas-codec/src/flat/decode/decoder.rs
+++ b/pallas-codec/src/flat/decode/decoder.rs
@ -1,7 +1,9 @@
 use super::Decode;
-use crate::flat::zigzag;
-
 use super::Error;
+use crate::flat::zigzag::ZigZag;
+
+#[cfg(feature = "num-bigint")]
+use num_bigint::{BigInt, BigUint};

 #[derive(Debug)]
 pub struct Decoder<'b> {
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
        T::decode(self)
    }

-    /// Decode an integer of any size.
+    /// Decode an isize integer.
+    ///
    /// This is byte alignment agnostic.
    /// First we decode the next 8 bits of the buffer.
    /// We take the 7 least significant bits as the 7 least significant bits of
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
    /// any more bits. Finally we use zigzag to convert the unsigned integer
    /// back to a signed integer.
    pub fn integer(&mut self) -> Result<isize, Error> {
-        Ok(zigzag::to_isize(self.word()?))
+        Ok(self.word()?.zigzag())
    }

-    /// Decode an integer of 128 bits size.
+    /// Decode an integer of an arbitrary size.
+    ///
    /// This is byte alignment agnostic.
    /// First we decode the next 8 bits of the buffer.
    /// We take the 7 least significant bits as the 7 least significant bits of
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
    /// so on. If the most significant bit was instead 0 we stop decoding
    /// any more bits. Finally we use zigzag to convert the unsigned integer
    /// back to a signed integer.
-    pub fn big_integer(&mut self) -> Result<i128, Error> {
-        Ok(zigzag::to_i128(self.big_word()?))
+    #[cfg(feature = "num-bigint")]
+    pub fn big_integer(&mut self) -> Result<BigInt, Error> {
+        Ok(self.big_word()?.zigzag())
    }

    /// Decode a single bit of the buffer to get a bool.
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
    /// filling in the next 7 least significant bits of the unsigned integer and
    /// so on. If the most significant bit was instead 0 we stop decoding
    /// any more bits.
-    pub fn big_word(&mut self) -> Result<u128, Error> {
+    #[cfg(feature = "num-bigint")]
+    pub fn big_word(&mut self) -> Result<BigUint, Error> {
        let mut leading_bit = 1;
-        let mut final_word: u128 = 0;
+        let mut final_word: BigUint = (0 as u8).into();
        let mut shl: u128 = 0;
        // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
        while leading_bit > 0 {
            let word8 = self.bits8(8)?;
            let word7 = word8 & 127;
-            final_word |= (word7 as u128) << shl;
+            final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
            shl += 7;
            leading_bit = word8 & 128;
        }
--- a/pallas-codec/src/flat/decode/mod.rs
+++ b/pallas-codec/src/flat/decode/mod.rs
@ -3,6 +3,9 @@ mod error;

 use crate::flat::filler::Filler;

+#[cfg(feature = "num-bigint")]
+use num_bigint::BigInt;
+
 pub use decoder::Decoder;
 pub use error::Error;

@ -36,9 +39,10 @@ impl Decode<'_> for isize {
    }
 }

-impl Decode<'_> for i128 {
+#[cfg(feature = "num-bigint")]
+impl Decode<'_> for BigInt {
    fn decode(d: &mut Decoder) -> Result<Self, Error> {
-        d.big_integer()
+        Ok(d.big_integer()?.into())
    }
 }

--- a/pallas-codec/src/flat/encode/encoder.rs
+++ b/pallas-codec/src/flat/encode/encoder.rs
@ -1,7 +1,9 @@
 use super::Encode;
-use crate::flat::zigzag;
-
 use super::Error;
+use crate::flat::zigzag::ZigZag;
+
+#[cfg(feature = "num-bigint")]
+use num_bigint::{BigInt, BigUint};

 pub struct Encoder {
    pub buffer: Vec<u8>,
@ -89,7 +91,8 @@ impl Encoder {
        Ok(self)
    }

-    /// Encode an integer of any size.
+    /// Encode an isize integer.
+    ///
    /// This is byte alignment agnostic.
    /// First we use zigzag once to double the number and encode the negative
    /// sign as the least significant bit. Next we encode the 7 least
@ -97,25 +100,21 @@ impl Encoder {
    /// 127 we encode a leading 1 followed by repeating the encoding above for
    /// the next 7 bits and so on.
    pub fn integer(&mut self, i: isize) -> &mut Self {
-        let i = zigzag::to_usize(i);
-
-        self.word(i);
-
+        self.word(i.zigzag());
        self
    }

-    /// Encode an integer of 128 bits size.
+    /// Encode an arbitrarily sized integer.
+    ///
    /// This is byte alignment agnostic.
    /// First we use zigzag once to double the number and encode the negative
    /// sign as the least significant bit. Next we encode the 7 least
    /// significant bits of the unsigned integer. If the number is greater than
    /// 127 we encode a leading 1 followed by repeating the encoding above for
    /// the next 7 bits and so on.
-    pub fn big_integer(&mut self, i: i128) -> &mut Self {
-        let i = zigzag::to_u128(i);
-
-        self.big_word(i);
-
+    #[cfg(feature = "num-bigint")]
+    pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
+        self.big_word(i.zigzag());
        self
    }

@ -181,18 +180,25 @@ impl Encoder {
    /// We encode the 7 least significant bits of the unsigned byte. If the char
    /// value is greater than 127 we encode a leading 1 followed by
    /// repeating the above for the next 7 bits and so on.
-    pub fn big_word(&mut self, c: u128) -> &mut Self {
+    #[cfg(feature = "num-bigint")]
+    pub fn big_word(&mut self, c: BigUint) -> &mut Self {
        let mut d = c;
+        let zero = (0 as u8).into();
        loop {
-            let mut w = (d & 127) as u8;
+            let m: usize = 127;
+            let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
+                .to_bytes_be()
+                .pop()
+                .unwrap();
+
            d >>= 7;

-            if d != 0 {
+            if d != zero {
                w |= 128;
            }
            self.bits(8, w);

-            if d == 0 {
+            if d == zero {
                break;
            }
        }
--- a/pallas-codec/src/flat/encode/mod.rs
+++ b/pallas-codec/src/flat/encode/mod.rs
@ -3,6 +3,9 @@ mod error;

 use crate::flat::filler::Filler;

+#[cfg(feature = "num-bigint")]
+use num_bigint::BigInt;
+
 pub use encoder::Encoder;
 pub use error::Error;

@ -26,9 +29,10 @@ impl Encode for u8 {
    }
 }

-impl Encode for i128 {
+#[cfg(feature = "num-bigint")]
+impl Encode for BigInt {
    fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
-        e.big_integer(*self);
+        e.big_integer(self.clone());

        Ok(())
    }
--- a/pallas-codec/src/flat/zigzag.rs
+++ b/pallas-codec/src/flat/zigzag.rs
@ -1,27 +1,51 @@
-pub fn to_usize(x: isize) -> usize {
-    let double_x = x << 1;
+#[cfg(feature = "num-bigint")]
+use num_bigint::{BigInt, BigUint, ToBigInt};

-    if x.is_positive() || x == 0 {
-        double_x as usize
-    } else {
-        (-double_x - 1) as usize
+pub trait ZigZag {
+    type Zag;
+    fn zigzag(self) -> Self::Zag;
+}
+
+#[cfg(feature = "num-bigint")]
+impl ZigZag for BigInt {
+    type Zag = BigUint;
+
+    fn zigzag(self) -> Self::Zag where {
+        if self >= 0.into() {
+            self << 1
+        } else {
+            let double: BigInt = self << 1;
+            -double - <u8 as Into<BigInt>>::into(1)
+        }
+        .to_biguint()
+        .expect("number is positive")
    }
 }

-pub fn to_isize(u: usize) -> isize {
-    ((u >> 1) as isize) ^ (-((u & 1) as isize))
-}
+impl ZigZag for isize {
+    type Zag = usize;

-pub fn to_u128(x: i128) -> u128 {
-    let double_x = x << 1;
-
-    if x.is_positive() || x == 0 {
-        double_x as u128
-    } else {
-        (-double_x - 1) as u128
+    fn zigzag(self) -> Self::Zag where {
+        let bits = isize::BITS as i128;
+        let i = self as i128;
+        ((i << 1) ^ (i >> (bits - 1))) as usize
    }
 }

-pub fn to_i128(u: u128) -> i128 {
-    ((u >> 1) as i128) ^ (-((u & 1) as i128))
+#[cfg(feature = "num-bigint")]
+impl ZigZag for BigUint {
+    type Zag = BigInt;
+
+    fn zigzag(self) -> Self::Zag where {
+        let i = self.to_bigint().expect("always possible");
+        (i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
+    }
+}
+
+impl ZigZag for usize {
+    type Zag = isize;
+
+    fn zigzag(self) -> Self::Zag where {
+        ((self >> 1) as isize) ^ -((self & 1) as isize)
+    }
 }
--- a/pallas-codec/tests/flat.rs
+++ b/pallas-codec/tests/flat.rs
@ -8,6 +8,37 @@ prop_compose! {
    }
 }

+#[cfg(feature = "num-bigint")]
+mod bigint {
+    use super::arb_big_vec;
+    use num_bigint::{BigInt, Sign};
+    use pallas_codec::flat::{decode, encode};
+    use proptest::prelude::*;
+
+    prop_compose! {
+        fn arb_isize()(i: isize) -> BigInt {
+            i.into()
+        }
+    }
+
+    fn arb_bigint() -> impl Strategy<Value = BigInt> {
+        prop_oneof![
+            arb_isize(),
+            arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs)),
+            arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs))
+        ]
+    }
+
+    proptest! {
+        #[test]
+        fn encode_bigint(x in arb_bigint()) {
+            let bytes = encode(&x).unwrap();
+            let decoded: BigInt = decode(&bytes).unwrap();
+            assert_eq!(decoded, x);
+        }
+    }
+}
+
 #[test]
 fn encode_bool() {
    let bytes = encode(&true).unwrap();
--- a/pallas-codec/tests/zigzag.rs
+++ b/pallas-codec/tests/zigzag.rs
@ -1,18 +1,18 @@
-use pallas_codec::flat::zigzag::{to_isize, to_usize};
+use pallas_codec::flat::zigzag::ZigZag;
 use proptest::prelude::*;

 proptest! {
    #[test]
    fn zigzag(i: isize) {
-        let u = to_usize(i);
-        let converted_i = to_isize(u);
+        let u = i.zigzag();
+        let converted_i = u.zigzag();
        assert_eq!(converted_i, i);
    }

    #[test]
    fn zagzig(u: usize) {
-        let i = to_isize(u);
-        let converted_u = to_usize(i);
+        let i = u.zigzag();
+        let converted_u = i.zigzag();
        assert_eq!(converted_u, u);
    }
 }