fix(codec): Fix flat encoding and decoding of arbitrarily sized integers (#378)

This commit fixes the flat encoding and decoding (and consequently,
  the zigzag) for large integers in the following ways:

  - It removes support for encoding and decoding i128 values.

  - It optionally (feature = "num-bigint") introduces encoding and
    decoding of arbitrarily sized integers through the num_bigint::BigInt
    type.

  Without the feature enabled, it is still possible to encode and decode
  isize values; but the use of i128 is now prohibited (as it would
  overflow on boundaries) in favor of arbitrarily sized integers.

  The commit also introduces a property roundtrip test for encoding
  and decoding large integers; it was previously missing, which is why
  the overflow problem went unnoticed.

  See related issue: https://github.com/aiken-lang/aiken/issues/796
This commit is contained in:
Matthias Benkort 2024-01-13 14:09:16 +01:00 committed by GitHub
parent 14e0809ea2
commit 7cb1ffe100
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 133 additions and 54 deletions

View file

@ -14,9 +14,13 @@ authors = [
"Kasey White <kwhitemsg@gmail.com>", "Kasey White <kwhitemsg@gmail.com>",
] ]
[features]
default = []
[dependencies] [dependencies]
hex = "0.4.3" hex = "0.4.3"
minicbor = { version = "0.20", features = ["std", "half", "derive"] } minicbor = { version = "0.20", features = ["std", "half", "derive"] }
num-bigint = { version = "0.4.4", optional = true }
serde = { version = "1.0.143", features = ["derive"] } serde = { version = "1.0.143", features = ["derive"] }
thiserror = "1.0.39" thiserror = "1.0.39"

View file

@ -1,7 +1,9 @@
use super::Decode; use super::Decode;
use crate::flat::zigzag;
use super::Error; use super::Error;
use crate::flat::zigzag::ZigZag;
#[cfg(feature = "num-bigint")]
use num_bigint::{BigInt, BigUint};
#[derive(Debug)] #[derive(Debug)]
pub struct Decoder<'b> { pub struct Decoder<'b> {
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
T::decode(self) T::decode(self)
} }
/// Decode an integer of any size. /// Decode an isize integer.
///
/// This is byte alignment agnostic. /// This is byte alignment agnostic.
/// First we decode the next 8 bits of the buffer. /// First we decode the next 8 bits of the buffer.
/// We take the 7 least significant bits as the 7 least significant bits of /// We take the 7 least significant bits as the 7 least significant bits of
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
/// any more bits. Finally we use zigzag to convert the unsigned integer /// any more bits. Finally we use zigzag to convert the unsigned integer
/// back to a signed integer. /// back to a signed integer.
pub fn integer(&mut self) -> Result<isize, Error> { pub fn integer(&mut self) -> Result<isize, Error> {
Ok(zigzag::to_isize(self.word()?)) Ok(self.word()?.zigzag())
} }
/// Decode an integer of 128 bits size. /// Decode an integer of an arbitrary size.
///
/// This is byte alignment agnostic. /// This is byte alignment agnostic.
/// First we decode the next 8 bits of the buffer. /// First we decode the next 8 bits of the buffer.
/// We take the 7 least significant bits as the 7 least significant bits of /// We take the 7 least significant bits as the 7 least significant bits of
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
/// so on. If the most significant bit was instead 0 we stop decoding /// so on. If the most significant bit was instead 0 we stop decoding
/// any more bits. Finally we use zigzag to convert the unsigned integer /// any more bits. Finally we use zigzag to convert the unsigned integer
/// back to a signed integer. /// back to a signed integer.
pub fn big_integer(&mut self) -> Result<i128, Error> { #[cfg(feature = "num-bigint")]
Ok(zigzag::to_i128(self.big_word()?)) pub fn big_integer(&mut self) -> Result<BigInt, Error> {
Ok(self.big_word()?.zigzag())
} }
/// Decode a single bit of the buffer to get a bool. /// Decode a single bit of the buffer to get a bool.
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
/// filling in the next 7 least significant bits of the unsigned integer and /// filling in the next 7 least significant bits of the unsigned integer and
/// so on. If the most significant bit was instead 0 we stop decoding /// so on. If the most significant bit was instead 0 we stop decoding
/// any more bits. /// any more bits.
pub fn big_word(&mut self) -> Result<u128, Error> { #[cfg(feature = "num-bigint")]
pub fn big_word(&mut self) -> Result<BigUint, Error> {
let mut leading_bit = 1; let mut leading_bit = 1;
let mut final_word: u128 = 0; let mut final_word: BigUint = (0 as u8).into();
let mut shl: u128 = 0; let mut shl: u128 = 0;
// continue looping if lead bit is 1 which is 128 as a u8 otherwise exit // continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
while leading_bit > 0 { while leading_bit > 0 {
let word8 = self.bits8(8)?; let word8 = self.bits8(8)?;
let word7 = word8 & 127; let word7 = word8 & 127;
final_word |= (word7 as u128) << shl; final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
shl += 7; shl += 7;
leading_bit = word8 & 128; leading_bit = word8 & 128;
} }

View file

@ -3,6 +3,9 @@ mod error;
use crate::flat::filler::Filler; use crate::flat::filler::Filler;
#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
pub use decoder::Decoder; pub use decoder::Decoder;
pub use error::Error; pub use error::Error;
@ -36,9 +39,10 @@ impl Decode<'_> for isize {
} }
} }
impl Decode<'_> for i128 { #[cfg(feature = "num-bigint")]
impl Decode<'_> for BigInt {
fn decode(d: &mut Decoder) -> Result<Self, Error> { fn decode(d: &mut Decoder) -> Result<Self, Error> {
d.big_integer() Ok(d.big_integer()?.into())
} }
} }

View file

@ -1,7 +1,9 @@
use super::Encode; use super::Encode;
use crate::flat::zigzag;
use super::Error; use super::Error;
use crate::flat::zigzag::ZigZag;
#[cfg(feature = "num-bigint")]
use num_bigint::{BigInt, BigUint};
pub struct Encoder { pub struct Encoder {
pub buffer: Vec<u8>, pub buffer: Vec<u8>,
@ -89,7 +91,8 @@ impl Encoder {
Ok(self) Ok(self)
} }
/// Encode an integer of any size. /// Encode an isize integer.
///
/// This is byte alignment agnostic. /// This is byte alignment agnostic.
/// First we use zigzag once to double the number and encode the negative /// First we use zigzag once to double the number and encode the negative
/// sign as the least significant bit. Next we encode the 7 least /// sign as the least significant bit. Next we encode the 7 least
@ -97,25 +100,21 @@ impl Encoder {
/// 127 we encode a leading 1 followed by repeating the encoding above for /// 127 we encode a leading 1 followed by repeating the encoding above for
/// the next 7 bits and so on. /// the next 7 bits and so on.
pub fn integer(&mut self, i: isize) -> &mut Self { pub fn integer(&mut self, i: isize) -> &mut Self {
let i = zigzag::to_usize(i); self.word(i.zigzag());
self.word(i);
self self
} }
/// Encode an integer of 128 bits size. /// Encode an arbitrarily sized integer.
///
/// This is byte alignment agnostic. /// This is byte alignment agnostic.
/// First we use zigzag once to double the number and encode the negative /// First we use zigzag once to double the number and encode the negative
/// sign as the least significant bit. Next we encode the 7 least /// sign as the least significant bit. Next we encode the 7 least
/// significant bits of the unsigned integer. If the number is greater than /// significant bits of the unsigned integer. If the number is greater than
/// 127 we encode a leading 1 followed by repeating the encoding above for /// 127 we encode a leading 1 followed by repeating the encoding above for
/// the next 7 bits and so on. /// the next 7 bits and so on.
pub fn big_integer(&mut self, i: i128) -> &mut Self { #[cfg(feature = "num-bigint")]
let i = zigzag::to_u128(i); pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
self.big_word(i.zigzag());
self.big_word(i);
self self
} }
@ -181,18 +180,25 @@ impl Encoder {
/// We encode the 7 least significant bits of the unsigned byte. If the char /// We encode the 7 least significant bits of the unsigned byte. If the char
/// value is greater than 127 we encode a leading 1 followed by /// value is greater than 127 we encode a leading 1 followed by
/// repeating the above for the next 7 bits and so on. /// repeating the above for the next 7 bits and so on.
pub fn big_word(&mut self, c: u128) -> &mut Self { #[cfg(feature = "num-bigint")]
pub fn big_word(&mut self, c: BigUint) -> &mut Self {
let mut d = c; let mut d = c;
let zero = (0 as u8).into();
loop { loop {
let mut w = (d & 127) as u8; let m: usize = 127;
let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
.to_bytes_be()
.pop()
.unwrap();
d >>= 7; d >>= 7;
if d != 0 { if d != zero {
w |= 128; w |= 128;
} }
self.bits(8, w); self.bits(8, w);
if d == 0 { if d == zero {
break; break;
} }
} }

View file

@ -3,6 +3,9 @@ mod error;
use crate::flat::filler::Filler; use crate::flat::filler::Filler;
#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
pub use encoder::Encoder; pub use encoder::Encoder;
pub use error::Error; pub use error::Error;
@ -26,9 +29,10 @@ impl Encode for u8 {
} }
} }
impl Encode for i128 { #[cfg(feature = "num-bigint")]
impl Encode for BigInt {
fn encode(&self, e: &mut Encoder) -> Result<(), Error> { fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
e.big_integer(*self); e.big_integer(self.clone());
Ok(()) Ok(())
} }

View file

@ -1,27 +1,51 @@
pub fn to_usize(x: isize) -> usize { #[cfg(feature = "num-bigint")]
let double_x = x << 1; use num_bigint::{BigInt, BigUint, ToBigInt};
if x.is_positive() || x == 0 { pub trait ZigZag {
double_x as usize type Zag;
} else { fn zigzag(self) -> Self::Zag;
(-double_x - 1) as usize }
#[cfg(feature = "num-bigint")]
impl ZigZag for BigInt {
type Zag = BigUint;
fn zigzag(self) -> Self::Zag where {
if self >= 0.into() {
self << 1
} else {
let double: BigInt = self << 1;
-double - <u8 as Into<BigInt>>::into(1)
}
.to_biguint()
.expect("number is positive")
} }
} }
pub fn to_isize(u: usize) -> isize { impl ZigZag for isize {
((u >> 1) as isize) ^ (-((u & 1) as isize)) type Zag = usize;
}
pub fn to_u128(x: i128) -> u128 { fn zigzag(self) -> Self::Zag where {
let double_x = x << 1; let bits = isize::BITS as i128;
let i = self as i128;
if x.is_positive() || x == 0 { ((i << 1) ^ (i >> (bits - 1))) as usize
double_x as u128
} else {
(-double_x - 1) as u128
} }
} }
pub fn to_i128(u: u128) -> i128 { #[cfg(feature = "num-bigint")]
((u >> 1) as i128) ^ (-((u & 1) as i128)) impl ZigZag for BigUint {
type Zag = BigInt;
fn zigzag(self) -> Self::Zag where {
let i = self.to_bigint().expect("always possible");
(i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
}
}
impl ZigZag for usize {
type Zag = isize;
fn zigzag(self) -> Self::Zag where {
((self >> 1) as isize) ^ -((self & 1) as isize)
}
} }

View file

@ -8,6 +8,37 @@ prop_compose! {
} }
} }
#[cfg(feature = "num-bigint")]
mod bigint {
    use super::arb_big_vec;
    use num_bigint::{BigInt, Sign};
    use pallas_codec::flat::{decode, encode};
    use proptest::prelude::*;

    /// Strategy producing arbitrary `isize` values converted into `BigInt`.
    fn arb_isize() -> impl Strategy<Value = BigInt> {
        any::<isize>().prop_map(BigInt::from)
    }

    /// Strategy producing `BigInt` values from three sources: `isize`-ranged
    /// values, and big-endian byte sequences from `arb_big_vec` interpreted
    /// once with a positive sign and once with a negative sign.
    fn arb_bigint() -> impl Strategy<Value = BigInt> {
        let positive = arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs));
        let negative = arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs));

        prop_oneof![arb_isize(), positive, negative]
    }

    proptest! {
        /// Roundtrip property: flat-encoding a `BigInt` and decoding the
        /// resulting bytes must give back the original value.
        #[test]
        fn encode_bigint(x in arb_bigint()) {
            let flat = encode(&x).unwrap();
            let roundtripped: BigInt = decode(&flat).unwrap();
            assert_eq!(roundtripped, x);
        }
    }
}
#[test] #[test]
fn encode_bool() { fn encode_bool() {
let bytes = encode(&true).unwrap(); let bytes = encode(&true).unwrap();

View file

@ -1,18 +1,18 @@
use pallas_codec::flat::zigzag::{to_isize, to_usize}; use pallas_codec::flat::zigzag::ZigZag;
use proptest::prelude::*; use proptest::prelude::*;
proptest! { proptest! {
#[test] #[test]
fn zigzag(i: isize) { fn zigzag(i: isize) {
let u = to_usize(i); let u = i.zigzag();
let converted_i = to_isize(u); let converted_i = u.zigzag();
assert_eq!(converted_i, i); assert_eq!(converted_i, i);
} }
#[test] #[test]
fn zagzig(u: usize) { fn zagzig(u: usize) {
let i = to_isize(u); let i = u.zigzag();
let converted_u = to_usize(i); let converted_u = i.zigzag();
assert_eq!(converted_u, u); assert_eq!(converted_u, u);
} }
} }