fix(codec): Fix flat encoding and decoding of arbitrarily sized integers (#378)

This commit fixes the flat encoding and decoding (and consequently,
  the zigzag) for large integers in the following ways:

  - It removes support for encoding and decoding i128 values.

  - It optionally (feature = "num-bigint") introduces encoding and
    decoding of arbitrarily sized integers through the
    `num_bigint::BigInt` type.

  Without the feature enabled, it is still possible to encode and decode
  isize values; but the use of i128 is now prohibited (as it would
  overflow on boundaries) in favor of arbitrarily sized integers.

  The commit also adds a roundtrip property test for encoding and
  decoding large integers; its absence is why the overflow problem
  went undetected.

  See related issue: https://github.com/aiken-lang/aiken/issues/796
This commit is contained in:
Matthias Benkort 2024-01-13 14:09:16 +01:00 committed by GitHub
parent 14e0809ea2
commit 7cb1ffe100
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 133 additions and 54 deletions

View file

@ -14,9 +14,13 @@ authors = [
"Kasey White <kwhitemsg@gmail.com>",
]
[features]
default = []
[dependencies]
hex = "0.4.3"
minicbor = { version = "0.20", features = ["std", "half", "derive"] }
num-bigint = { version = "0.4.4", optional = true }
serde = { version = "1.0.143", features = ["derive"] }
thiserror = "1.0.39"

View file

@ -1,7 +1,9 @@
use super::Decode;
use crate::flat::zigzag;
use super::Error;
use crate::flat::zigzag::ZigZag;
#[cfg(feature = "num-bigint")]
use num_bigint::{BigInt, BigUint};
#[derive(Debug)]
pub struct Decoder<'b> {
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
T::decode(self)
}
/// Decode an integer of any size.
/// Decode an isize integer.
///
/// This is byte alignment agnostic.
/// First we decode the next 8 bits of the buffer.
/// We take the 7 least significant bits as the 7 least significant bits of
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
/// any more bits. Finally we use zigzag to convert the unsigned integer
/// back to a signed integer.
pub fn integer(&mut self) -> Result<isize, Error> {
Ok(zigzag::to_isize(self.word()?))
Ok(self.word()?.zigzag())
}
/// Decode an integer of 128 bits size.
/// Decode an integer of an arbitrary size.
///
/// This is byte alignment agnostic.
/// First we decode the next 8 bits of the buffer.
/// We take the 7 least significant bits as the 7 least significant bits of
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
/// so on. If the most significant bit was instead 0 we stop decoding
/// any more bits. Finally we use zigzag to convert the unsigned integer
/// back to a signed integer.
pub fn big_integer(&mut self) -> Result<i128, Error> {
Ok(zigzag::to_i128(self.big_word()?))
#[cfg(feature = "num-bigint")]
pub fn big_integer(&mut self) -> Result<BigInt, Error> {
Ok(self.big_word()?.zigzag())
}
/// Decode a single bit of the buffer to get a bool.
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
/// filling in the next 7 least significant bits of the unsigned integer and
/// so on. If the most significant bit was instead 0 we stop decoding
/// any more bits.
pub fn big_word(&mut self) -> Result<u128, Error> {
#[cfg(feature = "num-bigint")]
pub fn big_word(&mut self) -> Result<BigUint, Error> {
let mut leading_bit = 1;
let mut final_word: u128 = 0;
let mut final_word: BigUint = (0 as u8).into();
let mut shl: u128 = 0;
// continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
while leading_bit > 0 {
let word8 = self.bits8(8)?;
let word7 = word8 & 127;
final_word |= (word7 as u128) << shl;
final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
shl += 7;
leading_bit = word8 & 128;
}

View file

@ -3,6 +3,9 @@ mod error;
use crate::flat::filler::Filler;
#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
pub use decoder::Decoder;
pub use error::Error;
@ -36,9 +39,10 @@ impl Decode<'_> for isize {
}
}
impl Decode<'_> for i128 {
#[cfg(feature = "num-bigint")]
impl Decode<'_> for BigInt {
fn decode(d: &mut Decoder) -> Result<Self, Error> {
d.big_integer()
Ok(d.big_integer()?.into())
}
}

View file

@ -1,7 +1,9 @@
use super::Encode;
use crate::flat::zigzag;
use super::Error;
use crate::flat::zigzag::ZigZag;
#[cfg(feature = "num-bigint")]
use num_bigint::{BigInt, BigUint};
pub struct Encoder {
pub buffer: Vec<u8>,
@ -89,7 +91,8 @@ impl Encoder {
Ok(self)
}
/// Encode an integer of any size.
/// Encode an isize integer.
///
/// This is byte alignment agnostic.
/// First we use zigzag once to double the number and encode the negative
/// sign as the least significant bit. Next we encode the 7 least
@ -97,25 +100,21 @@ impl Encoder {
/// 127 we encode a leading 1 followed by repeating the encoding above for
/// the next 7 bits and so on.
pub fn integer(&mut self, i: isize) -> &mut Self {
let i = zigzag::to_usize(i);
self.word(i);
self.word(i.zigzag());
self
}
/// Encode an integer of 128 bits size.
/// Encode an arbitrarily sized integer.
///
/// This is byte alignment agnostic.
/// First we use zigzag once to double the number and encode the negative
/// sign as the least significant bit. Next we encode the 7 least
/// significant bits of the unsigned integer. If the number is greater than
/// 127 we encode a leading 1 followed by repeating the encoding above for
/// the next 7 bits and so on.
pub fn big_integer(&mut self, i: i128) -> &mut Self {
let i = zigzag::to_u128(i);
self.big_word(i);
#[cfg(feature = "num-bigint")]
pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
self.big_word(i.zigzag());
self
}
@ -181,18 +180,25 @@ impl Encoder {
/// We encode the 7 least significant bits of the unsigned byte. If the char
/// value is greater than 127 we encode a leading 1 followed by
/// repeating the above for the next 7 bits and so on.
pub fn big_word(&mut self, c: u128) -> &mut Self {
#[cfg(feature = "num-bigint")]
pub fn big_word(&mut self, c: BigUint) -> &mut Self {
let mut d = c;
let zero = (0 as u8).into();
loop {
let mut w = (d & 127) as u8;
let m: usize = 127;
let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
.to_bytes_be()
.pop()
.unwrap();
d >>= 7;
if d != 0 {
if d != zero {
w |= 128;
}
self.bits(8, w);
if d == 0 {
if d == zero {
break;
}
}

View file

@ -3,6 +3,9 @@ mod error;
use crate::flat::filler::Filler;
#[cfg(feature = "num-bigint")]
use num_bigint::BigInt;
pub use encoder::Encoder;
pub use error::Error;
@ -26,9 +29,10 @@ impl Encode for u8 {
}
}
impl Encode for i128 {
#[cfg(feature = "num-bigint")]
impl Encode for BigInt {
fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
e.big_integer(*self);
e.big_integer(self.clone());
Ok(())
}

View file

@ -1,27 +1,51 @@
pub fn to_usize(x: isize) -> usize {
let double_x = x << 1;
#[cfg(feature = "num-bigint")]
use num_bigint::{BigInt, BigUint, ToBigInt};
if x.is_positive() || x == 0 {
double_x as usize
} else {
(-double_x - 1) as usize
/// Zigzag conversion between a signed integer and its unsigned counterpart.
///
/// The implementations alongside this trait go in both directions: a signed
/// type maps to an unsigned `Zag` (encoding), and an unsigned type maps back
/// to a signed `Zag` (decoding).
pub trait ZigZag {
/// The type on the other side of the conversion.
type Zag;
/// Consume `self` and return its zigzag counterpart.
fn zigzag(self) -> Self::Zag;
}
#[cfg(feature = "num-bigint")]
impl ZigZag for BigInt {
    type Zag = BigUint;

    /// Zigzag-encode an arbitrarily sized signed integer.
    ///
    /// A non-negative `n` becomes `2n`; a negative `n` becomes `-2n - 1`.
    /// Either way the result is non-negative, so the final conversion to
    /// `BigUint` is expected to succeed.
    fn zigzag(self) -> Self::Zag {
        // Record the sign before `self` is consumed by the shift.
        let is_non_negative = self >= 0.into();
        let doubled: BigInt = self << 1;
        let folded = if is_non_negative {
            doubled
        } else {
            -doubled - <u8 as Into<BigInt>>::into(1)
        };
        folded.to_biguint().expect("number is positive")
    }
}
pub fn to_isize(u: usize) -> isize {
((u >> 1) as isize) ^ (-((u & 1) as isize))
}
impl ZigZag for isize {
type Zag = usize;
pub fn to_u128(x: i128) -> u128 {
let double_x = x << 1;
if x.is_positive() || x == 0 {
double_x as u128
} else {
(-double_x - 1) as u128
fn zigzag(self) -> Self::Zag where {
let bits = isize::BITS as i128;
let i = self as i128;
((i << 1) ^ (i >> (bits - 1))) as usize
}
}
pub fn to_i128(u: u128) -> i128 {
((u >> 1) as i128) ^ (-((u & 1) as i128))
#[cfg(feature = "num-bigint")]
impl ZigZag for BigUint {
    type Zag = BigInt;

    /// Zigzag-decode an arbitrarily sized unsigned integer.
    ///
    /// The lowest bit selects the sign and the remaining bits carry the
    /// magnitude: the value is halved, then XOR-ed with the negated low bit.
    fn zigzag(self) -> Self::Zag {
        let signed = self.to_bigint().expect("always possible");
        let one = <u8 as Into<BigInt>>::into(1);
        // 0 when the lowest bit is clear, -1 when it is set.
        let sign_mask = -(signed.clone() & one);
        (signed >> 1) ^ sign_mask
    }
}
impl ZigZag for usize {
    type Zag = isize;

    /// Zigzag-decode a machine-sized unsigned integer.
    ///
    /// Even inputs map to `self / 2`; odd inputs map to the bitwise complement
    /// of `self / 2`, i.e. `-(self / 2) - 1`.
    fn zigzag(self) -> Self::Zag {
        let half = (self >> 1) as isize;
        if self & 1 == 1 {
            // XOR with -1 (all ones) is a bitwise NOT.
            !half
        } else {
            half
        }
    }
}

View file

@ -8,6 +8,37 @@ prop_compose! {
}
}
#[cfg(feature = "num-bigint")]
mod bigint {
    use super::arb_big_vec;
    use num_bigint::{BigInt, Sign};
    use pallas_codec::flat::{decode, encode};
    use proptest::prelude::*;

    /// Any machine-sized signed integer, widened to a `BigInt`.
    fn arb_isize() -> impl Strategy<Value = BigInt> {
        any::<isize>().prop_map(BigInt::from)
    }

    /// A `BigInt` drawn either from the `isize` range or from a byte vector
    /// produced by `arb_big_vec`, interpreted big-endian with either sign.
    fn arb_bigint() -> impl Strategy<Value = BigInt> {
        let positive = arb_big_vec().prop_map(|bytes| BigInt::from_bytes_be(Sign::Plus, &bytes));
        let negative = arb_big_vec().prop_map(|bytes| BigInt::from_bytes_be(Sign::Minus, &bytes));
        prop_oneof![arb_isize(), positive, negative]
    }

    proptest! {
        /// Encoding then decoding a `BigInt` must give back the same value.
        #[test]
        fn encode_bigint(x in arb_bigint()) {
            let encoded = encode(&x).unwrap();
            let roundtripped: BigInt = decode(&encoded).unwrap();
            assert_eq!(roundtripped, x);
        }
    }
}
#[test]
fn encode_bool() {
let bytes = encode(&true).unwrap();

View file

@ -1,18 +1,18 @@
use pallas_codec::flat::zigzag::{to_isize, to_usize};
use pallas_codec::flat::zigzag::ZigZag;
use proptest::prelude::*;
proptest! {
#[test]
fn zigzag(i: isize) {
let u = to_usize(i);
let converted_i = to_isize(u);
let u = i.zigzag();
let converted_i = u.zigzag();
assert_eq!(converted_i, i);
}
#[test]
fn zagzig(u: usize) {
let i = to_isize(u);
let converted_u = to_usize(i);
let i = u.zigzag();
let converted_u = i.zigzag();
assert_eq!(converted_u, u);
}
}