fix(codec): Fix flat encoding and decoding of arbitrarily sized integers (#378)
This commit fixes the flat encoding and decoding (and consequently,
the zigzag) for large integers in the following ways:
- It removes support for encoding and decoding i128 values.
- It optionally (feature = "num-bigint") introduces encoding and
decoding of large integers through the num_bigint::BigInt
type.
Without the feature enabled, it is still possible to encode and decode
isize values; but the use of i128 is now prohibited (as it would
overflow on boundaries) in favor of arbitrarily sized integers.
The commit also adds a roundtrip property test for encoding and
decoding large integers; the absence of such a test is why the
overflow problem previously went undetected.
See related issue: https://github.com/aiken-lang/aiken/issues/796
This commit is contained in:
parent
14e0809ea2
commit
7cb1ffe100
8 changed files with 133 additions and 54 deletions
|
|
@ -14,9 +14,13 @@ authors = [
|
|||
"Kasey White <kwhitemsg@gmail.com>",
|
||||
]
|
||||
|
||||
[features]
|
||||
default = []
|
||||
|
||||
[dependencies]
|
||||
hex = "0.4.3"
|
||||
minicbor = { version = "0.20", features = ["std", "half", "derive"] }
|
||||
num-bigint = { version = "0.4.4", optional = true }
|
||||
serde = { version = "1.0.143", features = ["derive"] }
|
||||
thiserror = "1.0.39"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use super::Decode;
|
||||
use crate::flat::zigzag;
|
||||
|
||||
use super::Error;
|
||||
use crate::flat::zigzag::ZigZag;
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
use num_bigint::{BigInt, BigUint};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Decoder<'b> {
|
||||
|
|
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
|
|||
T::decode(self)
|
||||
}
|
||||
|
||||
/// Decode an integer of any size.
|
||||
/// Decode an isize integer.
|
||||
///
|
||||
/// This is byte alignment agnostic.
|
||||
/// First we decode the next 8 bits of the buffer.
|
||||
/// We take the 7 least significant bits as the 7 least significant bits of
|
||||
|
|
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
|
|||
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
||||
/// back to a signed integer.
|
||||
pub fn integer(&mut self) -> Result<isize, Error> {
|
||||
Ok(zigzag::to_isize(self.word()?))
|
||||
Ok(self.word()?.zigzag())
|
||||
}
|
||||
|
||||
/// Decode an integer of 128 bits size.
|
||||
/// Decode an integer of arbitrary size.
|
||||
///
|
||||
/// This is byte alignment agnostic.
|
||||
/// First we decode the next 8 bits of the buffer.
|
||||
/// We take the 7 least significant bits as the 7 least significant bits of
|
||||
|
|
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
|
|||
/// so on. If the most significant bit was instead 0 we stop decoding
|
||||
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
||||
/// back to a signed integer.
|
||||
pub fn big_integer(&mut self) -> Result<i128, Error> {
|
||||
Ok(zigzag::to_i128(self.big_word()?))
|
||||
#[cfg(feature = "num-bigint")]
|
||||
pub fn big_integer(&mut self) -> Result<BigInt, Error> {
|
||||
Ok(self.big_word()?.zigzag())
|
||||
}
|
||||
|
||||
/// Decode a single bit of the buffer to get a bool.
|
||||
|
|
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
|
|||
/// filling in the next 7 least significant bits of the unsigned integer and
|
||||
/// so on. If the most significant bit was instead 0 we stop decoding
|
||||
/// any more bits.
|
||||
pub fn big_word(&mut self) -> Result<u128, Error> {
|
||||
#[cfg(feature = "num-bigint")]
|
||||
pub fn big_word(&mut self) -> Result<BigUint, Error> {
|
||||
let mut leading_bit = 1;
|
||||
let mut final_word: u128 = 0;
|
||||
let mut final_word: BigUint = (0 as u8).into();
|
||||
let mut shl: u128 = 0;
|
||||
// continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
|
||||
while leading_bit > 0 {
|
||||
let word8 = self.bits8(8)?;
|
||||
let word7 = word8 & 127;
|
||||
final_word |= (word7 as u128) << shl;
|
||||
final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
|
||||
shl += 7;
|
||||
leading_bit = word8 & 128;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ mod error;
|
|||
|
||||
use crate::flat::filler::Filler;
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
use num_bigint::BigInt;
|
||||
|
||||
pub use decoder::Decoder;
|
||||
pub use error::Error;
|
||||
|
||||
|
|
@ -36,9 +39,10 @@ impl Decode<'_> for isize {
|
|||
}
|
||||
}
|
||||
|
||||
impl Decode<'_> for i128 {
|
||||
#[cfg(feature = "num-bigint")]
|
||||
impl Decode<'_> for BigInt {
|
||||
fn decode(d: &mut Decoder) -> Result<Self, Error> {
|
||||
d.big_integer()
|
||||
Ok(d.big_integer()?.into())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use super::Encode;
|
||||
use crate::flat::zigzag;
|
||||
|
||||
use super::Error;
|
||||
use crate::flat::zigzag::ZigZag;
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
use num_bigint::{BigInt, BigUint};
|
||||
|
||||
pub struct Encoder {
|
||||
pub buffer: Vec<u8>,
|
||||
|
|
@ -89,7 +91,8 @@ impl Encoder {
|
|||
Ok(self)
|
||||
}
|
||||
|
||||
/// Encode an integer of any size.
|
||||
/// Encode an isize integer.
|
||||
///
|
||||
/// This is byte alignment agnostic.
|
||||
/// First we use zigzag once to double the number and encode the negative
|
||||
/// sign as the least significant bit. Next we encode the 7 least
|
||||
|
|
@ -97,25 +100,21 @@ impl Encoder {
|
|||
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
||||
/// the next 7 bits and so on.
|
||||
pub fn integer(&mut self, i: isize) -> &mut Self {
|
||||
let i = zigzag::to_usize(i);
|
||||
|
||||
self.word(i);
|
||||
|
||||
self.word(i.zigzag());
|
||||
self
|
||||
}
|
||||
|
||||
/// Encode an integer of 128 bits size.
|
||||
/// Encode an arbitrarily sized integer.
|
||||
///
|
||||
/// This is byte alignment agnostic.
|
||||
/// First we use zigzag once to double the number and encode the negative
|
||||
/// sign as the least significant bit. Next we encode the 7 least
|
||||
/// significant bits of the unsigned integer. If the number is greater than
|
||||
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
||||
/// the next 7 bits and so on.
|
||||
pub fn big_integer(&mut self, i: i128) -> &mut Self {
|
||||
let i = zigzag::to_u128(i);
|
||||
|
||||
self.big_word(i);
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
|
||||
self.big_word(i.zigzag());
|
||||
self
|
||||
}
|
||||
|
||||
|
|
@ -181,18 +180,25 @@ impl Encoder {
|
|||
/// We encode the 7 least significant bits of the unsigned integer. If the
|
||||
/// value is greater than 127 we encode a leading 1 followed by
|
||||
/// repeating the above for the next 7 bits and so on.
|
||||
pub fn big_word(&mut self, c: u128) -> &mut Self {
|
||||
#[cfg(feature = "num-bigint")]
|
||||
pub fn big_word(&mut self, c: BigUint) -> &mut Self {
|
||||
let mut d = c;
|
||||
let zero = (0 as u8).into();
|
||||
loop {
|
||||
let mut w = (d & 127) as u8;
|
||||
let m: usize = 127;
|
||||
let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
|
||||
.to_bytes_be()
|
||||
.pop()
|
||||
.unwrap();
|
||||
|
||||
d >>= 7;
|
||||
|
||||
if d != 0 {
|
||||
if d != zero {
|
||||
w |= 128;
|
||||
}
|
||||
self.bits(8, w);
|
||||
|
||||
if d == 0 {
|
||||
if d == zero {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@ mod error;
|
|||
|
||||
use crate::flat::filler::Filler;
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
use num_bigint::BigInt;
|
||||
|
||||
pub use encoder::Encoder;
|
||||
pub use error::Error;
|
||||
|
||||
|
|
@ -26,9 +29,10 @@ impl Encode for u8 {
|
|||
}
|
||||
}
|
||||
|
||||
impl Encode for i128 {
|
||||
#[cfg(feature = "num-bigint")]
|
||||
impl Encode for BigInt {
|
||||
fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
|
||||
e.big_integer(*self);
|
||||
e.big_integer(self.clone());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,27 +1,51 @@
|
|||
pub fn to_usize(x: isize) -> usize {
|
||||
let double_x = x << 1;
|
||||
#[cfg(feature = "num-bigint")]
|
||||
use num_bigint::{BigInt, BigUint, ToBigInt};
|
||||
|
||||
if x.is_positive() || x == 0 {
|
||||
double_x as usize
|
||||
} else {
|
||||
(-double_x - 1) as usize
|
||||
pub trait ZigZag {
|
||||
type Zag;
|
||||
fn zigzag(self) -> Self::Zag;
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
impl ZigZag for BigInt {
|
||||
type Zag = BigUint;
|
||||
|
||||
fn zigzag(self) -> Self::Zag where {
|
||||
if self >= 0.into() {
|
||||
self << 1
|
||||
} else {
|
||||
let double: BigInt = self << 1;
|
||||
-double - <u8 as Into<BigInt>>::into(1)
|
||||
}
|
||||
.to_biguint()
|
||||
.expect("number is positive")
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_isize(u: usize) -> isize {
|
||||
((u >> 1) as isize) ^ (-((u & 1) as isize))
|
||||
}
|
||||
impl ZigZag for isize {
|
||||
type Zag = usize;
|
||||
|
||||
pub fn to_u128(x: i128) -> u128 {
|
||||
let double_x = x << 1;
|
||||
|
||||
if x.is_positive() || x == 0 {
|
||||
double_x as u128
|
||||
} else {
|
||||
(-double_x - 1) as u128
|
||||
fn zigzag(self) -> Self::Zag where {
|
||||
let bits = isize::BITS as i128;
|
||||
let i = self as i128;
|
||||
((i << 1) ^ (i >> (bits - 1))) as usize
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_i128(u: u128) -> i128 {
|
||||
((u >> 1) as i128) ^ (-((u & 1) as i128))
|
||||
#[cfg(feature = "num-bigint")]
|
||||
impl ZigZag for BigUint {
|
||||
type Zag = BigInt;
|
||||
|
||||
fn zigzag(self) -> Self::Zag where {
|
||||
let i = self.to_bigint().expect("always possible");
|
||||
(i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
|
||||
}
|
||||
}
|
||||
|
||||
impl ZigZag for usize {
|
||||
type Zag = isize;
|
||||
|
||||
fn zigzag(self) -> Self::Zag where {
|
||||
((self >> 1) as isize) ^ -((self & 1) as isize)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,37 @@ prop_compose! {
|
|||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "num-bigint")]
|
||||
mod bigint {
|
||||
use super::arb_big_vec;
|
||||
use num_bigint::{BigInt, Sign};
|
||||
use pallas_codec::flat::{decode, encode};
|
||||
use proptest::prelude::*;
|
||||
|
||||
prop_compose! {
|
||||
fn arb_isize()(i: isize) -> BigInt {
|
||||
i.into()
|
||||
}
|
||||
}
|
||||
|
||||
fn arb_bigint() -> impl Strategy<Value = BigInt> {
|
||||
prop_oneof![
|
||||
arb_isize(),
|
||||
arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs)),
|
||||
arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs))
|
||||
]
|
||||
}
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn encode_bigint(x in arb_bigint()) {
|
||||
let bytes = encode(&x).unwrap();
|
||||
let decoded: BigInt = decode(&bytes).unwrap();
|
||||
assert_eq!(decoded, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_bool() {
|
||||
let bytes = encode(&true).unwrap();
|
||||
|
|
|
|||
|
|
@ -1,18 +1,18 @@
|
|||
use pallas_codec::flat::zigzag::{to_isize, to_usize};
|
||||
use pallas_codec::flat::zigzag::ZigZag;
|
||||
use proptest::prelude::*;
|
||||
|
||||
proptest! {
|
||||
#[test]
|
||||
fn zigzag(i: isize) {
|
||||
let u = to_usize(i);
|
||||
let converted_i = to_isize(u);
|
||||
let u = i.zigzag();
|
||||
let converted_i = u.zigzag();
|
||||
assert_eq!(converted_i, i);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn zagzig(u: usize) {
|
||||
let i = to_isize(u);
|
||||
let converted_u = to_usize(i);
|
||||
let i = u.zigzag();
|
||||
let converted_u = i.zigzag();
|
||||
assert_eq!(converted_u, u);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue