fix(codec): Fix flat encoding and decoding of arbitrarily size integers (#378)
This commits fixes the flat encoding and decoding (and consequently,
the zigzag) for large integers in the following ways:
- It removes support for encoding and decoding i128 values.
- It optionally (feature = "num-bigint") introduces encoding and
decoding of large sized integers through the num-bigint::BigInt
type.
Without the feature enabled, it is still possible to encode and decode
isize values; but the use of i128 is now prohibited (as it would
overflow on boundaries) in favor of arbitrarily sized integers.
The commit also introduces a missing property roundtrip for encoding
and decoding large integers, which was missing and thus, failed to
identify the overflow problem.
See related issue: https://github.com/aiken-lang/aiken/issues/796
This commit is contained in:
parent
14e0809ea2
commit
7cb1ffe100
8 changed files with 133 additions and 54 deletions
|
|
@ -14,9 +14,13 @@ authors = [
|
||||||
"Kasey White <kwhitemsg@gmail.com>",
|
"Kasey White <kwhitemsg@gmail.com>",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
minicbor = { version = "0.20", features = ["std", "half", "derive"] }
|
minicbor = { version = "0.20", features = ["std", "half", "derive"] }
|
||||||
|
num-bigint = { version = "0.4.4", optional = true }
|
||||||
serde = { version = "1.0.143", features = ["derive"] }
|
serde = { version = "1.0.143", features = ["derive"] }
|
||||||
thiserror = "1.0.39"
|
thiserror = "1.0.39"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
use super::Decode;
|
use super::Decode;
|
||||||
use crate::flat::zigzag;
|
|
||||||
|
|
||||||
use super::Error;
|
use super::Error;
|
||||||
|
use crate::flat::zigzag::ZigZag;
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
use num_bigint::{BigInt, BigUint};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct Decoder<'b> {
|
pub struct Decoder<'b> {
|
||||||
|
|
@ -24,7 +26,8 @@ impl<'b> Decoder<'b> {
|
||||||
T::decode(self)
|
T::decode(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode an integer of any size.
|
/// Decode an isize integer.
|
||||||
|
///
|
||||||
/// This is byte alignment agnostic.
|
/// This is byte alignment agnostic.
|
||||||
/// First we decode the next 8 bits of the buffer.
|
/// First we decode the next 8 bits of the buffer.
|
||||||
/// We take the 7 least significant bits as the 7 least significant bits of
|
/// We take the 7 least significant bits as the 7 least significant bits of
|
||||||
|
|
@ -35,10 +38,11 @@ impl<'b> Decoder<'b> {
|
||||||
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
||||||
/// back to a signed integer.
|
/// back to a signed integer.
|
||||||
pub fn integer(&mut self) -> Result<isize, Error> {
|
pub fn integer(&mut self) -> Result<isize, Error> {
|
||||||
Ok(zigzag::to_isize(self.word()?))
|
Ok(self.word()?.zigzag())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode an integer of 128 bits size.
|
/// Decode an integer of an arbitrary size..
|
||||||
|
///
|
||||||
/// This is byte alignment agnostic.
|
/// This is byte alignment agnostic.
|
||||||
/// First we decode the next 8 bits of the buffer.
|
/// First we decode the next 8 bits of the buffer.
|
||||||
/// We take the 7 least significant bits as the 7 least significant bits of
|
/// We take the 7 least significant bits as the 7 least significant bits of
|
||||||
|
|
@ -48,8 +52,9 @@ impl<'b> Decoder<'b> {
|
||||||
/// so on. If the most significant bit was instead 0 we stop decoding
|
/// so on. If the most significant bit was instead 0 we stop decoding
|
||||||
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
/// any more bits. Finally we use zigzag to convert the unsigned integer
|
||||||
/// back to a signed integer.
|
/// back to a signed integer.
|
||||||
pub fn big_integer(&mut self) -> Result<i128, Error> {
|
#[cfg(feature = "num-bigint")]
|
||||||
Ok(zigzag::to_i128(self.big_word()?))
|
pub fn big_integer(&mut self) -> Result<BigInt, Error> {
|
||||||
|
Ok(self.big_word()?.zigzag())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Decode a single bit of the buffer to get a bool.
|
/// Decode a single bit of the buffer to get a bool.
|
||||||
|
|
@ -162,15 +167,16 @@ impl<'b> Decoder<'b> {
|
||||||
/// filling in the next 7 least significant bits of the unsigned integer and
|
/// filling in the next 7 least significant bits of the unsigned integer and
|
||||||
/// so on. If the most significant bit was instead 0 we stop decoding
|
/// so on. If the most significant bit was instead 0 we stop decoding
|
||||||
/// any more bits.
|
/// any more bits.
|
||||||
pub fn big_word(&mut self) -> Result<u128, Error> {
|
#[cfg(feature = "num-bigint")]
|
||||||
|
pub fn big_word(&mut self) -> Result<BigUint, Error> {
|
||||||
let mut leading_bit = 1;
|
let mut leading_bit = 1;
|
||||||
let mut final_word: u128 = 0;
|
let mut final_word: BigUint = (0 as u8).into();
|
||||||
let mut shl: u128 = 0;
|
let mut shl: u128 = 0;
|
||||||
// continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
|
// continue looping if lead bit is 1 which is 128 as a u8 otherwise exit
|
||||||
while leading_bit > 0 {
|
while leading_bit > 0 {
|
||||||
let word8 = self.bits8(8)?;
|
let word8 = self.bits8(8)?;
|
||||||
let word7 = word8 & 127;
|
let word7 = word8 & 127;
|
||||||
final_word |= (word7 as u128) << shl;
|
final_word |= <u8 as Into<BigUint>>::into(word7) << shl;
|
||||||
shl += 7;
|
shl += 7;
|
||||||
leading_bit = word8 & 128;
|
leading_bit = word8 & 128;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@ mod error;
|
||||||
|
|
||||||
use crate::flat::filler::Filler;
|
use crate::flat::filler::Filler;
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
use num_bigint::BigInt;
|
||||||
|
|
||||||
pub use decoder::Decoder;
|
pub use decoder::Decoder;
|
||||||
pub use error::Error;
|
pub use error::Error;
|
||||||
|
|
||||||
|
|
@ -36,9 +39,10 @@ impl Decode<'_> for isize {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Decode<'_> for i128 {
|
#[cfg(feature = "num-bigint")]
|
||||||
|
impl Decode<'_> for BigInt {
|
||||||
fn decode(d: &mut Decoder) -> Result<Self, Error> {
|
fn decode(d: &mut Decoder) -> Result<Self, Error> {
|
||||||
d.big_integer()
|
Ok(d.big_integer()?.into())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
use super::Encode;
|
use super::Encode;
|
||||||
use crate::flat::zigzag;
|
|
||||||
|
|
||||||
use super::Error;
|
use super::Error;
|
||||||
|
use crate::flat::zigzag::ZigZag;
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
use num_bigint::{BigInt, BigUint};
|
||||||
|
|
||||||
pub struct Encoder {
|
pub struct Encoder {
|
||||||
pub buffer: Vec<u8>,
|
pub buffer: Vec<u8>,
|
||||||
|
|
@ -89,7 +91,8 @@ impl Encoder {
|
||||||
Ok(self)
|
Ok(self)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode an integer of any size.
|
/// Encode an isize integer.
|
||||||
|
///
|
||||||
/// This is byte alignment agnostic.
|
/// This is byte alignment agnostic.
|
||||||
/// First we use zigzag once to double the number and encode the negative
|
/// First we use zigzag once to double the number and encode the negative
|
||||||
/// sign as the least significant bit. Next we encode the 7 least
|
/// sign as the least significant bit. Next we encode the 7 least
|
||||||
|
|
@ -97,25 +100,21 @@ impl Encoder {
|
||||||
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
||||||
/// the next 7 bits and so on.
|
/// the next 7 bits and so on.
|
||||||
pub fn integer(&mut self, i: isize) -> &mut Self {
|
pub fn integer(&mut self, i: isize) -> &mut Self {
|
||||||
let i = zigzag::to_usize(i);
|
self.word(i.zigzag());
|
||||||
|
|
||||||
self.word(i);
|
|
||||||
|
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Encode an integer of 128 bits size.
|
/// Encode an arbitrarily sized integer.
|
||||||
|
///
|
||||||
/// This is byte alignment agnostic.
|
/// This is byte alignment agnostic.
|
||||||
/// First we use zigzag once to double the number and encode the negative
|
/// First we use zigzag once to double the number and encode the negative
|
||||||
/// sign as the least significant bit. Next we encode the 7 least
|
/// sign as the least significant bit. Next we encode the 7 least
|
||||||
/// significant bits of the unsigned integer. If the number is greater than
|
/// significant bits of the unsigned integer. If the number is greater than
|
||||||
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
/// 127 we encode a leading 1 followed by repeating the encoding above for
|
||||||
/// the next 7 bits and so on.
|
/// the next 7 bits and so on.
|
||||||
pub fn big_integer(&mut self, i: i128) -> &mut Self {
|
#[cfg(feature = "num-bigint")]
|
||||||
let i = zigzag::to_u128(i);
|
pub fn big_integer(&mut self, i: BigInt) -> &mut Self {
|
||||||
|
self.big_word(i.zigzag());
|
||||||
self.big_word(i);
|
|
||||||
|
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -181,18 +180,25 @@ impl Encoder {
|
||||||
/// We encode the 7 least significant bits of the unsigned byte. If the char
|
/// We encode the 7 least significant bits of the unsigned byte. If the char
|
||||||
/// value is greater than 127 we encode a leading 1 followed by
|
/// value is greater than 127 we encode a leading 1 followed by
|
||||||
/// repeating the above for the next 7 bits and so on.
|
/// repeating the above for the next 7 bits and so on.
|
||||||
pub fn big_word(&mut self, c: u128) -> &mut Self {
|
#[cfg(feature = "num-bigint")]
|
||||||
|
pub fn big_word(&mut self, c: BigUint) -> &mut Self {
|
||||||
let mut d = c;
|
let mut d = c;
|
||||||
|
let zero = (0 as u8).into();
|
||||||
loop {
|
loop {
|
||||||
let mut w = (d & 127) as u8;
|
let m: usize = 127;
|
||||||
|
let mut w = (d.clone() & <usize as Into<BigUint>>::into(m))
|
||||||
|
.to_bytes_be()
|
||||||
|
.pop()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
d >>= 7;
|
d >>= 7;
|
||||||
|
|
||||||
if d != 0 {
|
if d != zero {
|
||||||
w |= 128;
|
w |= 128;
|
||||||
}
|
}
|
||||||
self.bits(8, w);
|
self.bits(8, w);
|
||||||
|
|
||||||
if d == 0 {
|
if d == zero {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,9 @@ mod error;
|
||||||
|
|
||||||
use crate::flat::filler::Filler;
|
use crate::flat::filler::Filler;
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
use num_bigint::BigInt;
|
||||||
|
|
||||||
pub use encoder::Encoder;
|
pub use encoder::Encoder;
|
||||||
pub use error::Error;
|
pub use error::Error;
|
||||||
|
|
||||||
|
|
@ -26,9 +29,10 @@ impl Encode for u8 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Encode for i128 {
|
#[cfg(feature = "num-bigint")]
|
||||||
|
impl Encode for BigInt {
|
||||||
fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
|
fn encode(&self, e: &mut Encoder) -> Result<(), Error> {
|
||||||
e.big_integer(*self);
|
e.big_integer(self.clone());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,27 +1,51 @@
|
||||||
pub fn to_usize(x: isize) -> usize {
|
#[cfg(feature = "num-bigint")]
|
||||||
let double_x = x << 1;
|
use num_bigint::{BigInt, BigUint, ToBigInt};
|
||||||
|
|
||||||
if x.is_positive() || x == 0 {
|
pub trait ZigZag {
|
||||||
double_x as usize
|
type Zag;
|
||||||
} else {
|
fn zigzag(self) -> Self::Zag;
|
||||||
(-double_x - 1) as usize
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
impl ZigZag for BigInt {
|
||||||
|
type Zag = BigUint;
|
||||||
|
|
||||||
|
fn zigzag(self) -> Self::Zag where {
|
||||||
|
if self >= 0.into() {
|
||||||
|
self << 1
|
||||||
|
} else {
|
||||||
|
let double: BigInt = self << 1;
|
||||||
|
-double - <u8 as Into<BigInt>>::into(1)
|
||||||
|
}
|
||||||
|
.to_biguint()
|
||||||
|
.expect("number is positive")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_isize(u: usize) -> isize {
|
impl ZigZag for isize {
|
||||||
((u >> 1) as isize) ^ (-((u & 1) as isize))
|
type Zag = usize;
|
||||||
}
|
|
||||||
|
|
||||||
pub fn to_u128(x: i128) -> u128 {
|
fn zigzag(self) -> Self::Zag where {
|
||||||
let double_x = x << 1;
|
let bits = isize::BITS as i128;
|
||||||
|
let i = self as i128;
|
||||||
if x.is_positive() || x == 0 {
|
((i << 1) ^ (i >> (bits - 1))) as usize
|
||||||
double_x as u128
|
|
||||||
} else {
|
|
||||||
(-double_x - 1) as u128
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_i128(u: u128) -> i128 {
|
#[cfg(feature = "num-bigint")]
|
||||||
((u >> 1) as i128) ^ (-((u & 1) as i128))
|
impl ZigZag for BigUint {
|
||||||
|
type Zag = BigInt;
|
||||||
|
|
||||||
|
fn zigzag(self) -> Self::Zag where {
|
||||||
|
let i = self.to_bigint().expect("always possible");
|
||||||
|
(i.clone() >> 1) ^ -(i & <u8 as Into<BigInt>>::into(1))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ZigZag for usize {
|
||||||
|
type Zag = isize;
|
||||||
|
|
||||||
|
fn zigzag(self) -> Self::Zag where {
|
||||||
|
((self >> 1) as isize) ^ -((self & 1) as isize)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,37 @@ prop_compose! {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "num-bigint")]
|
||||||
|
mod bigint {
|
||||||
|
use super::arb_big_vec;
|
||||||
|
use num_bigint::{BigInt, Sign};
|
||||||
|
use pallas_codec::flat::{decode, encode};
|
||||||
|
use proptest::prelude::*;
|
||||||
|
|
||||||
|
prop_compose! {
|
||||||
|
fn arb_isize()(i: isize) -> BigInt {
|
||||||
|
i.into()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn arb_bigint() -> impl Strategy<Value = BigInt> {
|
||||||
|
prop_oneof![
|
||||||
|
arb_isize(),
|
||||||
|
arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Plus, &xs)),
|
||||||
|
arb_big_vec().prop_map(|xs| BigInt::from_bytes_be(Sign::Minus, &xs))
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
proptest! {
|
||||||
|
#[test]
|
||||||
|
fn encode_bigint(x in arb_bigint()) {
|
||||||
|
let bytes = encode(&x).unwrap();
|
||||||
|
let decoded: BigInt = decode(&bytes).unwrap();
|
||||||
|
assert_eq!(decoded, x);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn encode_bool() {
|
fn encode_bool() {
|
||||||
let bytes = encode(&true).unwrap();
|
let bytes = encode(&true).unwrap();
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,18 @@
|
||||||
use pallas_codec::flat::zigzag::{to_isize, to_usize};
|
use pallas_codec::flat::zigzag::ZigZag;
|
||||||
use proptest::prelude::*;
|
use proptest::prelude::*;
|
||||||
|
|
||||||
proptest! {
|
proptest! {
|
||||||
#[test]
|
#[test]
|
||||||
fn zigzag(i: isize) {
|
fn zigzag(i: isize) {
|
||||||
let u = to_usize(i);
|
let u = i.zigzag();
|
||||||
let converted_i = to_isize(u);
|
let converted_i = u.zigzag();
|
||||||
assert_eq!(converted_i, i);
|
assert_eq!(converted_i, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn zagzig(u: usize) {
|
fn zagzig(u: usize) {
|
||||||
let i = to_isize(u);
|
let i = u.zigzag();
|
||||||
let converted_u = to_usize(i);
|
let converted_u = i.zigzag();
|
||||||
assert_eq!(converted_u, u);
|
assert_eq!(converted_u, u);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue