Skip to content

Commit

Permalink
implemented zigzag encoding for integers
Browse files Browse the repository at this point in the history
  • Loading branch information
usix79 committed Jun 13, 2022
1 parent e4c2ab9 commit f1dd1f9
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 18 deletions.
71 changes: 53 additions & 18 deletions src/BinaryImpl.fs
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,56 @@ open System
open System.IO
open System.Numerics


module BinaryHelpers =

    /// Writes `v` to `writer` as a zigzag-encoded, variable-length integer.
    ///
    /// The signed value is first zigzag-mapped onto an unsigned one
    /// (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...) so that values of small magnitude
    /// produce short encodings, and is then emitted 7 bits per byte,
    /// least-significant group first. The high bit of every byte except the last
    /// is set to signal that more bytes follow. At most 10 bytes are written.
    let zigzagEncode (writer: BinaryWriter) (v: int64) =
        // `v >>> 63` is an arithmetic shift: all ones for negative input, zero otherwise.
        let mutable remaining = uint64 ((v >>> 63) ^^^ (v <<< 1))
        // Anything that does not fit in 7 bits needs a continuation byte.
        while remaining >= 0x80UL do
            writer.Write(byte ((remaining &&& 0x7FUL) ||| 0x80UL))
            remaining <- remaining >>> 7
        // Final group: high bit clear terminates the sequence.
        writer.Write(byte remaining)

    /// Reads a zigzag-encoded, variable-length integer from `reader` and returns
    /// it as an int64.
    ///
    /// Raises OverflowException when the encoding does not fit in 64 bits: either
    /// it runs past 10 bytes, or the tenth byte carries payload bits beyond bit 63.
    /// Exceptions thrown by `reader.ReadByte()` (for example at end of stream)
    /// propagate unchanged.
    let zigzagDecode (reader: BinaryReader) =
        let mutable result = 0UL
        let mutable shift = 0
        let mutable read = 0
        let mutable stop = false
        while not stop do
            // ceil(64 / 7) = 10 bytes is the longest possible 64-bit encoding.
            if read = 10 then
                raise (OverflowException "Encoded integer exceeds long bounds.")
            read <- read + 1
            let chunk = reader.ReadByte() |> uint64
            let payload = chunk &&& 0x7FUL
            // On the tenth byte (shift = 63) only the lowest payload bit still fits;
            // anything above it would otherwise be shifted out and silently lost.
            if shift = 63 && payload > 1UL then
                raise (OverflowException "Encoded integer exceeds long bounds.")
            result <- result ||| (payload <<< shift)
            shift <- shift + 7
            // A clear high bit marks the last byte of the value.
            stop <- (chunk &&& 0x80UL) = 0UL
        // Undo the zigzag mapping: the lowest bit is the sign, the rest the magnitude.
        (int64 (result >>> 1)) ^^^ (-(int64 (result &&& 1UL)))

type BinaryBuilder(writer:BinaryWriter) =

interface IAvroBuilder with

member _.Start() = ()
member _.Null() = ()
member _.Boolean(v: bool) = writer.Write(if v then 1uy else 0uy)
member _.Int(v: int) = writer.Write v
member _.Long(v: int64) = writer.Write v
member _.Int(v: int) = BinaryHelpers.zigzagEncode writer (v |> int64)
member _.Long(v: int64) = BinaryHelpers.zigzagEncode writer v
member _.Float(v: float32) = writer.Write v
member _.Double(v: float) = writer.Write v
member this.String(v: string) = (this :> IAvroBuilder).Bytes(System.Text.Encoding.UTF8.GetBytes(v))

member _.Bytes(v: byte array) =
writer.Write v.LongLength
BinaryHelpers.zigzagEncode writer v.LongLength
writer.Write v

member this.Decimal(v: decimal, schema: DecimalSchema) =
Expand All @@ -29,12 +64,12 @@ type BinaryBuilder(writer:BinaryWriter) =

member _.Enum(idx:int, symbol: string) = writer.Write idx
member _.StartArray() = ()
member _.StartArrayBlock(size: int64) = writer.Write(size)
member _.EndArray() = writer.Write(0L)
member _.StartArrayBlock(size: int64) = BinaryHelpers.zigzagEncode writer size
member _.EndArray() = BinaryHelpers.zigzagEncode writer 0L
member _.StartMap() = ()
member _.StartMapBlock(size: int64) = writer.Write(size)
member _.StartMapBlock(size: int64) = BinaryHelpers.zigzagEncode writer size
member this.Key(key: string) = (this :> IAvroBuilder).String(key)
member _.EndMap() = writer.Write(0L)
member _.EndMap() = BinaryHelpers.zigzagEncode writer 0L
member _.StartRecord() = ()
member _.Field(name: string) = true
member _.EndRecord() = ()
Expand Down Expand Up @@ -88,19 +123,19 @@ module BinaryDirector =
let rec write (builder:IAvroBuilder) = function
| Null -> builder.Null()
| Boolean -> (reader.ReadByte() <> 0uy) |> builder.Boolean
| Int -> reader.ReadInt32() |> builder.Int
| Long -> reader.ReadInt64() |> builder.Long
| Int -> BinaryHelpers.zigzagDecode reader |> int32 |> builder.Int
| Long -> BinaryHelpers.zigzagDecode reader |> builder.Long
| Float -> reader.ReadSingle() |> builder.Float
| Double -> reader.ReadDouble() |> builder.Double
| Bytes ->
let size = reader.ReadInt64()
let size = BinaryHelpers.zigzagDecode reader
reader.ReadBytes(int size) |> builder.Bytes
| String ->
let size = reader.ReadInt64()
let size = BinaryHelpers.zigzagDecode reader
let bytes = reader.ReadBytes(int size)
builder.String(System.Text.Encoding.UTF8.GetString(bytes))
| Decimal schema ->
let size = reader.ReadInt64()
let size = BinaryHelpers.zigzagDecode reader
let bytesValue = reader.ReadBytes(int size)
Array.Reverse(bytesValue)
let intValue = BigInteger(bytesValue)
Expand All @@ -118,25 +153,25 @@ module BinaryDirector =
builder.StartArrayBlock size
for _ in 1 .. int size do
write builder schema.Items
reader.ReadInt64() |> arrayFun
BinaryHelpers.zigzagDecode reader |> arrayFun

builder.StartArray()
reader.ReadInt64() |> arrayFun
BinaryHelpers.zigzagDecode reader |> arrayFun
builder.EndArray()
| Map schema ->
let rec mapFun = function
| 0L -> ()
| size ->
builder.StartMapBlock size
for _ in 1 .. int size do
let size = reader.ReadInt64()
let size = BinaryHelpers.zigzagDecode reader
let bytes = reader.ReadBytes(int size)
builder.Key(System.Text.Encoding.UTF8.GetString(bytes))
write builder schema.Values
reader.ReadInt64() |> mapFun
BinaryHelpers.zigzagDecode reader |> mapFun

builder.StartMap()
reader.ReadInt64() |> mapFun
BinaryHelpers.zigzagDecode reader |> mapFun
builder.EndMap()
| Record schema ->
builder.StartRecord()
Expand Down Expand Up @@ -167,4 +202,4 @@ module BinaryDirector =

builder.Start()
write builder writerSchema
builder.End()
builder.End()
33 changes: 33 additions & 0 deletions test/SerdeTests.fs
Original file line number Diff line number Diff line change
Expand Up @@ -368,3 +368,36 @@ let leaveStreamOpenTests =
let copy2 = deserializer schema stream :?> string
Expect.equal "Deserialized message should be equal to the original" subject copy2
}

[<Tests>]
let zigzagEncodingTests =
    // Encodes `value` into a fresh in-memory stream and decodes it straight back.
    // A new stream per value means no bytes from a previous, longer encoding can
    // linger behind the value under test.
    let roundTrip (value: int64) =
        use stream = new MemoryStream()
        // leaveOpen = true: disposing the writer must not close the shared stream.
        use writer = new BinaryWriter(stream, System.Text.Encoding.UTF8, true)
        BinaryHelpers.zigzagEncode writer value
        writer.Flush()
        // Rewind so the reader starts at the first encoded byte.
        stream.Seek(0L, SeekOrigin.Begin) |> ignore
        use reader = new BinaryReader(stream, System.Text.Encoding.UTF8, true)
        BinaryHelpers.zigzagDecode reader

    [
        test "Zigzag encoding int32" {
            let values = [0; 1; -1; 8; 255; 256; 12345; -4321; System.Int32.MaxValue; System.Int32.MinValue]
            for v in values do
                let copy = roundTrip (int64 v) |> int32

                Expect.equal "Deserialized number should be equal to the original" v copy
        }
        test "Zigzag encoding int64" {
            let values = [0L; 1L; -1L; 8L; 255L; 256L; 12345L; -4321L; System.Int64.MaxValue; System.Int64.MinValue]
            for v in values do
                let copy = roundTrip v

                Expect.equal "Deserialized number should be equal to the original" v copy
        }
    ] |> testList "Zigzag encoding"

0 comments on commit f1dd1f9

Please sign in to comment.