Skip to content

Commit

Permalink
fix:fix calculate arrow nest type and add ut (#38527)
Browse files Browse the repository at this point in the history
#37767

Signed-off-by: luzhang <[email protected]>
Co-authored-by: luzhang <[email protected]>
  • Loading branch information
zhagnlu and luzhang authored Dec 18, 2024
1 parent fb0e689 commit 6ee94d0
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 6 deletions.
16 changes: 10 additions & 6 deletions internal/storage/serde.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,15 +620,18 @@ func calculateArraySize(a arrow.Array) int {
offset := a.Data().Offset()
length := a.Len()

if len(a.NullBitmapBytes()) > 0 {
totalSize += (length + 7) / 8
}

for i, buf := range a.Data().Buffers() {
if buf == nil {
continue
}

switch i {
case 0:
// Handle bitmap buffer
totalSize += (length + 7) / 8
// Handle bitmap buffer, already handled
case 1:
switch a.DataType().ID() {
case arrow.STRING, arrow.BINARY:
Expand All @@ -639,13 +642,14 @@ func calculateArraySize(a arrow.Array) int {
case arrow.LIST:
// Handle nest types like list
for i := 0; i < length; i++ {
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[i*4:]))
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(i+1)*4:]))
totalSize += endOffset - startOffset
startOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(offset+i)*4:]))
endOffset := int(binary.LittleEndian.Uint32(buf.Bytes()[(offset+i+1)*4:]))
elementSize := a.DataType().(*arrow.ListType).Elem().(arrow.FixedWidthDataType).Bytes()
totalSize += (endOffset - startOffset) * elementSize
}
default:
// Handle fixed-length types
elementSize := buf.Len() / a.Data().Len()
elementSize := a.DataType().(arrow.FixedWidthDataType).Bytes()
totalSize += elementSize * length
}
}
Expand Down
95 changes: 95 additions & 0 deletions internal/storage/serde_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,98 @@ func BenchmarkBinlogIterator(b *testing.B) {
assert.False(b, itr.HasNext())
}
}

// TestCalculateArraySize runs calculateArraySize over small arrays of several
// Arrow types and compares the result with a hand-computed byte count.
func TestCalculateArraySize(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.DefaultAllocator)
	// Assert the checked allocator's outstanding size is 0 once the test ends.
	defer pool.AssertSize(t, 0)

	// sizeCase pairs an array factory with the size expected for its output.
	type sizeCase struct {
		name  string
		build func() arrow.Array
		want  int
	}

	// An int32 array with no values appended.
	buildEmptyInt32 := func() arrow.Array {
		bld := array.NewInt32Builder(pool)
		defer bld.Release()
		return bld.NewArray()
	}

	// An int32 array holding four values.
	buildInt32 := func() arrow.Array {
		bld := array.NewInt32Builder(pool)
		defer bld.Release()
		bld.AppendValues([]int32{1, 2, 3, 4}, nil)
		return bld.NewArray()
	}

	// A string array holding two five-byte strings.
	buildStrings := func() arrow.Array {
		bld := array.NewStringBuilder(pool)
		defer bld.Release()
		bld.AppendValues([]string{"hello", "world"}, nil)
		return bld.NewArray()
	}

	// A list<int32> array with three lists of 3, 2 and 0 values.
	buildInt32Lists := func() arrow.Array {
		bld := array.NewListBuilder(pool, arrow.PrimitiveTypes.Int32)
		defer bld.Release()
		values := bld.ValueBuilder().(*array.Int32Builder)

		bld.Append(true)
		values.AppendValues([]int32{1, 2, 3}, nil)

		bld.Append(true)
		values.AppendValues([]int32{4, 5}, nil)

		bld.Append(true)
		values.AppendValues([]int32{}, nil)

		return bld.NewArray()
	}

	cases := []sizeCase{
		{
			name:  "Empty array",
			build: buildEmptyInt32,
			want:  0,
		},
		{
			name:  "Fixed-length array",
			build: buildInt32,
			want:  17, // 4 values * 4 bytes + 1 bitmap byte
		},
		{
			name:  "Variable-length string array",
			build: buildStrings,
			want:  11, // "hello" (5 bytes) + "world" (5 bytes) + 1 bitmap byte
		},
		{
			name:  "Nested list array",
			build: buildInt32Lists,
			want:  21, // (3 + 2) int32 values * 4 bytes + 1 bitmap byte
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			arr := tc.build()
			defer arr.Release()

			if got := calculateArraySize(arr); got != tc.want {
				t.Errorf("Expected size %d, got %d", tc.want, got)
			}
		})
	}
}

// TestCalculateArraySizeWithOffset runs calculateArraySize on a slice of a
// string array that starts at index 1 of its parent, so only the sliced
// elements should be counted.
func TestCalculateArraySizeWithOffset(t *testing.T) {
	pool := memory.NewCheckedAllocator(memory.DefaultAllocator)
	// Assert the checked allocator's outstanding size is 0 once the test ends.
	defer pool.AssertSize(t, 0)

	bld := array.NewStringBuilder(pool)
	defer bld.Release()

	words := []string{"zero", "one", "two", "three", "four"}
	bld.AppendValues(words, nil)
	parent := bld.NewArray()
	defer parent.Release()

	// Slice [1, 4) of the parent: "one", "two", "three".
	window := array.NewSlice(parent, 1, 4)
	defer window.Release()

	// Expected size: bytes of the three sliced strings plus 1 bitmap byte.
	want := 1
	for _, w := range words[1:4] {
		want += len(w)
	}

	got := calculateArraySize(window)
	if got != want {
		t.Errorf("Expected size %d, got %d", want, got)
	}
}

0 comments on commit 6ee94d0

Please sign in to comment.