Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RAI-22855 fix issues with (u)int128 within value types #102

Merged
merged 11 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/go-build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ name: build/test

on:
push:
branches:
- '*'
schedule:
- cron: '30 5 * * *'
workflow_dispatch:

jobs:
build:
Expand Down
147 changes: 130 additions & 17 deletions rai/results.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ type Tabular interface {
Strings(int) []string
}

// TabularSlice extends Tabular for columns backed by array data in which a
// run of adjacent sub-array values can be grouped into a single column, so
// that the combined values represent a wider value such as an int128.
type TabularSlice interface {
	Tabular
	// ColumnSlice returns a Column for the given pair of ints; the listColumn
	// implementation treats them as a starting column index and a width.
	ColumnSlice(cnum, width int) Column
}

type Relation interface {
Tabular
Showable
Expand Down Expand Up @@ -273,6 +281,8 @@ type listColumn[T any] struct {
cols []Column
}

// Compile-time check that *listColumn[int] implements TabularSlice.
var _ TabularSlice = &listColumn[int]{}

// Column returns a listItemColumn built from the receiver's data, the
// requested column index cnum, and the receiver's column count.
func (c listColumn[T]) Column(cnum int) Column {
return listItemColumn[T]{c.data, cnum, c.ncols}
}
Expand All @@ -287,6 +297,13 @@ func (c listColumn[T]) Columns() []Column {
return c.cols
}

// ColumnSlice returns a column covering `width` adjacent sub-columns of the
// list, starting at index cnum. A width of exactly 1 yields a plain
// listItemColumn; every other width yields a listSliceColumn.
func (c listColumn[T]) ColumnSlice(cnum int, width int) Column {
	switch width {
	case 1:
		return listItemColumn[T]{c.data, cnum, c.ncols}
	default:
		return listSliceColumn[T]{data: c.data, cnum: cnum, width: width, ncols: c.ncols}
	}
}

func (c listColumn[T]) GetItem(rnum int, out []T) {
roffs := rnum * c.ncols
for cnum := 0; cnum < c.ncols; cnum++ {
Expand Down Expand Up @@ -431,6 +448,58 @@ func (c listItemColumn[T]) Value(rnum int) any {
return c.Item(rnum)
}

// listSliceColumn represents a run of adjacent sub-columns of a `listColumn`
// that together form one column of a composite type (e.g. int128).
type listSliceColumn[T any] struct {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this what we were calling compositeColumn on our call? I think I liked that name a bit better

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't call it that because this isn't the only way that values composed from primitive types are stored as columns. Naming a type after its mechanism rather than its purpose isn't a good general preference, but here I think we should be clear about how the struct works versus what user-level functionality it's intended for, given the complexity of all the various "column" transformations going on.

data []T
cnum int
width int
ncols int
}

// Compile-time check that *listSliceColumn[int] implements TabularColumn[int].
var _ TabularColumn[int] = &listSliceColumn[int]{}

// Item returns a freshly allocated slice of length c.width holding the values
// of row rnum that belong to this column, i.e. the c.width consecutive
// elements of c.data starting at offset rnum*c.ncols + c.cnum.
func (c listSliceColumn[T]) Item(rnum int) []T {
	item := make([]T, c.width)
	base := rnum*c.ncols + c.cnum
	for i := range item {
		item[i] = c.data[base+i]
	}
	return item
}

// GetItem copies the values of row rnum that belong to this column into out.
// It writes out[0] through out[c.width-1], reading c.width consecutive
// elements of c.data starting at offset rnum*c.ncols + c.cnum.
func (c listSliceColumn[T]) GetItem(rnum int, out []T) {
	first := rnum*c.ncols + c.cnum
	for i, idx := 0, first; i < c.width; i, idx = i+1, idx+1 {
		out[i] = c.data[idx]
	}
}

// NumCols always returns 1, regardless of the slice's width.
func (c listSliceColumn[T]) NumCols() int {
return 1
}

// Strings returns one string per sub-column value of row rnum: the result has
// length c.width, and element i is asString applied to
// c.data[rnum*c.ncols + c.cnum + i].
func (c listSliceColumn[T]) Strings(rnum int) []string {
	base := rnum*c.ncols + c.cnum
	strs := make([]string, c.width)
	for i := range strs {
		strs[i] = asString(c.data[base+i])
	}
	return strs
}

// NumRows returns len(c.data) divided by c.ncols, using integer division.
func (c listSliceColumn[T]) NumRows() int {
return len(c.data) / c.ncols
}

// String returns asString applied to the []T produced by Item for row rnum.
func (c listSliceColumn[T]) String(rnum int) string {
return asString(c.Item(rnum))
}

// Type returns the result of typeOf for the element type parameter T.
func (c listSliceColumn[T]) Type() any {
return typeOf[T]()
}

// Value returns the []T produced by Item for row rnum, as an `any`.
func (c listSliceColumn[T]) Value(rnum int) any {
return c.Item(rnum)
}

// structColumn holds a slice of member columns.
type structColumn struct {
cols []Column
}
Expand Down Expand Up @@ -1544,7 +1613,7 @@ func newBuiltinValueColumn(vt ValueType, c Column, nrows int) Column {
case "FixedDecimal":
return newDecimalColumn(vt, c)
case "Hash":
return newUint128Column(c.(listColumn[uint64]))
return newUint128Column(c.(TabularColumn[uint64]))
case "Rational":
return newRationalColumn(c)
case "Missing":
Expand Down Expand Up @@ -1574,27 +1643,71 @@ func newSimpleValueColumn(vt ValueType, c Column, nrows int) Column {
return valueColumn{cols}
}

// getSliceWidth reports how many sub-columns of an Arrow array column are
// occupied by `t`, where `t` is one of the parts of a Signature:
//   - a reflect.Type equal to Int128Type or Uint128Type occupies 2,
//   - any other reflect.Type occupies 1,
//   - a ValueType occupies the sum of the widths of its parts,
//   - anything else occupies 0.
func getSliceWidth(t any) int {
	switch tt := t.(type) {
	case reflect.Type:
		// Both 128-bit types are tested together. Go switch cases do not
		// fall through, so giving Int128Type an empty case of its own would
		// skip the `return 2` and yield a width of 0.
		if tt == Int128Type || tt == Uint128Type {
			return 2
		}
		return 1
	case ValueType:
		width := 0
		for _, part := range tt {
			width += getSliceWidth(part)
		}
		return width
	}
	return 0
}

// Projects a valueColumn from an underlying `Tabular` column.
func newTabularValueColumn(vt ValueType, c Tabular, nrows int) Column {
ncol := 0
ncols := len(vt)
cols := make([]Column, ncols)
for i, t := range vt {
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, c.Column(ncol), nrows)
ncol++
case ValueType:
cc = newValueColumn(tt, c.Column(ncol), nrows)
ncol++
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
tcols := len(vt)
cols := make([]Column, tcols)

if tsc, ok := c.(TabularSlice); ok {
for i, t := range vt {
sliceWidth := getSliceWidth(t)
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows)
ncol += sliceWidth
case ValueType:
cc = newValueColumn(tt, tsc.ColumnSlice(ncol, sliceWidth), nrows)
ncol += sliceWidth
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
}
cols[i] = cc
}
} else {
for i, t := range vt {
var cc Column
switch tt := t.(type) {
case reflect.Type:
cc = newRelationColumn(tt, c.Column(ncol), nrows)
ncol++
case ValueType:
cc = newValueColumn(tt, c.Column(ncol), nrows)
ncol++
case string:
cc = newSymbolColumn(tt, nrows)
default:
cc = newLiteralColumn(tt, nrows)
}
cols[i] = cc
}
cols[i] = cc
}

return valueColumn{cols}
}

Expand Down
70 changes: 65 additions & 5 deletions rai/results_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1236,21 +1236,81 @@ var valueTypeTests = []execTest{
}

var extraValueTypeTests = []execTest{
// Tests RAI-22855
{
query: `
module Foo
module Bar
value type MyType = Int, Int
value type MyType = UnsignedInt[64], FixedDecimal[128, 2]
def output = ^MyType[uint[64, 1], decimal[128, 2, 2/3]]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", Uint64Type, vtype("rel:base:FixedDecimal", int64(128), int64(2), Int128Type)))),
pdata: xdata("0.arrow", sig(StructType),
row([]any{uint64(1), []uint64{67, 0}})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", Uint64Type, DecimalType)),
row("output", value("MyType", uint64(1),
NewDecimal128(67, 0, -2)))),
},
{
query: `
value type MyType = Hash
def h(x) = hash128["abc", _, x]
def output = ^MyType[h]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", vtype("rel:base:Hash", Uint128Type)))),
pdata: xdata("0.arrow", sig(Uint64ListType),
row([]uint64{3877405323480549948, 3198683864092244389})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", BigIntType)),
row("output", value("MyType",
NewBigUint128(3877405323480549948, 3198683864092244389)))),
},
{
query: `
value type MyType = Hash, Hash
def h(x) = hash128["abc", _, x]
def output = ^MyType[h, h]`,
mdata: mdata("0.arrow", sig("output",
vtype("MyType", vtype("rel:base:Hash", Uint128Type), vtype("rel:base:Hash", Uint128Type)))),
pdata: xdata("0.arrow", sig(Uint64ListType),
row([]uint64{3877405323480549948, 3198683864092244389, 3877405323480549948, 3198683864092244389})),
rdata: xdata("0.arrow",
sig("output", vtype("MyType", BigIntType, BigIntType)),
row("output", value("MyType",
NewBigUint128(3877405323480549948, 3198683864092244389),
NewBigUint128(3877405323480549948, 3198683864092244389)))),
},
// End tests RAI-22855
{
query: `
module Foo
module Bar
value type MyType = Int, Int
end
end
end
def output = Foo:Bar:^MyType[12, 34]`,
def output = Foo:Bar:^MyType[12, 34]`,
mdata: mdata("0.arrow",
sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type))),
pdata: xdata("0.arrow", sig(Int64ListType), row([]int64{12, 34})),
rdata: xdata("0.arrow",
sig("output", vtype("Foo", "Bar", "MyType", Int64Type, Int64Type)),
row("output", value("Foo", "Bar", "MyType", int64(12), int64(34)))),
},
// RAI-23484 There is a bug with nested value types
/*
{
query: `
value type Foo { Foo2 }
value type Foo2 {Int, SignedInt[128]}
def output { ^Foo[^Foo2[1, int128[2]]] }`,
mdata: mdata("0.arrow", sig("output",
vtype("Foo", vtype("Foo2", Int64Type, Int128Type)))),
pdata: xdata("0.arrow", sig(StructType),
row([]any{int64(1), []uint64{2, 0}})),
rdata: xdata("0.arrow",
sig("output", vtype("Foo", vtype("Foo2", Int64Type, BigIntType))),
row("output", value("Foo", vtype("Foo2", uint64(1), NewBigInt128(2, 0))))),
},
*/
}

var constValueTypeTests = []execTest{
Expand Down
Loading