Skip to content

Commit

Permalink
Support row based Array & fix varchar array type
Browse files Browse the repository at this point in the history
Signed-off-by: Congqi Xia <[email protected]>
  • Loading branch information
congqixia committed Nov 8, 2023
1 parent 40a4e1e commit 7c0a389
Show file tree
Hide file tree
Showing 6 changed files with 165 additions and 108 deletions.
22 changes: 16 additions & 6 deletions entity/columns.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,14 @@ func FieldDataColumn(fd *schema.FieldData, begin, end int) (Column, error) {
if data == nil {
return nil, errFieldDataTypeNotMatch
}
return parseArrayData(fd.GetFieldName(), data)
var arrayData []*schema.ScalarField
if end < 0 {
arrayData = data.GetData()[begin:]
} else {
arrayData = data.GetData()[begin:end]
}

return parseArrayData(fd.GetFieldName(), data.GetElementType(), arrayData)

case schema.DataType_JSON:
data, ok := fd.GetScalars().GetData().(*schema.ScalarField_JsonData)
Expand Down Expand Up @@ -304,9 +311,7 @@ func FieldDataColumn(fd *schema.FieldData, begin, end int) (Column, error) {
}
}

func parseArrayData(fieldName string, array *schema.ArrayArray) (Column, error) {
fieldDataList := array.Data
elementType := array.ElementType
func parseArrayData(fieldName string, elementType schema.DataType, fieldDataList []*schema.ScalarField) (Column, error) {

switch elementType {
case schema.DataType_Bool:
Expand Down Expand Up @@ -368,11 +373,16 @@ func parseArrayData(fieldName string, array *schema.ArrayArray) (Column, error)
}
return NewColumnDoubleArray(fieldName, data), nil

case schema.DataType_VarChar:
case schema.DataType_VarChar, schema.DataType_String:
var data [][][]byte
for _, fd := range fieldDataList {
data = append(data, fd.GetBytesData().GetData())
strs := fd.GetStringData().GetData()
bytesData := make([][]byte, 0, len(strs))
for _, str := range strs {
bytesData = append(bytesData, []byte(str))
}
}

return NewColumnVarCharArray(fieldName, data), nil

default:
Expand Down
107 changes: 107 additions & 0 deletions entity/columns_array.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package entity

import (
"fmt"

"github.com/cockroachdb/errors"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
)

// ColumnVarCharArray is the hand-written column type for Array fields whose
// elements are VarChar. Every row holds a list of elements and every element
// is kept as raw bytes.
type ColumnVarCharArray struct {
ColumnBase
name string // column (field) name
values [][][]byte // one [][]byte per row; each []byte is a single element
}

// Name returns the column name given at construction time.
func (c *ColumnVarCharArray) Name() string {
return c.name
}

// Type returns the column FieldType, which is always FieldTypeArray for this
// column; the VarChar element type is not reflected here.
func (c *ColumnVarCharArray) Type() FieldType {
return FieldTypeArray
}

// Len returns the number of rows (array values) held by the column, not the
// total number of elements across all rows.
func (c *ColumnVarCharArray) Len() int {
return len(c.values)
}

// Get returns the row at idx boxed as interface{}; on success the dynamic
// type is [][]byte (one []byte per element).
//
// When idx is outside [0, Len()) an "index out of range" error is returned
// together with a nil [][]byte, so the boxed value has the same dynamic type
// on both the success and the error path.
func (c *ColumnVarCharArray) Get(idx int) (interface{}, error) {
	var r [][]byte // typed zero value, matches the element type of c.values
	if idx < 0 || idx >= c.Len() {
		return r, errors.New("index out of range")
	}
	return c.values[idx], nil
}

// FieldData returns the column data mapped to a schemapb.FieldData.
//
// The result has Type DataType_Array and carries an ArrayArray whose
// ElementType is DataType_VarChar. Every row of the column becomes one
// ScalarField holding a StringArray, with each []byte element converted to
// a string.
func (c *ColumnVarCharArray) FieldData() *schemapb.FieldData {
	rows := make([]*schemapb.ScalarField, 0, c.Len())
	for _, arr := range c.values {
		// Pre-size by this row's element count; the number of rows in the
		// column says nothing about how many elements a single row holds.
		converted := make([]string, 0, len(arr))
		for _, elem := range arr {
			converted = append(converted, string(elem))
		}
		rows = append(rows, &schemapb.ScalarField{
			Data: &schemapb.ScalarField_StringData{
				StringData: &schemapb.StringArray{
					Data: converted,
				},
			},
		})
	}

	return &schemapb.FieldData{
		Type:      schemapb.DataType_Array,
		FieldName: c.name,
		Field: &schemapb.FieldData_Scalars{
			Scalars: &schemapb.ScalarField{
				Data: &schemapb.ScalarField_ArrayData{
					ArrayData: &schemapb.ArrayArray{
						Data:        rows,
						ElementType: schemapb.DataType_VarChar,
					},
				},
			},
		},
	}
}

// ValueByIdx returns the row stored at idx as a list of byte-slice elements.
// A nil slice and an "index out of range" error are returned when idx is
// negative or not smaller than Len().
func (c *ColumnVarCharArray) ValueByIdx(idx int) ([][]byte, error) {
	if idx >= 0 && idx < c.Len() {
		return c.values[idx], nil
	}
	return nil, errors.New("index out of range")
}

// AppendValue appends one row to the column.
//
// i must have dynamic type [][]byte (one []byte per element of the row).
// Any other type leaves the column untouched and yields an error naming the
// type that was actually supplied.
func (c *ColumnVarCharArray) AppendValue(i interface{}) error {
	v, ok := i.([][]byte)
	if !ok {
		// Report the type the assertion really requires, not []string.
		return fmt.Errorf("invalid type, expected [][]byte, got %T", i)
	}
	c.values = append(c.values, v)

	return nil
}

// Data returns the column's underlying values slice as-is (no copy is made),
// one [][]byte per row.
func (c *ColumnVarCharArray) Data() [][][]byte {
return c.values
}

// NewColumnVarCharArray creates a ColumnVarCharArray with the provided name
// and initial values. The values slice is stored directly, not copied.
func NewColumnVarCharArray(name string, values [][][]byte) *ColumnVarCharArray {
return &ColumnVarCharArray{
name: name,
values: values,
}
}
98 changes: 0 additions & 98 deletions entity/columns_array_gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion entity/genarray/gen_array.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func main() {
entity.FieldTypeInt64,
entity.FieldTypeFloat,
entity.FieldTypeDouble,
entity.FieldTypeVarChar,
// entity.FieldTypeVarChar, change to hand written
}

pf := func(ft entity.FieldType) interface{} {
Expand Down
40 changes: 39 additions & 1 deletion entity/rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@ package entity

import (
"encoding/json"
"errors"
"fmt"
"go/ast"
"reflect"
"strconv"
"strings"

"github.com/cockroachdb/errors"
)

const (
Expand Down Expand Up @@ -324,6 +325,12 @@ func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) {
data := make([][]byte, 0, rowsLen)
col := NewColumnJSONBytes(field.Name, data)
nameColumns[field.Name] = col
case FieldTypeArray:
col := NewArrayColumn(field)
if col == nil {
return nil, errors.Errorf("unsupported element type %s for Array", field.ElementType.String())
}
nameColumns[field.Name] = col
case FieldTypeFloatVector:
data := make([][]float32, 0, rowsLen)
dimStr, has := field.TypeParams[TypeParamDim]
Expand Down Expand Up @@ -415,6 +422,37 @@ func AnyToColumns(rows []interface{}, schemas ...*Schema) ([]Column, error) {
return columns, nil
}

// NewArrayColumn creates an empty array column named after the field f,
// choosing the concrete column type from f.ElementType.
// It returns nil when the element type has no matching array column.
func NewArrayColumn(f *Field) Column {
	name := f.Name

	switch f.ElementType {
	case FieldTypeBool:
		return NewColumnBoolArray(name, nil)
	case FieldTypeInt8:
		return NewColumnInt8Array(name, nil)
	case FieldTypeInt16:
		return NewColumnInt16Array(name, nil)
	case FieldTypeInt32:
		return NewColumnInt32Array(name, nil)
	case FieldTypeInt64:
		return NewColumnInt64Array(name, nil)
	case FieldTypeFloat:
		return NewColumnFloatArray(name, nil)
	case FieldTypeDouble:
		return NewColumnDoubleArray(name, nil)
	case FieldTypeVarChar:
		return NewColumnVarCharArray(name, nil)
	}

	// Unsupported element type: hand back an untyped nil so callers can
	// compare the result against nil.
	return nil
}

// RowsToColumns rows to columns
func RowsToColumns(rows []Row, schemas ...*Schema) ([]Column, error) {
anys := make([]interface{}, 0, len(rows))
Expand Down
4 changes: 2 additions & 2 deletions entity/schema.go
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ func (t FieldType) String() string {
case FieldTypeString:
return "string"
case FieldTypeVarChar:
return "[]byte"
return "string"
case FieldTypeArray:
return "Array"
case FieldTypeJSON:
Expand Down Expand Up @@ -425,7 +425,7 @@ func (t FieldType) PbFieldType() (string, string) {
case FieldTypeString:
return "String", "string"
case FieldTypeVarChar:
return "Bytes", "[]byte"
return "VarChar", "string"
case FieldTypeJSON:
return "JSON", "JSON"
case FieldTypeBinaryVector:
Expand Down

0 comments on commit 7c0a389

Please sign in to comment.