From 3f53bfabeb9db81160b00070f4645a3a90e235df Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Wed, 26 Jun 2024 14:23:38 +0200 Subject: [PATCH] moving row scanning into the data chunk api --- data_chunk.go | 41 ++++++ rows.go | 331 ++++++---------------------------------------- vector.go | 81 ++++++++++++ vector_getters.go | 172 ++++++++++++++++++++++++ 4 files changed, 334 insertions(+), 291 deletions(-) create mode 100644 vector_getters.go diff --git a/data_chunk.go b/data_chunk.go index 8b692496..49311bc2 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -16,6 +16,8 @@ type DataChunk struct { data C.duckdb_data_chunk // columns is a helper slice providing direct access to all columns. columns []vector + // columnNames holds the column names, if known. + columnNames []string } // GetDataChunkCapacity returns the capacity of a data chunk. @@ -23,6 +25,11 @@ func GetDataChunkCapacity() int { return int(C.duckdb_vector_size()) } +// GetSize returns the internal size of the data chunk. +func (chunk *DataChunk) GetSize() int { + return int(C.duckdb_data_chunk_get_size(chunk.data)) +} + // SetSize sets the internal size of the data chunk. Cannot exceed GetCapacity(). func (chunk *DataChunk) SetSize(size int) error { if size > GetDataChunkCapacity() { @@ -32,6 +39,15 @@ func (chunk *DataChunk) SetSize(size int) error { return nil } +// GetValue returns a single value of a column. +func (chunk *DataChunk) GetValue(colIdx int, rowIdx int) (any, error) { + if colIdx >= len(chunk.columns) { + return nil, getError(errAPI, columnCountError(colIdx, len(chunk.columns))) + } + column := &chunk.columns[colIdx] + return column.getFn(column, C.idx_t(rowIdx)), nil +} + // SetValue writes a single value to a column in a data chunk. Note that this requires casting the type for each invocation. // NOTE: Custom ENUM types must be passed as string. 
func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { @@ -56,6 +72,7 @@ func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { } func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type) error { + // NOTE: initFromTypes does not initialize the column names. columnCount := len(types) // Initialize the callback functions to read and write values. @@ -83,6 +100,30 @@ func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logic return nil } +func (chunk *DataChunk) initFromDuckDataChunk(data C.duckdb_data_chunk) error { + columnCount := int(C.duckdb_data_chunk_get_column_count(data)) + chunk.columns = make([]vector, columnCount) + chunk.data = data + + var err error + for i := 0; i < columnCount; i++ { + duckdbVector := C.duckdb_data_chunk_get_vector(data, C.idx_t(i)) + + // Initialize the callback functions to read and write values. + logicalType := C.duckdb_vector_get_column_type(duckdbVector) + err = chunk.columns[i].init(logicalType, i) + C.duckdb_destroy_logical_type(&logicalType) + if err != nil { + break + } + + // Initialize the vectors and their child vectors. + chunk.columns[i].duckdbVector = duckdbVector + chunk.columns[i].getChildVectors(duckdbVector) + } + return err +} + func (chunk *DataChunk) close() { C.duckdb_destroy_data_chunk(&chunk.data) } diff --git a/rows.go b/rows.go index 8c986170..0fbdac7a 100644 --- a/rows.go +++ b/rows.go @@ -7,7 +7,6 @@ import "C" import ( "database/sql/driver" - "errors" "fmt" "io" "math/big" @@ -17,160 +16,71 @@ import ( "unsafe" ) +// rows is a helper struct for scanning a duckdb result. type rows struct { - res C.duckdb_result - stmt *stmt - chunk C.duckdb_data_chunk - columns []string - chunkCount C.idx_t - chunkRowCount C.idx_t - chunkIdx C.idx_t - chunkRowIdx C.idx_t + // stmt is a pointer to the stmt of which we are scanning the result. + stmt *stmt + // res is the result of stmt. 
+ res C.duckdb_result + // chunk holds the currently active data chunk. + chunk DataChunk + // chunkCount is the number of chunks in the result. + chunkCount C.idx_t + // chunkIdx is the chunk index in the result. + chunkIdx C.idx_t + // rowCount is the number of rows already scanned from the currently active chunk; Next resets it to zero for each new chunk. + rowCount int } func newRowsWithStmt(res C.duckdb_result, stmt *stmt) *rows { - n := C.duckdb_column_count(&res) - columns := make([]string, 0, n) - for i := C.idx_t(0); i < n; i++ { - columns = append(columns, C.GoString(C.duckdb_column_name(&res, i))) + columnCount := C.duckdb_column_count(&res) + r := rows{ + res: res, + stmt: stmt, + chunk: DataChunk{}, + chunkCount: C.duckdb_result_chunk_count(res), + chunkIdx: 0, + rowCount: 0, } - return &rows{ - res: res, - stmt: stmt, - columns: columns, - chunkCount: C.duckdb_result_chunk_count(res), - chunkRowCount: 0, - chunkIdx: 0, - chunkRowIdx: 0, + for i := C.idx_t(0); i < columnCount; i++ { + columnName := C.GoString(C.duckdb_column_name(&res, i)) + r.chunk.columnNames = append(r.chunk.columnNames, columnName) } + return &r } func (r *rows) Columns() []string { - return r.columns + return r.chunk.columnNames } func (r *rows) Next(dst []driver.Value) error { - for r.chunkRowIdx == r.chunkRowCount { - C.duckdb_destroy_data_chunk(&r.chunk) + for r.rowCount == r.chunk.GetSize() { + r.chunk.close() if r.chunkIdx == r.chunkCount { return io.EOF } - r.chunk = C.duckdb_result_get_chunk(r.res, r.chunkIdx) + data := C.duckdb_result_get_chunk(r.res, r.chunkIdx) + if err := r.chunk.initFromDuckDataChunk(data); err != nil { + return getError(err, nil) + } + r.chunkIdx++ - r.chunkRowCount = C.duckdb_data_chunk_get_size(r.chunk) - r.chunkRowIdx = 0 + r.rowCount = 0 } - colCount := len(r.columns) - - for colIdx := C.idx_t(0); colIdx < C.idx_t(colCount); colIdx++ { - vector := C.duckdb_data_chunk_get_vector(r.chunk, colIdx) - value, err := scanValue(vector, r.chunkRowIdx) - if err != nil { + columnCount := len(r.chunk.columns) + for colIdx := 0; colIdx < 
columnCount; colIdx++ { + var err error + if dst[colIdx], err = r.chunk.GetValue(colIdx, r.rowCount); err != nil { return err } - dst[colIdx] = value } - r.chunkRowIdx++ - + r.rowCount++ return nil } -func scanValue(vector C.duckdb_vector, rowIdx C.idx_t) (any, error) { - v, err := scan(vector, rowIdx) - if err != nil { - return nil, err - } - - switch value := v.(type) { - case map[string]any, []any, Map, driver.Value: - return value, nil - case nil: - return nil, nil - default: - panic(fmt.Sprintf("BUG: found unexpected type when scanning: %T", value)) - } -} - -func scan(vector C.duckdb_vector, rowIdx C.idx_t) (any, error) { - validity := C.duckdb_vector_get_validity(vector) - if !C.duckdb_validity_row_is_valid(validity, rowIdx) { - return nil, nil - } - - columnType := C.duckdb_vector_get_column_type(vector) - defer C.duckdb_destroy_logical_type(&columnType) - - typeId := C.duckdb_get_type_id(columnType) - switch typeId { - case C.DUCKDB_TYPE_INVALID: - return nil, errInvalidType - case C.DUCKDB_TYPE_BOOLEAN: - return get[bool](vector, rowIdx), nil - case C.DUCKDB_TYPE_TINYINT: - return get[int8](vector, rowIdx), nil - case C.DUCKDB_TYPE_SMALLINT: - return get[int16](vector, rowIdx), nil - case C.DUCKDB_TYPE_INTEGER: - return get[int32](vector, rowIdx), nil - case C.DUCKDB_TYPE_BIGINT: - return get[int64](vector, rowIdx), nil - case C.DUCKDB_TYPE_UTINYINT: - return get[uint8](vector, rowIdx), nil - case C.DUCKDB_TYPE_USMALLINT: - return get[uint16](vector, rowIdx), nil - case C.DUCKDB_TYPE_UINTEGER: - return get[uint32](vector, rowIdx), nil - case C.DUCKDB_TYPE_UBIGINT: - return get[uint64](vector, rowIdx), nil - case C.DUCKDB_TYPE_FLOAT: - return get[float32](vector, rowIdx), nil - case C.DUCKDB_TYPE_DOUBLE: - return get[float64](vector, rowIdx), nil - case C.DUCKDB_TYPE_TIMESTAMP: - return time.UnixMicro(int64(get[C.duckdb_timestamp](vector, rowIdx).micros)).UTC(), nil - case C.DUCKDB_TYPE_DATE: - date := C.duckdb_from_date(get[C.duckdb_date](vector, rowIdx)) 
- return time.Date(int(date.year), time.Month(date.month), int(date.day), 0, 0, 0, 0, time.UTC), nil - case C.DUCKDB_TYPE_TIME: - return time.UnixMicro(int64(get[C.duckdb_time](vector, rowIdx).micros)).UTC(), nil - case C.DUCKDB_TYPE_INTERVAL: - return scanInterval(vector, rowIdx) - case C.DUCKDB_TYPE_HUGEINT: - hugeInt := get[C.duckdb_hugeint](vector, rowIdx) - return hugeIntToNative(hugeInt), nil - case C.DUCKDB_TYPE_VARCHAR: - return scanString(vector, rowIdx), nil - case C.DUCKDB_TYPE_BLOB: - return scanBlob(vector, rowIdx), nil - case C.DUCKDB_TYPE_DECIMAL: - return scanDecimal(columnType, vector, rowIdx) - case C.DUCKDB_TYPE_TIMESTAMP_S: - return time.Unix(int64(get[C.duckdb_timestamp](vector, rowIdx).micros), 0).UTC(), nil - case C.DUCKDB_TYPE_TIMESTAMP_MS: - return time.UnixMilli(int64(get[C.duckdb_timestamp](vector, rowIdx).micros)).UTC(), nil - case C.DUCKDB_TYPE_TIMESTAMP_NS: - return time.Unix(0, int64(get[C.duckdb_timestamp](vector, rowIdx).micros)).UTC(), nil - case C.DUCKDB_TYPE_ENUM: - return scanENUM(columnType, vector, rowIdx) - case C.DUCKDB_TYPE_LIST: - return scanList(vector, rowIdx) - case C.DUCKDB_TYPE_STRUCT: - return scanStruct(columnType, vector, rowIdx) - case C.DUCKDB_TYPE_MAP: - return scanMap(vector, rowIdx) - case C.DUCKDB_TYPE_UUID: - hugeInt := get[C.duckdb_hugeint](vector, rowIdx) - return hugeIntToUUID(hugeInt), nil - case C.DUCKDB_TYPE_TIMESTAMP_TZ: - return time.UnixMicro(int64(get[C.duckdb_timestamp](vector, rowIdx).micros)).UTC(), nil - default: - return nil, fmt.Errorf("unsupported type %d", typeId) - } -} - // Implements driver.RowsColumnTypeScanType func (r *rows) ColumnTypeScanType(index int) reflect.Type { colType := C.duckdb_column_type(&r.res, C.idx_t(index)) @@ -262,7 +172,7 @@ func (r *rows) ColumnTypeDatabaseTypeName(index int) string { } func (r *rows) Close() error { - C.duckdb_destroy_data_chunk(&r.chunk) + r.chunk.close() C.duckdb_destroy_result(&r.res) var err error @@ -273,170 +183,9 @@ func (r *rows) Close() 
error { } r.stmt = nil } - return err } -func get[T any](vector C.duckdb_vector, rowIdx C.idx_t) T { - ptr := C.duckdb_vector_get_data(vector) - xs := (*[1 << 31]T)(ptr) - return xs[rowIdx] -} - -func scanMap(vector C.duckdb_vector, rowIdx C.idx_t) (Map, error) { - list, err := scanList(vector, rowIdx) - if err != nil { - return nil, err - } - - // DuckDB supports more map key types than Go, which only supports comparable types. - // To avoid a panic, we check that the map key type is comparable. - // All keys in a DuckDB map have the same type, so we just do this check for the first value. - if len(list) > 0 { - mapItem := list[0].(map[string]any) - key, ok := mapItem["key"] - if !ok { - return nil, errMissingKeyOrValue - } - if !reflect.TypeOf(key).Comparable() { - return nil, getError(errUnsupportedMapKeyType, nil) - } - } - - out := Map{} - for i := 0; i < len(list); i++ { - mapItem := list[i].(map[string]any) - key, ok := mapItem["key"] - if !ok { - return nil, errMissingKeyOrValue - } - val, ok := mapItem["value"] - if !ok { - return nil, errMissingKeyOrValue - } - out[key] = val - } - - return out, nil -} - -func scanString(vector C.duckdb_vector, rowIdx C.idx_t) string { - return string(scanBlob(vector, rowIdx)) -} - -// duckdb/tools/juliapkg/src/ctypes.jl -// `json`, `varchar`, and `blob` are C-style char arrays -func scanBlob(vector C.duckdb_vector, rowIdx C.idx_t) []byte { - // we don't have to free s.ptr, as it is part of the data in the vector - s := get[duckdb_string_t](vector, rowIdx) - - if s.length <= stringInlineLength { - // inlined data is stored from byte 4..16 (up to 12 bytes) - return C.GoBytes(unsafe.Pointer(&s.prefix), C.int(s.length)) - } - - // any longer strings are stored as a pointer in `ptr` - return C.GoBytes(unsafe.Pointer(s.ptr), C.int(s.length)) -} - -func scanList(vector C.duckdb_vector, rowIdx C.idx_t) ([]any, error) { - data := C.duckdb_list_vector_get_child(vector) - entry := get[duckdb_list_entry_t](vector, rowIdx) - 
converted := make([]any, 0, entry.length) - - for i := entry.offset; i < entry.offset+entry.length; i++ { - value, err := scan(data, i) - if err != nil { - return nil, err - } - converted = append(converted, value) - } - - return converted, nil -} - -func scanStruct(ty C.duckdb_logical_type, vector C.duckdb_vector, rowIdx C.idx_t) (map[string]any, error) { - data := map[string]any{} - - for j := C.idx_t(0); j < C.duckdb_struct_type_child_count(ty); j++ { - ptrToChildName := C.duckdb_struct_type_child_name(ty, j) - name := C.GoString(ptrToChildName) - C.duckdb_free(unsafe.Pointer(ptrToChildName)) - - child := C.duckdb_struct_vector_get_child(vector, j) - value, err := scan(child, rowIdx) - if err != nil { - return nil, err - } - data[name] = value - } - return data, nil -} - -func scanDecimal(ty C.duckdb_logical_type, vector C.duckdb_vector, rowIdx C.idx_t) (Decimal, error) { - scale := C.duckdb_decimal_scale(ty) - width := C.duckdb_decimal_width(ty) - var nativeValue *big.Int - switch C.duckdb_decimal_internal_type(ty) { - case C.DUCKDB_TYPE_SMALLINT: - nativeValue = big.NewInt(int64(get[int16](vector, rowIdx))) - case C.DUCKDB_TYPE_INTEGER: - nativeValue = big.NewInt(int64(get[int32](vector, rowIdx))) - case C.DUCKDB_TYPE_BIGINT: - nativeValue = big.NewInt(get[int64](vector, rowIdx)) - case C.DUCKDB_TYPE_HUGEINT: - i := get[C.duckdb_hugeint](vector, rowIdx) - nativeValue = hugeIntToNative(C.duckdb_hugeint{ - lower: i.lower, - upper: i.upper, - }) - default: - return Decimal{}, errInvalidType - } - - if nativeValue == nil { - return Decimal{}, fmt.Errorf("unable to convert hugeint to native type") - } - - return Decimal{Width: uint8(width), Scale: uint8(scale), Value: nativeValue}, nil -} - -func scanInterval(vector C.duckdb_vector, rowIdx C.idx_t) (Interval, error) { - i := get[C.duckdb_interval](vector, rowIdx) - data := Interval{ - Days: int32(i.days), - Months: int32(i.months), - Micros: int64(i.micros), - } - return data, nil -} - -func scanENUM(ty 
C.duckdb_logical_type, vector C.duckdb_vector, rowIdx C.idx_t) (string, error) { - var idx uint64 - internalType := C.duckdb_enum_internal_type(ty) - switch internalType { - case C.DUCKDB_TYPE_UTINYINT: - idx = uint64(get[uint8](vector, rowIdx)) - case C.DUCKDB_TYPE_USMALLINT: - idx = uint64(get[uint16](vector, rowIdx)) - case C.DUCKDB_TYPE_UINTEGER: - idx = uint64(get[uint32](vector, rowIdx)) - case C.DUCKDB_TYPE_UBIGINT: - idx = get[uint64](vector, rowIdx) - default: - return "", errInvalidType - } - - val := C.duckdb_enum_dictionary_value(ty, (C.idx_t)(idx)) - defer C.duckdb_free(unsafe.Pointer(val)) - return C.GoString(val), nil -} - -var ( - errInvalidType = errors.New("invalid data type") - errMissingKeyOrValue = errors.New("missing key and/or value for map item") -) - func logicalTypeName(lt C.duckdb_logical_type) string { t := C.duckdb_get_type_id(lt) switch t { diff --git a/vector.go b/vector.go index e7e1d57c..ac05eb2a 100644 --- a/vector.go +++ b/vector.go @@ -18,6 +18,8 @@ import ( type vector struct { // The underlying DuckDB vector. duckdbVector C.duckdb_vector + // A callback function to get a value from this vector. + getFn fnGetVectorValue // A callback function to write to this vector. setFn fnSetVectorValue // The data type of the vector. 
@@ -327,6 +329,12 @@ func (vec *vector) getChildVectors(vector C.duckdb_vector) { } func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return getPrimitive[T](vec, rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -338,6 +346,12 @@ func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { } func (vec *vector) initTS(duckdbType C.duckdb_type) { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getTS(duckdbType, rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -349,6 +363,12 @@ func (vec *vector) initTS(duckdbType C.duckdb_type) { } func (vec *vector) initDate() { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getDate(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -360,6 +380,12 @@ func (vec *vector) initDate() { } func (vec *vector) initTime() { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getTime(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -371,6 +397,12 @@ func (vec *vector) initTime() { } func (vec *vector) initInterval() { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getInterval(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -382,6 +414,12 @@ func (vec *vector) initInterval() { } func (vec *vector) initHugeint() { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getHugeint(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, 
val any) { if val == nil { vec.setNull(rowIdx) @@ -393,6 +431,12 @@ func (vec *vector) initHugeint() { } func (vec *vector) initCString(duckdbType C.duckdb_type) { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getCString(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -410,6 +454,12 @@ func (vec *vector) initDecimal(logicalType C.duckdb_logical_type, colIdx int) er internalType := C.duckdb_decimal_internal_type(logicalType) switch internalType { case C.DUCKDB_TYPE_SMALLINT, C.DUCKDB_TYPE_INTEGER, C.DUCKDB_TYPE_BIGINT, C.DUCKDB_TYPE_HUGEINT: + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getDecimal(internalType, rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -439,6 +489,12 @@ func (vec *vector) initEnum(logicalType C.duckdb_logical_type, colIdx int) error internalType := C.duckdb_enum_internal_type(logicalType) switch internalType { case C.DUCKDB_TYPE_UTINYINT, C.DUCKDB_TYPE_USMALLINT, C.DUCKDB_TYPE_UINTEGER, C.DUCKDB_TYPE_UBIGINT: + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getEnum(internalType, rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -466,6 +522,12 @@ func (vec *vector) initList(logicalType C.duckdb_logical_type, colIdx int) error return err } + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getList(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -500,6 +562,12 @@ func (vec *vector) initStruct(logicalType C.duckdb_logical_type, colIdx int) err } } + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return 
vec.getStruct(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -536,6 +604,12 @@ func (vec *vector) initMap(logicalType C.duckdb_logical_type, colIdx int) error return columnError(errUnsupportedMapKeyType, colIdx) } + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + return vec.getMap(rowIdx) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) @@ -548,6 +622,13 @@ func (vec *vector) initMap(logicalType C.duckdb_logical_type, colIdx int) error } func (vec *vector) initUUID() { + vec.getFn = func(vec *vector, rowIdx C.idx_t) any { + if vec.getNull(rowIdx) { + return nil + } + hugeInt := getPrimitive[C.duckdb_hugeint](vec, rowIdx) + return hugeIntToUUID(hugeInt) + } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { if val == nil { vec.setNull(rowIdx) diff --git a/vector_getters.go b/vector_getters.go new file mode 100644 index 00000000..b97a42c3 --- /dev/null +++ b/vector_getters.go @@ -0,0 +1,172 @@ +package duckdb + +/* +#include <duckdb.h> +#include <stdlib.h> +*/ +import "C" + +import ( + "math/big" + "time" + "unsafe" +) + +// fnGetVectorValue is the getter callback function for any (nested) vector. 
+type fnGetVectorValue func(vec *vector, rowIdx C.idx_t) any + +func (vec *vector) getNull(rowIdx C.idx_t) bool { + mask := C.duckdb_vector_get_validity(vec.duckdbVector) + return !bool(C.duckdb_validity_row_is_valid(mask, rowIdx)) +} + +func getPrimitive[T any](vec *vector, rowIdx C.idx_t) T { + ptr := C.duckdb_vector_get_data(vec.duckdbVector) + xs := (*[1 << 31]T)(ptr) + return xs[rowIdx] +} + +func (vec *vector) getTS(duckdbType C.duckdb_type, rowIdx C.idx_t) time.Time { + val := getPrimitive[C.duckdb_timestamp](vec, rowIdx) + micros := val.micros + + switch duckdbType { + case C.DUCKDB_TYPE_TIMESTAMP: + return time.UnixMicro(int64(micros)).UTC() + case C.DUCKDB_TYPE_TIMESTAMP_S: + return time.Unix(int64(micros), 0).UTC() + case C.DUCKDB_TYPE_TIMESTAMP_MS: + return time.UnixMilli(int64(micros)).UTC() + case C.DUCKDB_TYPE_TIMESTAMP_NS: + return time.Unix(0, int64(micros)).UTC() + case C.DUCKDB_TYPE_TIMESTAMP_TZ: + return time.UnixMicro(int64(micros)).UTC() + } + + return time.Time{} +} + +func (vec *vector) getDate(rowIdx C.idx_t) time.Time { + primitiveDate := getPrimitive[C.duckdb_date](vec, rowIdx) + date := C.duckdb_from_date(primitiveDate) + return time.Date(int(date.year), time.Month(date.month), int(date.day), 0, 0, 0, 0, time.UTC) +} + +func (vec *vector) getTime(rowIdx C.idx_t) time.Time { + val := getPrimitive[C.duckdb_time](vec, rowIdx) + micros := val.micros + return time.UnixMicro(int64(micros)).UTC() +} + +func (vec *vector) getInterval(rowIdx C.idx_t) Interval { + val := getPrimitive[C.duckdb_interval](vec, rowIdx) + interval := Interval{ + Days: int32(val.days), + Months: int32(val.months), + Micros: int64(val.micros), + } + return interval +} + +func (vec *vector) getHugeint(rowIdx C.idx_t) *big.Int { + hugeInt := getPrimitive[C.duckdb_hugeint](vec, rowIdx) + return hugeIntToNative(hugeInt) +} + +func (vec *vector) getCString(rowIdx C.idx_t) any { + cStr := getPrimitive[duckdb_string_t](vec, rowIdx) + + var blob []byte + if cStr.length <= 
stringInlineLength { + // Inlined data is stored from byte 4 to stringInlineLength + 4. + blob = C.GoBytes(unsafe.Pointer(&cStr.prefix), C.int(cStr.length)) + } else { + // Any strings exceeding stringInlineLength are stored as a pointer in `ptr`. + blob = C.GoBytes(unsafe.Pointer(cStr.ptr), C.int(cStr.length)) + } + + if vec.duckdbType == C.DUCKDB_TYPE_VARCHAR { + return string(blob) + } + return blob +} + +func (vec *vector) getDecimal(internalType C.duckdb_type, rowIdx C.idx_t) Decimal { + var val *big.Int + switch internalType { + case C.DUCKDB_TYPE_SMALLINT: + v := getPrimitive[int16](vec, rowIdx) + val = big.NewInt(int64(v)) + case C.DUCKDB_TYPE_INTEGER: + v := getPrimitive[int32](vec, rowIdx) + val = big.NewInt(int64(v)) + case C.DUCKDB_TYPE_BIGINT: + v := getPrimitive[int64](vec, rowIdx) + val = big.NewInt(v) + case C.DUCKDB_TYPE_HUGEINT: + v := getPrimitive[C.duckdb_hugeint](vec, rowIdx) + val = hugeIntToNative(C.duckdb_hugeint{ + lower: v.lower, + upper: v.upper, + }) + } + + return Decimal{Width: vec.width, Scale: vec.scale, Value: val} +} + +func (vec *vector) getEnum(internalType C.duckdb_type, rowIdx C.idx_t) string { + var idx uint64 + switch internalType { + case C.DUCKDB_TYPE_UTINYINT: + idx = uint64(getPrimitive[uint8](vec, rowIdx)) + case C.DUCKDB_TYPE_USMALLINT: + idx = uint64(getPrimitive[uint16](vec, rowIdx)) + case C.DUCKDB_TYPE_UINTEGER: + idx = uint64(getPrimitive[uint32](vec, rowIdx)) + case C.DUCKDB_TYPE_UBIGINT: + idx = getPrimitive[uint64](vec, rowIdx) + } + + logicalType := C.duckdb_vector_get_column_type(vec.duckdbVector) + defer C.duckdb_destroy_logical_type(&logicalType) + + val := C.duckdb_enum_dictionary_value(logicalType, (C.idx_t)(idx)) + defer C.duckdb_free(unsafe.Pointer(val)) + return C.GoString(val) +} + +func (vec *vector) getList(rowIdx C.idx_t) []any { + entry := getPrimitive[duckdb_list_entry_t](vec, rowIdx) + slice := make([]any, 0, entry.length) + childVector := &vec.childVectors[0] + + // Fill the slice with all child 
values. + for i := C.idx_t(0); i < entry.length; i++ { + val := childVector.getFn(childVector, i+entry.offset) + slice = append(slice, val) + } + return slice +} + +func (vec *vector) getStruct(rowIdx C.idx_t) map[string]any { + m := map[string]any{} + for i := 0; i < len(vec.childVectors); i++ { + childVector := &vec.childVectors[i] + val := childVector.getFn(childVector, rowIdx) + m[vec.childNames[i]] = val + } + return m +} + +func (vec *vector) getMap(rowIdx C.idx_t) Map { + list := vec.getList(rowIdx) + + m := Map{} + for i := 0; i < len(list); i++ { + mapItem := list[i].(map[string]any) + key := mapItem[mapKeysField()] + val := mapItem[mapValuesField()] + m[key] = val + } + return m +}