From 0fc45509f5d41d910674d599a876b18ae27a8076 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Fri, 7 Jun 2024 09:37:36 +0200 Subject: [PATCH 1/9] moving the appender into the data chunk API --- appender.go | 141 ++++++----------- data_chunk.go | 104 +++++++++++++ helper.go | 29 ++++ appender_vector.go => vector.go | 264 ++++++++++++-------------------- vector_setters.go | 109 +++++++++++++ 5 files changed, 388 insertions(+), 259 deletions(-) create mode 100644 data_chunk.go create mode 100644 helper.go rename appender_vector.go => vector.go (68%) create mode 100644 vector_setters.go diff --git a/appender.go b/appender.go index 4182feda..ed043bd5 100644 --- a/appender.go +++ b/appender.go @@ -19,13 +19,14 @@ type Appender struct { duckdbAppender C.duckdb_appender closed bool - chunks []C.duckdb_data_chunk - currSize C.idx_t - colTypes []C.duckdb_logical_type - colTypesPtr unsafe.Pointer - - // The vector storage of each column in the data chunk. - vectors []vector + // The appender storage before flushing any data. + chunks []DataChunk + // The column types of the table to append to. + types []C.duckdb_logical_type + // A pointer to the allocated memory of the column types. + ptr unsafe.Pointer + // The number of appended rows. + rowCount int } // NewAppenderFromConn returns a new Appender from a DuckDB driver connection. @@ -62,30 +63,25 @@ func NewAppenderFromConn(driverConn driver.Conn, schema, table string) (*Appende schema: schema, table: table, duckdbAppender: duckdbAppender, - currSize: 0, + rowCount: 0, } - columnCount := int(C.duckdb_appender_column_count(duckdbAppender)) - a.colTypesPtr, a.colTypes = a.mallocTypeSlice(columnCount) - // Get the column types. + columnCount := int(C.duckdb_appender_column_count(duckdbAppender)) + a.ptr, a.types = mallocTypeSlice(columnCount) for i := 0; i < columnCount; i++ { - a.colTypes[i] = C.duckdb_appender_column_type(duckdbAppender, C.idx_t(i)) - } - - // Get the vector storage of each column. - a.vectors = make([]vector, columnCount) - var err error - for i := 0; i < columnCount; i++ { - if err = a.vectors[i].init(a.colTypes[i], i); err != nil { - break + a.types[i] = C.duckdb_appender_column_type(duckdbAppender, C.idx_t(i)) + + // Ensure that we only create an appender for supported column types. + duckdbType := C.duckdb_get_type_id(a.types[i]) + name, found := unsupportedAppenderTypeMap[duckdbType] + if found { + err := columnError(unsupportedTypeError(name), i+1) + destroyTypeSlice(a.ptr, a.types) + C.duckdb_appender_destroy(&duckdbAppender) + return nil, getError(errAppenderCreation, err) } } - if err != nil { - a.destroyColumnTypes() - C.duckdb_appender_destroy(&duckdbAppender) - return nil, getError(errAppenderCreation, err) - } return a, nil } @@ -94,11 +90,6 @@ func NewAppenderFromConn(driverConn driver.Conn, schema, table string) (*Appende // Does not close the appender, even if it returns an error. Unless you have a good reason to call this, // call Close when you are done with the appender. func (a *Appender) Flush() error { - // Nothing to flush. - if len(a.chunks) == 0 && a.currSize == 0 { - return nil - } - if err := a.appendDataChunks(); err != nil { return getError(errAppenderFlush, invalidatedAppenderError(err)) } @@ -120,16 +111,14 @@ func (a *Appender) Close() error { a.closed = true // Append all remaining chunks. 
- var err error - if len(a.chunks) != 0 || a.currSize != 0 { - err = a.appendDataChunks() - } + err := a.appendDataChunks() - a.destroyColumnTypes() + // Destroy all appender data. + destroyTypeSlice(a.ptr, a.types) state := C.duckdb_appender_destroy(&a.duckdbAppender) if err != nil || state == C.DuckDBError { - // We destroyed the appender, so we cannot retrieve the duckdb error. + // We destroyed the appender, so we cannot retrieve the duckdb internal error. return getError(errAppenderClose, invalidatedAppenderError(err)) } return nil @@ -148,92 +137,64 @@ func (a *Appender) AppendRow(args ...driver.Value) error { return nil } -func (a *Appender) destroyColumnTypes() { - for i := range a.colTypes { - C.duckdb_destroy_logical_type(&a.colTypes[i]) +func (a *Appender) addDataChunk() error { + var chunk DataChunk + if err := chunk.InitFromTypes(a.ptr, a.types); err != nil { + return err } - C.free(a.colTypesPtr) -} - -func (*Appender) mallocTypeSlice(count int) (unsafe.Pointer, []C.duckdb_logical_type) { - var dummy C.duckdb_logical_type - size := C.size_t(unsafe.Sizeof(dummy)) - - ctPtr := unsafe.Pointer(C.malloc(C.size_t(count) * size)) - slice := (*[1 << 30]C.duckdb_logical_type)(ctPtr)[:count:count] - - return ctPtr, slice -} - -func (a *Appender) newDataChunk(colCount int) { - a.currSize = 0 - - // duckdb_create_data_chunk takes an array of duckdb_logical_type and a column count. - colTypesPtr := (*C.duckdb_logical_type)(a.colTypesPtr) - dataChunk := C.duckdb_create_data_chunk(colTypesPtr, C.idx_t(colCount)) - C.duckdb_data_chunk_set_size(dataChunk, C.duckdb_vector_size()) - - for i := 0; i < colCount; i++ { - duckdbVector := C.duckdb_data_chunk_get_vector(dataChunk, C.idx_t(i)) - a.vectors[i].duckdbVector = duckdbVector - a.vectors[i].getChildVectors(duckdbVector) - } - - a.chunks = append(a.chunks, dataChunk) + a.chunks = append(a.chunks, chunk) + return nil } func (a *Appender) appendRowSlice(args []driver.Value) error { - // early-out, if the number of args does not match the column count - if len(args) != len(a.vectors) { - return columnCountError(len(args), len(a.vectors)) + // Early-out, if the number of args does not match the column count. + if len(args) != len(a.types) { + return columnCountError(len(args), len(a.types)) } - // Create a new data chunk if the current chunk is full, or if this is the first row. - if a.currSize == C.duckdb_vector_size() || len(a.chunks) == 0 { - a.newDataChunk(len(args)) + // Create a new data chunk if the current chunk is full. + if C.idx_t(a.rowCount) == C.duckdb_vector_size() || len(a.chunks) == 0 { + if err := a.addDataChunk(); err != nil { + return err + } } + // Set all values. for i, val := range args { - vec := a.vectors[i] - - // Ensure that the types match before attempting to append anything. - v, err := vec.tryCast(val) + chunk := &a.chunks[len(a.chunks)-1] + err := chunk.SetValue(i, a.rowCount, val) if err != nil { - // Use 1-based indexing for readability, as we're talking about columns. - return columnError(err, i+1) + return err } - - // Append the row to the data chunk. - vec.fn(&vec, a.currSize, v) } - a.currSize++ + a.rowCount++ return nil } func (a *Appender) appendDataChunks() error { - // Set the size of the current chunk to the current row count. - C.duckdb_data_chunk_set_size(a.chunks[len(a.chunks)-1], C.idx_t(a.currSize)) - - // Append all chunks to the appender and destroy them. 
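+	// Append each chunk to the appender. On any error we break out of the loop,
+	// but still destroy all chunks and reset the row count before returning.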
var state C.duckdb_state var err error for _, chunk := range a.chunks { - state = C.duckdb_append_data_chunk(a.duckdbAppender, chunk) + if err = chunk.SetSize(); err != nil { + break + } + state = C.duckdb_append_data_chunk(a.duckdbAppender, chunk.data) if state == C.DuckDBError { err = duckdbError(C.duckdb_appender_error(a.duckdbAppender)) break } } + a.destroyDataChunks() + a.rowCount = 0 return err } func (a *Appender) destroyDataChunks() { for _, chunk := range a.chunks { - C.duckdb_destroy_data_chunk(&chunk) + chunk.Destroy() } - a.currSize = 0 a.chunks = a.chunks[:0] } diff --git a/data_chunk.go b/data_chunk.go new file mode 100644 index 00000000..bb937a55 --- /dev/null +++ b/data_chunk.go @@ -0,0 +1,104 @@ +package duckdb + +/* +#include +#include +*/ +import "C" + +import ( + "unsafe" +) + +// DataChunk storage of a DuckDB table. +type DataChunk struct { + // data holds the underlying duckdb data chunk. + data C.duckdb_data_chunk + // columns is a helper slice providing direct access to all columns. + columns []vector + // columnNames holds the column names, if known. + columnNames []string +} + +// InitFromTypes initializes a data chunk by providing its column types. +func (chunk *DataChunk) InitFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type) error { + columnCount := len(types) + + // Initialize the callback functions to read and write values. + chunk.columns = make([]vector, columnCount) + var err error + for i := 0; i < columnCount; i++ { + if err = chunk.columns[i].init(types[i], i); err != nil { + break + } + } + if err != nil { + return err + } + + logicalTypesPtr := (*C.duckdb_logical_type)(ptr) + chunk.data = C.duckdb_create_data_chunk(logicalTypesPtr, C.idx_t(columnCount)) + C.duckdb_data_chunk_set_size(chunk.data, C.duckdb_vector_size()) + + // Initialize the vectors and their child vectors. + for i := 0; i < columnCount; i++ { + duckdbVector := C.duckdb_data_chunk_get_vector(chunk.data, C.idx_t(i)) + chunk.columns[i].duckdbVector = duckdbVector + chunk.columns[i].getChildVectors(duckdbVector) + } + return nil +} + +// Destroy the memory of a data chunk. This is crucial to avoid leaks. +func (chunk *DataChunk) Destroy() { + C.duckdb_destroy_data_chunk(&chunk.data) +} + +// SetSize sets the internal size of the data chunk. This fails if columns have different sizes. +func (chunk *DataChunk) SetSize() error { + if len(chunk.columns) == 0 { + C.duckdb_data_chunk_set_size(chunk.data, C.idx_t(0)) + return nil + } + + allEqual := true + maxSize := C.idx_t(chunk.columns[0].size) + for i := 0; i < len(chunk.columns); i++ { + if chunk.columns[i].size != maxSize { + allEqual = false + } + if chunk.columns[i].size > maxSize { + maxSize = chunk.columns[i].size + } + } + + if !allEqual { + return errDriver + } + C.duckdb_data_chunk_set_size(chunk.data, maxSize) + return nil +} + +// GetSize returns the internal size of the data chunk. +func (chunk *DataChunk) GetSize() int { + return int(C.duckdb_data_chunk_get_size(chunk.data)) +} + +// SetValue writes a single value to a column. Note that this requires casting the type for +// each invocation. Try to use the columnar function SetColumn for performance. +func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { + if colIdx >= len(chunk.columns) { + return errDriver + } + column := &chunk.columns[colIdx] + + // Ensure that the types match before attempting to set anything. + v, err := column.tryCast(val) + if err != nil { + return columnError(err, colIdx) + } + + // Set the value. 
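+	// The setter callback (setFn) was selected for this column's logical type
+	// during initialization, so writing the value is a single indirect call.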
+ column.setFn(column, C.idx_t(rowIdx), v) + return nil +} diff --git a/helper.go b/helper.go new file mode 100644 index 00000000..97f1da78 --- /dev/null +++ b/helper.go @@ -0,0 +1,29 @@ +package duckdb + +/* +#include +#include +*/ +import "C" + +import "unsafe" + +// secondsPerDay to calculate the days since 1970-01-01. +const secondsPerDay = 24 * 60 * 60 + +func mallocTypeSlice(count int) (unsafe.Pointer, []C.duckdb_logical_type) { + var dummy C.duckdb_logical_type + size := C.size_t(unsafe.Sizeof(dummy)) + + ptr := unsafe.Pointer(C.malloc(C.size_t(count) * size)) + slice := (*[1 << 30]C.duckdb_logical_type)(ptr)[:count:count] + + return ptr, slice +} + +func destroyTypeSlice(ptr unsafe.Pointer, slice []C.duckdb_logical_type) { + for _, t := range slice { + C.duckdb_destroy_logical_type(&t) + } + C.free(ptr) +} diff --git a/appender_vector.go b/vector.go similarity index 68% rename from appender_vector.go rename to vector.go index b75fb2c4..f69cdd47 100644 --- a/appender_vector.go +++ b/vector.go @@ -13,26 +13,22 @@ import ( "unsafe" ) -// secondsPerDay to calculate the days since 1970-01-01. -const secondsPerDay = 24 * 60 * 60 - // vector storage of a DuckDB column. type vector struct { // The underlying DuckDB vector. duckdbVector C.duckdb_vector // A callback function to write to this vector. - fn fnSetVectorValue + setFn fnSetVectorValue // The data type of the vector. duckdbType C.duckdb_type // The child names of STRUCT vectors. childNames []string // The child vectors of nested data types. childVectors []vector + // The number of values in this vector. + size C.idx_t } -// fnSetVectorValue is the setter callback function for any (nested) vectors. -type fnSetVectorValue func(vec *vector, rowIdx C.idx_t, val any) - func (vec *vector) tryCast(val any) (any, error) { if val == nil { return val, nil @@ -189,49 +185,68 @@ func (vec *vector) init(logicalType C.duckdb_logical_type, colIdx int) error { duckdbType := C.duckdb_get_type_id(logicalType) switch duckdbType { - case C.DUCKDB_TYPE_UTINYINT: - initPrimitive[uint8](vec, C.DUCKDB_TYPE_UTINYINT) + case C.DUCKDB_TYPE_INVALID: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_BOOLEAN: + initPrimitive[bool](vec, C.DUCKDB_TYPE_BOOLEAN) case C.DUCKDB_TYPE_TINYINT: initPrimitive[int8](vec, C.DUCKDB_TYPE_TINYINT) - case C.DUCKDB_TYPE_USMALLINT: - initPrimitive[uint16](vec, C.DUCKDB_TYPE_USMALLINT) case C.DUCKDB_TYPE_SMALLINT: initPrimitive[int16](vec, C.DUCKDB_TYPE_SMALLINT) - case C.DUCKDB_TYPE_UINTEGER: - initPrimitive[uint32](vec, C.DUCKDB_TYPE_UINTEGER) case C.DUCKDB_TYPE_INTEGER: initPrimitive[int32](vec, C.DUCKDB_TYPE_INTEGER) - case C.DUCKDB_TYPE_UBIGINT: - initPrimitive[uint64](vec, C.DUCKDB_TYPE_UBIGINT) case C.DUCKDB_TYPE_BIGINT: initPrimitive[int64](vec, C.DUCKDB_TYPE_BIGINT) + case C.DUCKDB_TYPE_UTINYINT: + initPrimitive[uint8](vec, C.DUCKDB_TYPE_UTINYINT) + case C.DUCKDB_TYPE_USMALLINT: + initPrimitive[uint16](vec, C.DUCKDB_TYPE_USMALLINT) + case C.DUCKDB_TYPE_UINTEGER: + initPrimitive[uint32](vec, C.DUCKDB_TYPE_UINTEGER) + case C.DUCKDB_TYPE_UBIGINT: + initPrimitive[uint64](vec, C.DUCKDB_TYPE_UBIGINT) case C.DUCKDB_TYPE_FLOAT: initPrimitive[float32](vec, C.DUCKDB_TYPE_FLOAT) case C.DUCKDB_TYPE_DOUBLE: initPrimitive[float64](vec, C.DUCKDB_TYPE_DOUBLE) - case C.DUCKDB_TYPE_BOOLEAN: - initPrimitive[bool](vec, C.DUCKDB_TYPE_BOOLEAN) - case C.DUCKDB_TYPE_VARCHAR, C.DUCKDB_TYPE_BLOB: - vec.initCString(duckdbType) case C.DUCKDB_TYPE_TIMESTAMP, C.DUCKDB_TYPE_TIMESTAMP_S, 
C.DUCKDB_TYPE_TIMESTAMP_MS, C.DUCKDB_TYPE_TIMESTAMP_NS, C.DUCKDB_TYPE_TIMESTAMP_TZ: vec.initTS(duckdbType) - case C.DUCKDB_TYPE_UUID: - vec.initUUID() case C.DUCKDB_TYPE_DATE: vec.initDate() + case C.DUCKDB_TYPE_TIME: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_INTERVAL: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_HUGEINT: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_UHUGEINT: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_VARCHAR, C.DUCKDB_TYPE_BLOB: + vec.initCString(duckdbType) + case C.DUCKDB_TYPE_DECIMAL: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_ENUM: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) case C.DUCKDB_TYPE_LIST: return vec.initList(logicalType, colIdx) case C.DUCKDB_TYPE_STRUCT: - return vec.initStruct(logicalType) + return vec.initStruct(logicalType, colIdx) + case C.DUCKDB_TYPE_MAP: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_ARRAY: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_UUID: + vec.initUUID() + case C.DUCKDB_TYPE_UNION: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_BIT: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) + case C.DUCKDB_TYPE_TIME_TZ: + return columnError(unsupportedTypeError(duckdbTypeMap[duckdbType]), colIdx) default: - name, found := unsupportedAppenderTypeMap[duckdbType] - if !found { - name = "unknown type" - } - return columnError(unsupportedTypeError(name), colIdx+1) + return columnError(unsupportedTypeError("unknown type"), colIdx) } - return nil } @@ -252,163 +267,52 @@ func (vec *vector) getChildVectors(vector C.duckdb_vector) { } } -func (vec *vector) setNull(rowIdx C.idx_t) { - C.duckdb_vector_ensure_validity_writable(vec.duckdbVector) - mask := C.duckdb_vector_get_validity(vec.duckdbVector) - C.duckdb_validity_set_row_invalid(mask, rowIdx) - - if vec.duckdbType == C.DUCKDB_TYPE_STRUCT { - for i := 0; i < len(vec.childVectors); i++ { - vec.childVectors[i].setNull(rowIdx) - } - } -} - -func setPrimitive[T any](vec *vector, rowIdx C.idx_t, val any) { - if val == nil { - vec.setNull(rowIdx) - return - } - - ptr := C.duckdb_vector_get_data(vec.duckdbVector) - xs := (*[1 << 31]T)(ptr) - xs[rowIdx] = val.(T) -} - -func (vec *vector) setCString(rowIdx C.idx_t, val any) { - if val == nil { - vec.setNull(rowIdx) - return - } - - var str string - switch vec.duckdbType { - case C.DUCKDB_TYPE_VARCHAR: - str = val.(string) - case C.DUCKDB_TYPE_BLOB: - str = string(val.([]byte)[:]) - } - - // This setter also writes BLOBs. 
- cStr := C.CString(str) - C.duckdb_vector_assign_string_element_len(vec.duckdbVector, rowIdx, cStr, C.idx_t(len(str))) - C.free(unsafe.Pointer(cStr)) -} - -func (vec *vector) setTime(rowIdx C.idx_t, ticks int64) { - var ts C.duckdb_timestamp - ts.micros = C.int64_t(ticks) - setPrimitive[C.duckdb_timestamp](vec, rowIdx, ts) -} - -func (vec *vector) setDate(rowIdx C.idx_t, days int32) { - var date C.duckdb_date - date.days = C.int32_t(days) - setPrimitive[C.duckdb_date](vec, rowIdx, date) -} - -func (vec *vector) setList(rowIdx C.idx_t, val any) { - if val == nil { - vec.setNull(rowIdx) - return - } - - v := val.([]any) - childVectorSize := C.duckdb_list_vector_get_size(vec.duckdbVector) - - // Set the offset and length of the list vector using the current size of the child vector. - listEntry := C.duckdb_list_entry{ - offset: C.idx_t(childVectorSize), - length: C.idx_t(len(v)), - } - setPrimitive[C.duckdb_list_entry](vec, rowIdx, listEntry) - - newLength := C.idx_t(len(v)) + childVectorSize - C.duckdb_list_vector_set_size(vec.duckdbVector, newLength) - C.duckdb_list_vector_reserve(vec.duckdbVector, newLength) - - // Insert the values into the child vector. - childVector := vec.childVectors[0] - for i, e := range v { - offset := C.idx_t(i) + childVectorSize - childVector.fn(&childVector, offset, e) - } -} - -func (vec *vector) setStruct(rowIdx C.idx_t, val any) { - if val == nil { - vec.setNull(rowIdx) - return - } - m := val.(map[string]any) - - for i := 0; i < len(vec.childVectors); i++ { - childVector := vec.childVectors[i] - childName := vec.childNames[i] - childVector.fn(&childVector, rowIdx, m[childName]) - } -} - func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ + if val == nil { + vec.setNull(rowIdx) + return + } setPrimitive[T](vec, rowIdx, val) } vec.duckdbType = duckdbType } -func (vec *vector) initCString(duckdbType C.duckdb_type) { - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.setCString(rowIdx, val) - } - vec.duckdbType = duckdbType -} - func (vec *vector) initTS(duckdbType C.duckdb_type) { - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ if val == nil { vec.setNull(rowIdx) return } - - v := val.(time.Time) - var ticks int64 - switch duckdbType { - case C.DUCKDB_TYPE_TIMESTAMP: - ticks = v.UTC().UnixMicro() - case C.DUCKDB_TYPE_TIMESTAMP_S: - ticks = v.UTC().Unix() - case C.DUCKDB_TYPE_TIMESTAMP_MS: - ticks = v.UTC().UnixMilli() - case C.DUCKDB_TYPE_TIMESTAMP_NS: - ticks = v.UTC().UnixNano() - case C.DUCKDB_TYPE_TIMESTAMP_TZ: - ticks = v.UTC().UnixMicro() - } - vec.setTime(rowIdx, ticks) + vec.setTS(duckdbType, rowIdx, val) } vec.duckdbType = duckdbType } -func (vec *vector) initUUID() { - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { - setPrimitive[C.duckdb_hugeint](vec, rowIdx, uuidToHugeInt(val.(UUID))) +func (vec *vector) initDate() { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ + if val == nil { + vec.setNull(rowIdx) + return + } + vec.setDate(rowIdx, val) } - vec.duckdbType = C.DUCKDB_TYPE_UUID + vec.duckdbType = C.DUCKDB_TYPE_DATE } -func (vec *vector) initDate() { - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { +func (vec *vector) initCString(duckdbType C.duckdb_type) { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ if val == nil { vec.setNull(rowIdx) return } - - v 
:= val.(time.Time) - // Days since 1970-01-01. - days := int32(v.UTC().Unix() / secondsPerDay) - vec.setDate(rowIdx, days) + vec.setCString(rowIdx, val) } - vec.duckdbType = C.DUCKDB_TYPE_DATE + vec.duckdbType = duckdbType } func (vec *vector) initList(logicalType C.duckdb_logical_type, colIdx int) error { @@ -423,14 +327,19 @@ func (vec *vector) initList(logicalType C.duckdb_logical_type, colIdx int) error return err } - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ + if val == nil { + vec.setNull(rowIdx) + return + } vec.setList(rowIdx, val) } vec.duckdbType = C.DUCKDB_TYPE_LIST return nil } -func (vec *vector) initStruct(logicalType C.duckdb_logical_type) error { +func (vec *vector) initStruct(logicalType C.duckdb_logical_type, colIdx int) error { childCount := int(C.duckdb_struct_type_child_count(logicalType)) var childNames []string for i := 0; i < childCount; i++ { @@ -439,17 +348,13 @@ func (vec *vector) initStruct(logicalType C.duckdb_logical_type) error { C.free(unsafe.Pointer(childName)) } - vec.fn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.setStruct(rowIdx, val) - } - vec.duckdbType = C.DUCKDB_TYPE_STRUCT vec.childVectors = make([]vector, childCount) vec.childNames = childNames // Recurse into the children. for i := 0; i < childCount; i++ { childType := C.duckdb_struct_type_child_type(logicalType, C.idx_t(i)) - err := vec.childVectors[i].init(childType, i) + err := vec.childVectors[i].init(childType, colIdx) C.duckdb_destroy_logical_type(&childType) if err != nil { @@ -457,5 +362,26 @@ func (vec *vector) initStruct(logicalType C.duckdb_logical_type) error { } } + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ + if val == nil { + vec.setNull(rowIdx) + return + } + vec.setStruct(rowIdx, val) + } + vec.duckdbType = C.DUCKDB_TYPE_STRUCT return nil } + +func (vec *vector) initUUID() { + vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { + vec.size++ + if val == nil { + vec.setNull(rowIdx) + return + } + setPrimitive[C.duckdb_hugeint](vec, rowIdx, uuidToHugeInt(val.(UUID))) + } + vec.duckdbType = C.DUCKDB_TYPE_UUID +} diff --git a/vector_setters.go b/vector_setters.go new file mode 100644 index 00000000..0abcfe49 --- /dev/null +++ b/vector_setters.go @@ -0,0 +1,109 @@ +package duckdb + +/* +#include +#include +*/ +import "C" +import ( + "time" + "unsafe" +) + +// fnSetVectorValue is the setter callback function for any (nested) vector. 
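+// Each callback is assigned in vector.init based on the column's logical type,
+// and marks the row invalid in the validity mask when val is nil.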
+type fnSetVectorValue func(vec *vector, rowIdx C.idx_t, val any) + +func (vec *vector) setNull(rowIdx C.idx_t) { + C.duckdb_vector_ensure_validity_writable(vec.duckdbVector) + mask := C.duckdb_vector_get_validity(vec.duckdbVector) + C.duckdb_validity_set_row_invalid(mask, rowIdx) + + if vec.duckdbType == C.DUCKDB_TYPE_STRUCT { + for i := 0; i < len(vec.childVectors); i++ { + vec.childVectors[i].setNull(rowIdx) + } + } +} + +func setPrimitive[T any](vec *vector, rowIdx C.idx_t, val any) { + ptr := C.duckdb_vector_get_data(vec.duckdbVector) + xs := (*[1 << 31]T)(ptr) + xs[rowIdx] = val.(T) +} + +func (vec *vector) setTS(duckdbType C.duckdb_type, rowIdx C.idx_t, val any) { + v := val.(time.Time) + var ticks int64 + switch duckdbType { + case C.DUCKDB_TYPE_TIMESTAMP: + ticks = v.UTC().UnixMicro() + case C.DUCKDB_TYPE_TIMESTAMP_S: + ticks = v.UTC().Unix() + case C.DUCKDB_TYPE_TIMESTAMP_MS: + ticks = v.UTC().UnixMilli() + case C.DUCKDB_TYPE_TIMESTAMP_NS: + ticks = v.UTC().UnixNano() + case C.DUCKDB_TYPE_TIMESTAMP_TZ: + ticks = v.UTC().UnixMicro() + } + + var ts C.duckdb_timestamp + ts.micros = C.int64_t(ticks) + setPrimitive[C.duckdb_timestamp](vec, rowIdx, ts) +} + +func (vec *vector) setDate(rowIdx C.idx_t, val any) { + // Days since 1970-01-01. + v := val.(time.Time) + days := int32(v.UTC().Unix() / secondsPerDay) + + var date C.duckdb_date + date.days = C.int32_t(days) + setPrimitive[C.duckdb_date](vec, rowIdx, date) +} + +func (vec *vector) setCString(rowIdx C.idx_t, val any) { + var str string + if vec.duckdbType == C.DUCKDB_TYPE_VARCHAR { + str = val.(string) + } else if vec.duckdbType == C.DUCKDB_TYPE_BLOB { + str = string(val.([]byte)[:]) + } + + // This setter also writes BLOBs. + cStr := C.CString(str) + C.duckdb_vector_assign_string_element_len(vec.duckdbVector, rowIdx, cStr, C.idx_t(len(str))) + C.free(unsafe.Pointer(cStr)) +} + +func (vec *vector) setList(rowIdx C.idx_t, val any) { + list := val.([]any) + childVectorSize := C.duckdb_list_vector_get_size(vec.duckdbVector) + + // Set the offset and length of the list vector using the current size of the child vector. + listEntry := C.duckdb_list_entry{ + offset: C.idx_t(childVectorSize), + length: C.idx_t(len(list)), + } + setPrimitive[C.duckdb_list_entry](vec, rowIdx, listEntry) + + newLength := C.idx_t(len(list)) + childVectorSize + C.duckdb_list_vector_set_size(vec.duckdbVector, newLength) + C.duckdb_list_vector_reserve(vec.duckdbVector, newLength) + + // Insert the values into the child vector. 
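+	// The child vector reuses the same setter dispatch, so nested LIST and
+	// STRUCT elements recurse through their own callbacks here.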
+ childVector := &vec.childVectors[0] + for i, entry := range list { + offset := C.idx_t(i) + childVectorSize + childVector.setFn(childVector, offset, entry) + } +} + +func (vec *vector) setStruct(rowIdx C.idx_t, val any) { + m := val.(map[string]any) + for i := 0; i < len(vec.childVectors); i++ { + childVector := &vec.childVectors[i] + childName := vec.childNames[i] + childVector.setFn(childVector, rowIdx, m[childName]) + } +} From a6a127313139181bf5a326f7b00e254b485df043 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Fri, 7 Jun 2024 09:38:40 +0200 Subject: [PATCH 2/9] linter --- statement.go | 2 +- vector_setters.go | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/statement.go b/statement.go index 91729a92..c3932205 100644 --- a/statement.go +++ b/statement.go @@ -59,7 +59,7 @@ func (s *stmt) bind(args []driver.NamedValue) error { C.duckdb_free(unsafe.Pointer(name)) // fallback on index position - var arg = args[i] + arg := args[i] // override with ordinal if set for _, v := range args { diff --git a/vector_setters.go b/vector_setters.go index 0abcfe49..3417d35f 100644 --- a/vector_setters.go +++ b/vector_setters.go @@ -5,6 +5,7 @@ package duckdb #include */ import "C" + import ( "time" "unsafe" From 53ecfe39783ad74511c3f7679f6968ef6f4b7fa8 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Fri, 7 Jun 2024 09:58:40 +0200 Subject: [PATCH 3/9] remove some functions for now --- data_chunk.go | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/data_chunk.go b/data_chunk.go index bb937a55..b424ac59 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -16,8 +16,6 @@ type DataChunk struct { data C.duckdb_data_chunk // columns is a helper slice providing direct access to all columns. columns []vector - // columnNames holds the column names, if known. - columnNames []string } // InitFromTypes initializes a data chunk by providing its column types. @@ -79,13 +77,7 @@ func (chunk *DataChunk) SetSize() error { return nil } -// GetSize returns the internal size of the data chunk. -func (chunk *DataChunk) GetSize() int { - return int(C.duckdb_data_chunk_get_size(chunk.data)) -} - -// SetValue writes a single value to a column. Note that this requires casting the type for -// each invocation. Try to use the columnar function SetColumn for performance. +// SetValue writes a single value to a column. Note that this requires casting the type for each invocation. 
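+// It returns an error if the column index is out of range or if the value
+// cannot be cast to the column's type; in that case, nothing is written.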
func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { if colIdx >= len(chunk.columns) { return errDriver From 5dcc5457df17611a2751bba6f5812adf13b7e2e0 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 10 Jun 2024 10:32:14 +0200 Subject: [PATCH 4/9] implement review suggestions, i.e., more conservative with exposing functions --- appender.go | 10 +++++----- data_chunk.go | 45 +++++++++++++++++++++------------------------ 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/appender.go b/appender.go index ed043bd5..6db44ea7 100644 --- a/appender.go +++ b/appender.go @@ -139,7 +139,7 @@ func (a *Appender) AppendRow(args ...driver.Value) error { func (a *Appender) addDataChunk() error { var chunk DataChunk - if err := chunk.InitFromTypes(a.ptr, a.types); err != nil { + if err := chunk.initFromTypes(a.ptr, a.types); err != nil { return err } a.chunks = append(a.chunks, chunk) @@ -177,7 +177,7 @@ func (a *Appender) appendDataChunks() error { var err error for _, chunk := range a.chunks { - if err = chunk.SetSize(); err != nil { + if err = chunk.setSize(); err != nil { break } state = C.duckdb_append_data_chunk(a.duckdbAppender, chunk.data) @@ -187,14 +187,14 @@ func (a *Appender) appendDataChunks() error { } } - a.destroyDataChunks() + a.closeDataChunks() a.rowCount = 0 return err } -func (a *Appender) destroyDataChunks() { +func (a *Appender) closeDataChunks() { for _, chunk := range a.chunks { - chunk.Destroy() + chunk.close() } a.chunks = a.chunks[:0] } diff --git a/data_chunk.go b/data_chunk.go index b424ac59..28b96a62 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -18,8 +18,25 @@ type DataChunk struct { columns []vector } -// InitFromTypes initializes a data chunk by providing its column types. -func (chunk *DataChunk) InitFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type) error { +// SetValue writes a single value to a column in a data chunk. Note that this requires casting the type for each invocation. +func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { + if colIdx >= len(chunk.columns) { + return errDriver + } + column := &chunk.columns[colIdx] + + // Ensure that the types match before attempting to set anything. + v, err := column.tryCast(val) + if err != nil { + return columnError(err, colIdx) + } + + // Set the value. + column.setFn(column, C.idx_t(rowIdx), v) + return nil +} + +func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logical_type) error { columnCount := len(types) // Initialize the callback functions to read and write values. @@ -47,13 +64,11 @@ func (chunk *DataChunk) InitFromTypes(ptr unsafe.Pointer, types []C.duckdb_logic return nil } -// Destroy the memory of a data chunk. This is crucial to avoid leaks. -func (chunk *DataChunk) Destroy() { +func (chunk *DataChunk) close() { C.duckdb_destroy_data_chunk(&chunk.data) } -// SetSize sets the internal size of the data chunk. This fails if columns have different sizes. -func (chunk *DataChunk) SetSize() error { +func (chunk *DataChunk) setSize() error { if len(chunk.columns) == 0 { C.duckdb_data_chunk_set_size(chunk.data, C.idx_t(0)) return nil @@ -76,21 +91,3 @@ func (chunk *DataChunk) SetSize() error { C.duckdb_data_chunk_set_size(chunk.data, maxSize) return nil } - -// SetValue writes a single value to a column. Note that this requires casting the type for each invocation. 
-func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { - if colIdx >= len(chunk.columns) { - return errDriver - } - column := &chunk.columns[colIdx] - - // Ensure that the types match before attempting to set anything. - v, err := column.tryCast(val) - if err != nil { - return columnError(err, colIdx) - } - - // Set the value. - column.setFn(column, C.idx_t(rowIdx), v) - return nil -} From 63c33a114d86f89d46193b4cdfb976c62567beca Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Mon, 10 Jun 2024 10:42:07 +0200 Subject: [PATCH 5/9] add test for exposed function --- data_chunk.go | 2 +- errors.go | 3 ++- errors_test.go | 6 ++++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/data_chunk.go b/data_chunk.go index 28b96a62..d9cda071 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -21,7 +21,7 @@ type DataChunk struct { // SetValue writes a single value to a column in a data chunk. Note that this requires casting the type for each invocation. func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { if colIdx >= len(chunk.columns) { - return errDriver + return getError(errAPI, columnCountError(colIdx, len(chunk.columns))) } column := &chunk.columns[colIdx] diff --git a/errors.go b/errors.go index ae9f0f78..4a2a0070 100644 --- a/errors.go +++ b/errors.go @@ -56,7 +56,8 @@ const ( ) var ( - errDriver = errors.New("internal driver error, please file a bug report") + errDriver = errors.New("internal driver error: please file a bug report") + errAPI = errors.New("API error") errParseDSN = errors.New("could not parse DSN for database") errOpen = errors.New("could not open database") diff --git a/errors_test.go b/errors_test.go index adb33862..bf210982 100644 --- a/errors_test.go +++ b/errors_test.go @@ -267,3 +267,9 @@ func TestErrAppendNestedList(t *testing.T) { cleanupAppender(t, c, con, a) } + +func TestErrAPISetValue(t *testing.T) { + var chunk DataChunk + err := chunk.SetValue(1, 42, "hello") + testError(t, err, errAPI.Error(), columnCountErrMsg) +} From a7d78a792089c1ebf9840e2c9838e26d109f804b Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 11 Jun 2024 15:07:29 +0200 Subject: [PATCH 6/9] returning T instead of any --- vector.go | 20 +++++++++++--------- vector_setters.go | 4 ++-- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/vector.go b/vector.go index f69cdd47..47103a53 100644 --- a/vector.go +++ b/vector.go @@ -84,29 +84,31 @@ func (*vector) canNil(val reflect.Value) bool { return false } -func tryPrimitiveCast[T any](val any, expected string) (any, error) { - if v, ok := val.(T); ok { +func tryPrimitiveCast[T any](val any, expected string) (T, error) { + v, ok := val.(T) + if ok { return v, nil } goType := reflect.TypeOf(val) - return nil, castError(goType.String(), expected) + return v, castError(goType.String(), expected) } -func tryNumericCast[T numericType](val any, expected string) (any, error) { - if v, ok := val.(T); ok { +func tryNumericCast[T numericType](val any, expected string) (T, error) { + v, ok := val.(T) + if ok { return v, nil } // JSON unmarshalling uses float64 for numbers. // We might want to add more implicit casts here. 
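+	// (encoding/json unmarshals any JSON number into float64 when decoding into interface{}.)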
- switch v := val.(type) { + switch value := val.(type) { case float64: - return convertNumericType[float64, T](v), nil + return convertNumericType[float64, T](value), nil } goType := reflect.TypeOf(val) - return nil, castError(goType.String(), expected) + return v, castError(goType.String(), expected) } func (vec *vector) tryCastList(val any) ([]any, error) { @@ -274,7 +276,7 @@ func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { vec.setNull(rowIdx) return } - setPrimitive[T](vec, rowIdx, val) + setPrimitive[T](vec, rowIdx, val.(T)) } vec.duckdbType = duckdbType } diff --git a/vector_setters.go b/vector_setters.go index 3417d35f..c56db361 100644 --- a/vector_setters.go +++ b/vector_setters.go @@ -26,10 +26,10 @@ func (vec *vector) setNull(rowIdx C.idx_t) { } } -func setPrimitive[T any](vec *vector, rowIdx C.idx_t, val any) { +func setPrimitive[T any](vec *vector, rowIdx C.idx_t, v T) { ptr := C.duckdb_vector_get_data(vec.duckdbVector) xs := (*[1 << 31]T)(ptr) - xs[rowIdx] = val.(T) + xs[rowIdx] = v } func (vec *vector) setTS(duckdbType C.duckdb_type, rowIdx C.idx_t, val any) { From fe5a3096af8b30d419dcef55cd36c1c749aedde2 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:24:35 +0200 Subject: [PATCH 7/9] nits, linters, fixes --- appender.go | 11 +++++++++-- data_chunk.go | 38 ++++++++++++++------------------------ errors.go | 4 +++- replacement_scan.go | 1 + replacement_scan_test.go | 7 +++---- vector.go | 10 +--------- 6 files changed, 31 insertions(+), 40 deletions(-) diff --git a/appender.go b/appender.go index 6db44ea7..6260a405 100644 --- a/appender.go +++ b/appender.go @@ -176,10 +176,17 @@ func (a *Appender) appendDataChunks() error { var state C.duckdb_state var err error - for _, chunk := range a.chunks { - if err = chunk.setSize(); err != nil { + for i, chunk := range a.chunks { + + // All data chunks except the last are at maximum capacity. + size := chunk.GetCapacity() + if i == len(a.chunks)-1 { + size = a.rowCount + } + if err = chunk.SetSize(size); err != nil { break } + state = C.duckdb_append_data_chunk(a.duckdbAppender, chunk.data) if state == C.DuckDBError { err = duckdbError(C.duckdb_appender_error(a.duckdbAppender)) diff --git a/data_chunk.go b/data_chunk.go index d9cda071..99285dea 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -18,6 +18,20 @@ type DataChunk struct { columns []vector } +// GetCapacity returns the capacity of a data chunk. +func (chunk *DataChunk) GetCapacity() int { + return int(C.duckdb_vector_size()) +} + +// SetSize sets the internal size of the data chunk. Cannot exceed GetCapacity(). +func (chunk *DataChunk) SetSize(size int) error { + if size > chunk.GetCapacity() { + return getError(errAPI, errVectorSize) + } + C.duckdb_data_chunk_set_size(chunk.data, C.idx_t(size)) + return nil +} + // SetValue writes a single value to a column in a data chunk. Note that this requires casting the type for each invocation. 
func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { if colIdx >= len(chunk.columns) { @@ -67,27 +81,3 @@ func (chunk *DataChunk) initFromTypes(ptr unsafe.Pointer, types []C.duckdb_logic func (chunk *DataChunk) close() { C.duckdb_destroy_data_chunk(&chunk.data) } - -func (chunk *DataChunk) setSize() error { - if len(chunk.columns) == 0 { - C.duckdb_data_chunk_set_size(chunk.data, C.idx_t(0)) - return nil - } - - allEqual := true - maxSize := C.idx_t(chunk.columns[0].size) - for i := 0; i < len(chunk.columns); i++ { - if chunk.columns[i].size != maxSize { - allEqual = false - } - if chunk.columns[i].size > maxSize { - maxSize = chunk.columns[i].size - } - } - - if !allEqual { - return errDriver - } - C.duckdb_data_chunk_set_size(chunk.data, maxSize) - return nil -} diff --git a/errors.go b/errors.go index 4a2a0070..978b555b 100644 --- a/errors.go +++ b/errors.go @@ -57,7 +57,9 @@ const ( var ( errDriver = errors.New("internal driver error: please file a bug report") - errAPI = errors.New("API error") + + errAPI = errors.New("API error") + errVectorSize = errors.New("data chunks cannot exceed duckdb's internal vector size") errParseDSN = errors.New("could not parse DSN for database") errOpen = errors.New("could not open database") diff --git a/replacement_scan.go b/replacement_scan.go index 380b42ab..cd960237 100644 --- a/replacement_scan.go +++ b/replacement_scan.go @@ -8,6 +8,7 @@ package duckdb void replacement_scan_destroy_data(void *); */ import "C" + import ( "runtime/cgo" "unsafe" diff --git a/replacement_scan_test.go b/replacement_scan_test.go index 26c8cbab..72795efe 100644 --- a/replacement_scan_test.go +++ b/replacement_scan_test.go @@ -3,12 +3,12 @@ package duckdb import ( "database/sql" "database/sql/driver" - "github.com/stretchr/testify/require" "testing" + + "github.com/stretchr/testify/require" ) func TestReplacementScan(t *testing.T) { - connector, err := NewConnector("", func(execer driver.ExecerContext) error { return nil }) @@ -16,7 +16,7 @@ func TestReplacementScan(t *testing.T) { require.NoError(t, err) defer connector.Close() - var rangeRows = 100 + rangeRows := 100 RegisterReplacementScan(connector, func(tableName string) (string, []any, error) { return "range", []any{int64(rangeRows)}, nil }) @@ -40,5 +40,4 @@ func TestReplacementScan(t *testing.T) { if rangeRows != 0 { require.Fail(t, "expected 0, got %d", rangeRows) } - } diff --git a/vector.go b/vector.go index ba91bb66..ee2f2c0b 100644 --- a/vector.go +++ b/vector.go @@ -25,8 +25,6 @@ type vector struct { childNames []string // The child vectors of nested data types. childVectors []vector - // The number of values in this vector. 
- size C.idx_t } func (vec *vector) tryCast(val any) (any, error) { @@ -268,9 +266,9 @@ func (vec *vector) getChildVectors(vector C.duckdb_vector) { } } } + func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -282,7 +280,6 @@ func initPrimitive[T any](vec *vector, duckdbType C.duckdb_type) { func (vec *vector) initTS(duckdbType C.duckdb_type) { vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -294,7 +291,6 @@ func (vec *vector) initTS(duckdbType C.duckdb_type) { func (vec *vector) initDate() { vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -306,7 +302,6 @@ func (vec *vector) initDate() { func (vec *vector) initCString(duckdbType C.duckdb_type) { vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -329,7 +324,6 @@ func (vec *vector) initList(logicalType C.duckdb_logical_type, colIdx int) error } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -364,7 +358,6 @@ func (vec *vector) initStruct(logicalType C.duckdb_logical_type, colIdx int) err } vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return @@ -377,7 +370,6 @@ func (vec *vector) initStruct(logicalType C.duckdb_logical_type, colIdx int) err func (vec *vector) initUUID() { vec.setFn = func(vec *vector, rowIdx C.idx_t, val any) { - vec.size++ if val == nil { vec.setNull(rowIdx) return From ac14a34a0c6e6de428f9cde326b65a56f69f7d46 Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 18 Jun 2024 10:43:30 +0200 Subject: [PATCH 8/9] comment --- data_chunk.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data_chunk.go b/data_chunk.go index 99285dea..966ac329 100644 --- a/data_chunk.go +++ b/data_chunk.go @@ -40,6 +40,8 @@ func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error { column := &chunk.columns[colIdx] // Ensure that the types match before attempting to set anything. + // This is done to prevent failures 'halfway through' writing column values, + // potentially corrupting data in that column. 
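+	// For example, tryCast validates every element of a LIST value up front, so a
+	// mismatched element fails here instead of after part of the list has been written.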
v, err := column.tryCast(val) if err != nil { return columnError(err, colIdx) From e182f7d01fc1da78940747ea858236d0f9ef340f Mon Sep 17 00:00:00 2001 From: taniabogatsch <44262898+taniabogatsch@users.noreply.github.com> Date: Tue, 18 Jun 2024 12:48:42 +0200 Subject: [PATCH 9/9] remove helper after feedback --- appender.go | 17 +++++++++++++++++ helper.go | 29 ----------------------------- vector_setters.go | 3 +++ 3 files changed, 20 insertions(+), 29 deletions(-) delete mode 100644 helper.go diff --git a/appender.go b/appender.go index 6260a405..c035d5ea 100644 --- a/appender.go +++ b/appender.go @@ -205,3 +205,20 @@ func (a *Appender) closeDataChunks() { } a.chunks = a.chunks[:0] } + +func mallocTypeSlice(count int) (unsafe.Pointer, []C.duckdb_logical_type) { + var dummy C.duckdb_logical_type + size := C.size_t(unsafe.Sizeof(dummy)) + + ptr := unsafe.Pointer(C.malloc(C.size_t(count) * size)) + slice := (*[1 << 30]C.duckdb_logical_type)(ptr)[:count:count] + + return ptr, slice +} + +func destroyTypeSlice(ptr unsafe.Pointer, slice []C.duckdb_logical_type) { + for _, t := range slice { + C.duckdb_destroy_logical_type(&t) + } + C.free(ptr) +} diff --git a/helper.go b/helper.go deleted file mode 100644 index 97f1da78..00000000 --- a/helper.go +++ /dev/null @@ -1,29 +0,0 @@ -package duckdb - -/* -#include -#include -*/ -import "C" - -import "unsafe" - -// secondsPerDay to calculate the days since 1970-01-01. -const secondsPerDay = 24 * 60 * 60 - -func mallocTypeSlice(count int) (unsafe.Pointer, []C.duckdb_logical_type) { - var dummy C.duckdb_logical_type - size := C.size_t(unsafe.Sizeof(dummy)) - - ptr := unsafe.Pointer(C.malloc(C.size_t(count) * size)) - slice := (*[1 << 30]C.duckdb_logical_type)(ptr)[:count:count] - - return ptr, slice -} - -func destroyTypeSlice(ptr unsafe.Pointer, slice []C.duckdb_logical_type) { - for _, t := range slice { - C.duckdb_destroy_logical_type(&t) - } - C.free(ptr) -} diff --git a/vector_setters.go b/vector_setters.go index c56db361..70da14cf 100644 --- a/vector_setters.go +++ b/vector_setters.go @@ -11,6 +11,9 @@ import ( "unsafe" ) +// secondsPerDay to calculate the days since 1970-01-01. +const secondsPerDay = 24 * 60 * 60 + // fnSetVectorValue is the setter callback function for any (nested) vector. type fnSetVectorValue func(vec *vector, rowIdx C.idx_t, val any)
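
For reference, a minimal usage sketch of the Appender as it looks after this series. The table name, column values, and import path (the upstream marcboeker/go-duckdb module) are illustrative assumptions, and error handling is reduced to log.Fatal; the data chunk management introduced above stays internal to AppendRow, Flush, and Close.

package main

import (
	"context"
	"database/sql/driver"
	"log"

	"github.com/marcboeker/go-duckdb"
)

func main() {
	// The connection init callback creates the target table; any other setup path works too.
	connector, err := duckdb.NewConnector("", func(execer driver.ExecerContext) error {
		_, err := execer.ExecContext(context.Background(),
			`CREATE TABLE IF NOT EXISTS people (id INTEGER, name VARCHAR)`, nil)
		return err
	})
	if err != nil {
		log.Fatal(err)
	}
	defer connector.Close()

	conn, err := connector.Connect(context.Background())
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	appender, err := duckdb.NewAppenderFromConn(conn, "", "people")
	if err != nil {
		log.Fatal(err)
	}

	// AppendRow buffers values into data chunks; a new chunk is started once
	// the current one holds duckdb's vector size worth of rows.
	if err = appender.AppendRow(int32(1), "Alice"); err != nil {
		log.Fatal(err)
	}
	if err = appender.AppendRow(int32(2), "Bob"); err != nil {
		log.Fatal(err)
	}

	// Close appends all remaining chunks and destroys the appender.
	if err = appender.Close(); err != nil {
		log.Fatal(err)
	}
}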