Skip to content

Commit

Permalink
sliiight performance increase by using unsafe functions
Browse files Browse the repository at this point in the history
  • Loading branch information
taniabogatsch committed Jun 21, 2024
1 parent 5ee4860 commit 4d97e4e
Show file tree
Hide file tree
Showing 6 changed files with 388 additions and 254 deletions.
20 changes: 17 additions & 3 deletions appender.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,21 @@ func (a *Appender) AppendRow(args ...driver.Value) error {
return getError(errAppenderAppendAfterClose, nil)
}

err := a.appendRowSlice(args)
err := a.appendRowSlice(args, true)
if err != nil {
return getError(errAppenderAppendRow, err)
}
return nil
}

// AppendRowUnsafe loads a row of values into the appender. The values are provided as separate arguments.
// Panics on casting errors.
func (a *Appender) AppendRowUnsafe(args ...driver.Value) error {
if a.closed {
return getError(errAppenderAppendAfterClose, nil)
}

err := a.appendRowSlice(args, false)
if err != nil {
return getError(errAppenderAppendRow, err)
}
Expand All @@ -146,7 +160,7 @@ func (a *Appender) addDataChunk() error {
return nil
}

func (a *Appender) appendRowSlice(args []driver.Value) error {
func (a *Appender) appendRowSlice(args []driver.Value, safe bool) error {
// Early-out, if the number of args does not match the column count.
if len(args) != len(a.types) {
return columnCountError(len(args), len(a.types))
Expand All @@ -163,7 +177,7 @@ func (a *Appender) appendRowSlice(args []driver.Value) error {
// Set all values.
for i, val := range args {
chunk := &a.chunks[len(a.chunks)-1]
err := chunk.SetValue(i, a.rowCount, val)
err := chunk.setValue(i, a.rowCount, val, safe)
if err != nil {
return err
}
Expand Down
23 changes: 15 additions & 8 deletions data_chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,20 +32,27 @@ func (chunk *DataChunk) SetSize(size int) error {
return nil
}

// SetValue writes a single value to a column in a data chunk. Note that this requires casting the type for each invocation.
// SetValueUnsafe writes a single value to a column in a data chunk. Panics on casting errors.
// NOTE: Custom ENUM types must be passed as string.
func (chunk *DataChunk) SetValueUnsafe(colIdx int, rowIdx int, val any) error {
	// safe=false: delegate without the up-front type check, so a mismatched
	// value panics during the write instead of returning a cast error.
	return chunk.setValue(colIdx, rowIdx, val, false)
}

// SetValue writes a single value to a column in a data chunk. It ensures that the types match before attempting to set anything.
// This is done to prevent failures 'halfway through' writing column values, potentially corrupting data in that column.
// Note that this requires casting the type for each invocation.
// NOTE: Custom ENUM types must be passed as string.
func (chunk *DataChunk) SetValue(colIdx int, rowIdx int, val any) error {
	// safe=true: validate the value's type before writing, so a mismatch is
	// reported as an error rather than corrupting the column mid-write.
	return chunk.setValue(colIdx, rowIdx, val, true)
}

func (chunk *DataChunk) setValue(colIdx int, rowIdx int, val any, safe bool) error {
if colIdx >= len(chunk.columns) {
return getError(errAPI, columnCountError(colIdx, len(chunk.columns)))
}
column := &chunk.columns[colIdx]

// Ensure that the types match before attempting to set anything.
// This is done to prevent failures 'halfway through' writing column values,
// potentially corrupting data in that column.
// FIXME: Can we improve efficiency here? We are casting back-and-forth to any A LOT.
// FIXME: Maybe we can make columnar insertions unsafe, i.e., we always assume a correct type.
v, err := column.tryCast(val)
column := &chunk.columns[colIdx]
v, err := column.tryCast(val, safe)
if err != nil {
return columnError(err, colIdx)
}
Expand Down
101 changes: 69 additions & 32 deletions types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,37 +162,68 @@ func testTypesReset[T require.TestingT](t T, c *Connector) {
require.NoError(t, err)
}

func testTypes[T require.TestingT](t T, c *Connector, a *Appender, expectedRows []testTypesRow) []testTypesRow {
func testTypes[T require.TestingT](t T, c *Connector, a *Appender, expectedRows []testTypesRow, safe bool) []testTypesRow {
// Append the rows. We cannot append Composite types.
for i := 0; i < len(expectedRows); i++ {
r := &expectedRows[i]
err := a.AppendRow(
r.Boolean_col,
r.Tinyint_col,
r.Smallint_col,
r.Integer_col,
r.Bigint_col,
r.Utinyint_col,
r.Usmallint_col,
r.Uinteger_col,
r.Ubigint_col,
r.Float_col,
r.Double_col,
r.Timestamp_col,
r.Date_col,
r.Time_col,
r.Interval_col,
r.Hugeint_col,
r.Varchar_col,
r.Blob_col,
r.Timestamp_s_col,
r.Timestamp_ms_col,
r.Timestamp_ns_col,
string(r.Enum_col),
r.List_col.Get(),
r.Struct_col.Get(),
r.Map_col,
r.Timestamp_tz_col)
var err error
if safe {
err = a.AppendRow(
r.Boolean_col,
r.Tinyint_col,
r.Smallint_col,
r.Integer_col,
r.Bigint_col,
r.Utinyint_col,
r.Usmallint_col,
r.Uinteger_col,
r.Ubigint_col,
r.Float_col,
r.Double_col,
r.Timestamp_col,
r.Date_col,
r.Time_col,
r.Interval_col,
r.Hugeint_col,
r.Varchar_col,
r.Blob_col,
r.Timestamp_s_col,
r.Timestamp_ms_col,
r.Timestamp_ns_col,
string(r.Enum_col),
r.List_col.Get(),
r.Struct_col.Get(),
r.Map_col,
r.Timestamp_tz_col)
} else {
err = a.AppendRowUnsafe(
r.Boolean_col,
r.Tinyint_col,
r.Smallint_col,
r.Integer_col,
r.Bigint_col,
r.Utinyint_col,
r.Usmallint_col,
r.Uinteger_col,
r.Ubigint_col,
r.Float_col,
r.Double_col,
r.Timestamp_col,
r.Date_col,
r.Time_col,
r.Interval_col,
r.Hugeint_col,
r.Varchar_col,
r.Blob_col,
r.Timestamp_s_col,
r.Timestamp_ms_col,
r.Timestamp_ns_col,
string(r.Enum_col),
r.List_col.Get(),
r.Struct_col.Get(),
r.Map_col,
r.Timestamp_tz_col)
}
require.NoError(t, err)
}
require.NoError(t, a.Flush())
Expand Down Expand Up @@ -240,11 +271,10 @@ func testTypes[T require.TestingT](t T, c *Connector, a *Appender, expectedRows
return actualRows
}

func TestTypes(t *testing.T) {
t.Parallel()
func testTypesInternal(t *testing.T, safe bool) {
expectedRows := testTypesGenerateRows(t, 3)
c, con, a := prepareAppender(t, testTypesEnumSQL+";"+testTypesTableSQL)
actualRows := testTypes(t, c, a, expectedRows)
actualRows := testTypes(t, c, a, expectedRows, safe)

for i := range actualRows {
expectedRows[i].toUTC()
Expand All @@ -255,15 +285,22 @@ func TestTypes(t *testing.T) {
cleanupAppender(t, c, con, a)
}

// TestTypes runs the type round-trip test in both the safe (type-checked)
// and unsafe (unchecked) appender modes.
func TestTypes(t *testing.T) {
	t.Parallel()
	for _, safe := range []bool{true, false} {
		testTypesInternal(t, safe)
	}
}

// NOTE: go-duckdb only contains very few benchmarks. The purpose of those benchmarks is to avoid regressions
// of its main functionalities. I.e., functions related to implementing the database/sql interface.

func BenchmarkTypes(b *testing.B) {
expectedRows := testTypesGenerateRows(b, GetDataChunkCapacity()*3+10)
c, con, a := prepareAppender(b, testTypesEnumSQL+";"+testTypesTableSQL)

b.ResetTimer()
for n := 0; n < b.N; n++ {
_ = testTypes(b, c, a, expectedRows)
_ = testTypes(b, c, a, expectedRows, false)
testTypesReset(b, c)
}
cleanupAppender(b, c, con, a)
Expand Down
Loading

0 comments on commit 4d97e4e

Please sign in to comment.