Skip to content

Commit

Permalink
Merge pull request #274 from taniabogatsch/profiling
Browse files Browse the repository at this point in the history
Add profiling support
  • Loading branch information
taniabogatsch authored Sep 24, 2024
2 parents 338c96e + 9c9511e commit 94736e3
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 26 deletions.
36 changes: 36 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,42 @@ err = appender.AppendRow(...)
check(err)
```

## DuckDB Profiling API

This section describes using the [DuckDB Profiling API](https://duckdb.org/docs/dev/profiling.html).
DuckDB's profiling information is connection-local.
The following example walks you through the necessary steps to obtain the `ProfilingInformation` type, which contains all available metrics.
Please refer to the [DuckDB documentation](https://duckdb.org/docs/dev/profiling.html) on configuring and collecting specific metrics.

- First, you need to obtain a connection.
- Then, you enable profiling for the connection.
- Now, for each subsequent query on this connection, DuckDB will collect profiling information.
- Optionally, you can turn off profiling at any point.
- Next, you execute the query for which you want to obtain profiling information.
- Finally, directly after executing the query, you use the underlying DuckDB connection to retrieve any available profiling information.

For readability, we omit error handling in this example.
```Go
db, err := sql.Open("duckdb", "")
con, err := db.Conn(context.Background())

_, err = con.ExecContext(context.Background(), `PRAGMA enable_profiling = 'no_output'`)
_, err = con.ExecContext(context.Background(), `PRAGMA profiling_mode = 'detailed'`)

res, err := con.QueryContext(context.Background(), `SELECT 42`)

var info ProfilingInfo
err = con.Raw(func(driverCon any) error {
info, err = GetProfilingInfo(driverCon)
return err
})
err = res.Close()

_, err = con.ExecContext(context.Background(), `PRAGMA disable_profiling`)
err = con.Close()
err = db.Close()
```

## DuckDB Apache Arrow Interface

If you want to use the [DuckDB Arrow Interface](https://duckdb.org/docs/api/c/api#arrow-interface), you can obtain a new `Arrow` by passing a DuckDB connection to `NewArrowFromConn()`.
Expand Down
4 changes: 2 additions & 2 deletions appender.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,10 @@ type Appender struct {
func NewAppenderFromConn(driverConn driver.Conn, schema, table string) (*Appender, error) {
con, ok := driverConn.(*conn)
if !ok {
return nil, getError(errAppenderInvalidCon, nil)
return nil, getError(errInvalidCon, nil)
}
if con.closed {
return nil, getError(errAppenderClosedCon, nil)
return nil, getError(errClosedCon, nil)
}

var cSchema *C.char
Expand Down
35 changes: 16 additions & 19 deletions errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,27 +76,24 @@ var (
errAPI = errors.New("API error")
errVectorSize = errors.New("data chunks cannot exceed duckdb's internal vector size")

errParseDSN = errors.New("could not parse DSN for database")
errOpen = errors.New("could not open database")
errSetConfig = errors.New("could not set invalid or local option for global database config")

errUnsupportedMapKeyType = errors.New("MAP key type not supported")

errAppenderCreation = errors.New("could not create appender")
errAppenderInvalidCon = fmt.Errorf("%w: not a DuckDB driver connection", errAppenderCreation)
errAppenderClosedCon = fmt.Errorf("%w: appender creation on a closed connection", errAppenderCreation)

errAppenderClose = errors.New("could not close appender")
errAppenderDoubleClose = fmt.Errorf("%w: already closed", errAppenderClose)

errParseDSN = errors.New("could not parse DSN for database")
errOpen = errors.New("could not open database")
errSetConfig = errors.New("could not set invalid or local option for global database config")
errInvalidCon = errors.New("not a DuckDB driver connection")
errClosedCon = errors.New("closed connection")

errAppenderCreation = errors.New("could not create appender")
errAppenderClose = errors.New("could not close appender")
errAppenderDoubleClose = fmt.Errorf("%w: already closed", errAppenderClose)
errAppenderAppendRow = errors.New("could not append row")
errAppenderAppendAfterClose = fmt.Errorf("%w: appender already closed", errAppenderAppendRow)
errAppenderFlush = errors.New("could not flush appender")

errAppenderFlush = errors.New("could not flush appender")

errEmptyName = errors.New("empty name")
errInvalidDecimalWidth = fmt.Errorf("the DECIMAL with must be between 1 and %d", MAX_DECIMAL_WIDTH)
errInvalidDecimalScale = errors.New("the DECIMAL scale must be less than or equal to the width")
errUnsupportedMapKeyType = errors.New("MAP key type not supported")
errEmptyName = errors.New("empty name")
errInvalidDecimalWidth = fmt.Errorf("the DECIMAL with must be between 1 and %d", MAX_DECIMAL_WIDTH)
errInvalidDecimalScale = errors.New("the DECIMAL scale must be less than or equal to the width")
errSetSQLNULLValue = errors.New("cannot write to a NULL column")

errScalarUDFCreate = errors.New("could not create scalar UDF")
errScalarUDFNoName = fmt.Errorf("%w: missing name", errScalarUDFCreate)
Expand All @@ -108,7 +105,7 @@ var (
errScalarUDFCreateSet = fmt.Errorf("could not create scalar UDF set")
errScalarUDFAddToSet = fmt.Errorf("%w: could not add the function to the set", errScalarUDFCreateSet)

errSetSQLNULLValue = errors.New("cannot write to a NULL column")
errProfilingInfoEmpty = errors.New("no profiling information available for this connection")

// Errors not covered in tests.
errConnect = errors.New("could not connect to database")
Expand Down
8 changes: 4 additions & 4 deletions errors_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,13 @@ func TestErrNestedMap(t *testing.T) {
func TestErrAppender(t *testing.T) {
t.Parallel()

t.Run(errAppenderInvalidCon.Error(), func(t *testing.T) {
t.Run(errInvalidCon.Error(), func(t *testing.T) {
var con driver.Conn
_, err := NewAppenderFromConn(con, "", "test")
testError(t, err, errAppenderInvalidCon.Error())
testError(t, err, errInvalidCon.Error())
})

t.Run(errAppenderClosedCon.Error(), func(t *testing.T) {
t.Run(errClosedCon.Error(), func(t *testing.T) {
c, err := NewConnector("", nil)
require.NoError(t, err)

Expand All @@ -74,7 +74,7 @@ func TestErrAppender(t *testing.T) {
require.NoError(t, con.Close())

_, err = NewAppenderFromConn(con, "", "test")
testError(t, err, errAppenderClosedCon.Error())
testError(t, err, errClosedCon.Error())
require.NoError(t, c.Close())
})

Expand Down
72 changes: 72 additions & 0 deletions profiling.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package duckdb

/*
#include <duckdb.h>
*/
import "C"

import (
"database/sql"
"unsafe"
)

// ProfilingInfo is a recursive type containing metrics for each node in DuckDB's query plan.
// There are two types of nodes: the QUERY_ROOT and OPERATOR nodes.
// The QUERY_ROOT refers exclusively to the top-level node; its metrics are measured over the entire query.
// The OPERATOR nodes refer to the individual operators in the query plan.
type ProfilingInfo struct {
// Metrics contains all key-value pairs of the current node.
// The key represents the name and corresponds to the measured value.
Metrics map[string]string
// Children contains all children of the node and their respective metrics.
Children []ProfilingInfo
}

// GetProfilingInfo obtains all available metrics set by the current connection.
func GetProfilingInfo(c *sql.Conn) (ProfilingInfo, error) {
info := ProfilingInfo{}
err := c.Raw(func(driverConn any) error {
con := driverConn.(*conn)
duckdbInfo := C.duckdb_get_profiling_info(con.duckdbCon)
if duckdbInfo == nil {
return getError(errProfilingInfoEmpty, nil)
}

// Recursive tree traversal.
info.getMetrics(duckdbInfo)
return nil
})
return info, err
}

func (info *ProfilingInfo) getMetrics(duckdbInfo C.duckdb_profiling_info) {
m := C.duckdb_profiling_info_get_metrics(duckdbInfo)
count := C.duckdb_get_map_size(m)
info.Metrics = make(map[string]string, count)

for i := C.idx_t(0); i < count; i++ {
key := C.duckdb_get_map_key(m, i)
value := C.duckdb_get_map_value(m, i)

cKey := C.duckdb_get_varchar(key)
cValue := C.duckdb_get_varchar(value)
keyStr := C.GoString(cKey)
valueStr := C.GoString(cValue)

info.Metrics[keyStr] = valueStr

C.duckdb_destroy_value(&key)
C.duckdb_destroy_value(&value)
C.duckdb_free(unsafe.Pointer(cKey))
C.duckdb_free(unsafe.Pointer(cValue))
}
C.duckdb_destroy_value(&m)

childCount := C.duckdb_profiling_info_get_child_count(duckdbInfo)
for i := C.idx_t(0); i < childCount; i++ {
duckdbChildInfo := C.duckdb_profiling_info_get_child(duckdbInfo, i)
childInfo := ProfilingInfo{}
childInfo.getMetrics(duckdbChildInfo)
info.Children = append(info.Children, childInfo)
}
}
52 changes: 52 additions & 0 deletions profiling_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package duckdb

import (
"context"
"database/sql"
"testing"

"github.com/stretchr/testify/require"
)

func TestProfiling(t *testing.T) {
t.Parallel()

db, err := sql.Open("duckdb", "")
require.NoError(t, err)
con, err := db.Conn(context.Background())
require.NoError(t, err)

_, err = con.ExecContext(context.Background(), `PRAGMA enable_profiling = 'no_output'`)
require.NoError(t, err)
_, err = con.ExecContext(context.Background(), `PRAGMA profiling_mode = 'detailed'`)
require.NoError(t, err)
res, err := con.QueryContext(context.Background(), `SELECT range AS i FROM range(100) ORDER BY i`)
require.NoError(t, err)

info, err := GetProfilingInfo(con)
require.NoError(t, err)

_, err = con.ExecContext(context.Background(), `PRAGMA disable_profiling`)
require.NoError(t, err)
require.NoError(t, res.Close())
require.NoError(t, con.Close())
require.NoError(t, db.Close())

// Verify the metrics.
require.NotEmpty(t, info.Metrics, "metrics must not be empty")
require.NotEmpty(t, info.Children, "children must not be empty")
require.NotEmpty(t, info.Children[0].Metrics, "child metrics must not be empty")
}

func TestErrProfiling(t *testing.T) {
t.Parallel()
db, err := sql.Open("duckdb", "")
require.NoError(t, err)
con, err := db.Conn(context.Background())
require.NoError(t, err)

_, err = GetProfilingInfo(con)
testError(t, err, errProfilingInfoEmpty.Error())
require.NoError(t, con.Close())
require.NoError(t, db.Close())
}
2 changes: 1 addition & 1 deletion scalar_udf_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ func TestANYScalarUDF(t *testing.T) {
require.NoError(t, db.Close())
}

func TestScalarUDFErrors(t *testing.T) {
func TestErrScalarUDF(t *testing.T) {
t.Parallel()

db, err := sql.Open("duckdb", "")
Expand Down

0 comments on commit 94736e3

Please sign in to comment.