Skip to content

Commit

Permalink
Merge ssh://github.com/davidhcoe/arrow-adbc into dev/go-flight-sql-in…
Browse files Browse the repository at this point in the history
…terop
  • Loading branch information
David Coe committed Nov 13, 2024
2 parents 333e645 + 0cad889 commit d03804b
Show file tree
Hide file tree
Showing 44 changed files with 810 additions and 444 deletions.
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ ARCH_SHORT=amd64
ARCH_CONDA_FORGE=linux_64_

# Default versions for various dependencies
JDK=8
JDK=11
MANYLINUX=2-28
MAVEN=3.6.3
PLATFORM=linux/amd64
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
java: ['8', '11', '17', '21', '22']
java: ['11', '17', '21', '22']
steps:
- uses: actions/checkout@v4
with:
Expand Down
28 changes: 14 additions & 14 deletions .github/workflows/native-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,10 @@ jobs:
miniforge-version: latest
use-mamba: true
- name: Install Dependencies
shell: bash -l {0}
shell: pwsh
run: |
mamba install -c conda-forge \
--file ci/conda_env_cpp.txt
mamba install -c conda-forge `
--file ci\conda_env_cpp.txt
# Force bundled gtest
mamba uninstall gtest
Expand Down Expand Up @@ -137,10 +137,10 @@ jobs:
miniforge-version: latest
use-mamba: true
- name: Install Dependencies
shell: bash -l {0}
shell: pwsh
run: |
mamba install -c conda-forge \
--file ci/conda_env_cpp.txt
mamba install -c conda-forge `
--file ci\conda_env_cpp.txt
# Force bundled gtest
mamba uninstall gtest
Expand Down Expand Up @@ -213,10 +213,10 @@ jobs:
miniforge-version: latest
use-mamba: true
- name: Install Dependencies
shell: bash -l {0}
shell: pwsh
run: |
mamba install -c conda-forge \
--file ci/conda_env_cpp.txt
mamba install -c conda-forge `
--file ci\conda_env_cpp.txt
- uses: actions/setup-go@v5
with:
go-version: "${{ env.GO_VERSION }}"
Expand Down Expand Up @@ -280,12 +280,12 @@ jobs:
miniforge-version: latest
use-mamba: true
- name: Install Dependencies
shell: bash -l {0}
shell: pwsh
run: |
mamba install -c conda-forge \
python=${{ matrix.python }} \
--file ci/conda_env_cpp.txt \
--file ci/conda_env_python.txt
mamba install -c conda-forge `
python=${{ matrix.python }} `
--file ci\conda_env_cpp.txt `
--file ci\conda_env_python.txt
- uses: actions/download-artifact@v4
with:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/nightly-verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,9 @@ jobs:
VERBOSE: "1"
VERIFICATION_MOCK_DIST_DIR: ${{ github.workspace }}
run: |
# Rust uses a lot of disk space, free up some space
# https://github.com/actions/runner-images/issues/2840
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
./arrow-adbc/dev/release/verify-release-candidate.sh $VERSION 0
- name: Verify
Expand Down
8 changes: 6 additions & 2 deletions .github/workflows/packaging.yml
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ jobs:
docs.tgz
java:
name: "Java 1.8"
name: "Java 11"
runs-on: ubuntu-latest
needs:
- source
Expand Down Expand Up @@ -609,6 +609,7 @@ jobs:
name: python-${{ matrix.arch }}-manylinux${{ matrix.manylinux_version }}
retention-days: 7
path: |
adbc/python/adbc_driver_bigquery/repaired_wheels/*.whl
adbc/python/adbc_driver_flightsql/repaired_wheels/*.whl
adbc/python/adbc_driver_manager/repaired_wheels/*.whl
adbc/python/adbc_driver_postgresql/repaired_wheels/*.whl
Expand Down Expand Up @@ -746,6 +747,7 @@ jobs:
name: python-${{ matrix.arch }}-macos
retention-days: 7
path: |
adbc/python/adbc_driver_bigquery/repaired_wheels/*.whl
adbc/python/adbc_driver_flightsql/repaired_wheels/*.whl
adbc/python/adbc_driver_manager/repaired_wheels/*.whl
adbc/python/adbc_driver_postgresql/repaired_wheels/*.whl
Expand Down Expand Up @@ -801,7 +803,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12"]
python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
env:
PYTHON_VERSION: "${{ matrix.python_version }}"
# Where to install vcpkg
Expand Down Expand Up @@ -884,6 +886,7 @@ jobs:
name: python${{ matrix.python_version }}-windows
retention-days: 7
path: |
adbc/python/adbc_driver_bigquery/repaired_wheels/*.whl
adbc/python/adbc_driver_flightsql/repaired_wheels/*.whl
adbc/python/adbc_driver_manager/repaired_wheels/*.whl
adbc/python/adbc_driver_postgresql/repaired_wheels/*.whl
Expand Down Expand Up @@ -943,6 +946,7 @@ jobs:
name: python-sdist
retention-days: 7
path: |
adbc/python/adbc_driver_bigquery/dist/*.tar.gz
adbc/python/adbc_driver_flightsql/dist/*.tar.gz
adbc/python/adbc_driver_manager/dist/*.tar.gz
adbc/python/adbc_driver_postgresql/dist/*.tar.gz
Expand Down
16 changes: 9 additions & 7 deletions .github/workflows/verify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,10 @@ on:
required: false
type: string
default: ""
pull_request:
branches:
- main
paths:
- '.github/workflows/verify.yml'
- 'dev/release/verify-release-candidate.sh'
- 'dev/release/verify-release-candidate.ps1'

# Don't automatically run on pull requests. While we're only using a
# read-only token below, let's play it safe since we are running code out of
# the given branch.

permissions:
contents: read
Expand Down Expand Up @@ -70,6 +67,8 @@ jobs:
TEST_BINARIES: "1"
USE_CONDA: "1"
VERBOSE: "1"
# Make this available to download_rc_binaries.py
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
./dev/release/verify-release-candidate.sh ${{ inputs.version }} ${{ inputs.rc }}
Expand Down Expand Up @@ -106,6 +105,9 @@ jobs:
USE_CONDA: "1"
VERBOSE: "1"
run: |
# Rust uses a lot of disk space, free up some space
# https://github.com/actions/runner-images/issues/2840
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
./dev/release/verify-release-candidate.sh ${{ inputs.version }} ${{ inputs.rc }}
- name: Verify
if: matrix.os == 'windows-latest'
Expand Down
2 changes: 1 addition & 1 deletion c/driver/flightsql/sqlite_flightsql_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ class SqliteFlightSqlQuirks : public adbc_validation::DriverQuirks {
bool supports_get_objects() const override { return true; }
bool supports_partitioned_data() const override { return true; }
bool supports_dynamic_parameter_binding() const override { return true; }
std::string catalog() const { return "main"; }
std::string catalog() const override { return "main"; }
};

class SqliteFlightSqlTest : public ::testing::Test, public adbc_validation::DatabaseTest {
Expand Down
91 changes: 64 additions & 27 deletions c/driver/postgresql/connection.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include "connection.h"

#include <array>
#include <cassert>
#include <cinttypes>
#include <cmath>
Expand Down Expand Up @@ -175,6 +176,13 @@ class PostgresGetObjectsHelper : public adbc::driver::GetObjectsHelper {
all_constraints_(conn, kConstraintsQueryAll),
some_constraints_(conn, ConstraintsQuery()) {}

// Allow Redshift to execute this query without constraints
// TODO(paleolimbot): Investigate to see if we can simplify the constraits query so that
// it works on both!
void SetEnableConstraints(bool enable_constraints) {
enable_constraints_ = enable_constraints;
}

Status Load(adbc::driver::GetObjectsDepth depth,
std::optional<std::string_view> catalog_filter,
std::optional<std::string_view> schema_filter,
Expand Down Expand Up @@ -262,16 +270,23 @@ class PostgresGetObjectsHelper : public adbc::driver::GetObjectsHelper {
std::optional<std::string_view> column_filter) override {
if (column_filter.has_value()) {
UNWRAP_STATUS(some_columns_.Execute(
{std::string(schema), std::string(table), std::string(*column_filter)}))
UNWRAP_STATUS(some_constraints_.Execute(
{std::string(schema), std::string(table), std::string(*column_filter)}))
{std::string(schema), std::string(table), std::string(*column_filter)}));
next_column_ = some_columns_.Row(-1);
next_constraint_ = some_constraints_.Row(-1);
} else {
UNWRAP_STATUS(all_columns_.Execute({std::string(schema), std::string(table)}))
UNWRAP_STATUS(all_constraints_.Execute({std::string(schema), std::string(table)}))
UNWRAP_STATUS(all_columns_.Execute({std::string(schema), std::string(table)}));
next_column_ = all_columns_.Row(-1);
next_constraint_ = all_constraints_.Row(-1);
}

if (enable_constraints_) {
if (column_filter.has_value()) {
UNWRAP_STATUS(some_constraints_.Execute(
{std::string(schema), std::string(table), std::string(*column_filter)}))
next_constraint_ = some_constraints_.Row(-1);
} else {
UNWRAP_STATUS(
all_constraints_.Execute({std::string(schema), std::string(table)}));
next_constraint_ = all_constraints_.Row(-1);
}
}

return Status::Ok();
Expand Down Expand Up @@ -348,6 +363,9 @@ class PostgresGetObjectsHelper : public adbc::driver::GetObjectsHelper {
PqResultHelper all_constraints_;
PqResultHelper some_constraints_;

// On Redshift, the constraints query fails
bool enable_constraints_{true};

// Iterator state for the catalogs/schema/table/column queries
PqResultRow next_catalog_;
PqResultRow next_schema_;
Expand Down Expand Up @@ -478,19 +496,30 @@ AdbcStatusCode PostgresConnection::GetInfo(struct AdbcConnection* connection,
for (size_t i = 0; i < info_codes_length; i++) {
switch (info_codes[i]) {
case ADBC_INFO_VENDOR_NAME:
infos.push_back({info_codes[i], "PostgreSQL"});
infos.push_back({info_codes[i], std::string(VendorName())});
break;
case ADBC_INFO_VENDOR_VERSION: {
const char* stmt = "SHOW server_version_num";
auto result_helper = PqResultHelper{conn_, std::string(stmt)};
RAISE_STATUS(error, result_helper.Execute());
auto it = result_helper.begin();
if (it == result_helper.end()) {
SetError(error, "[libpq] PostgreSQL returned no rows for '%s'", stmt);
return ADBC_STATUS_INTERNAL;
if (VendorName() == "Redshift") {
const std::array<int, 3>& version = VendorVersion();
std::string version_string = std::to_string(version[0]) + "." +
std::to_string(version[1]) + "." +
std::to_string(version[2]);
infos.push_back({info_codes[i], std::move(version_string)});

} else {
// Gives a version in the form 140000 instead of 14.0.0
const char* stmt = "SHOW server_version_num";
auto result_helper = PqResultHelper{conn_, std::string(stmt)};
RAISE_STATUS(error, result_helper.Execute());
auto it = result_helper.begin();
if (it == result_helper.end()) {
SetError(error, "[libpq] PostgreSQL returned no rows for '%s'", stmt);
return ADBC_STATUS_INTERNAL;
}
const char* server_version_num = (*it)[0].data;
infos.push_back({info_codes[i], server_version_num});
}
const char* server_version_num = (*it)[0].data;
infos.push_back({info_codes[i], server_version_num});

break;
}
case ADBC_INFO_DRIVER_NAME:
Expand Down Expand Up @@ -520,7 +549,8 @@ AdbcStatusCode PostgresConnection::GetObjects(
struct AdbcConnection* connection, int c_depth, const char* catalog,
const char* db_schema, const char* table_name, const char** table_type,
const char* column_name, struct ArrowArrayStream* out, struct AdbcError* error) {
PostgresGetObjectsHelper new_helper(conn_);
PostgresGetObjectsHelper helper(conn_);
helper.SetEnableConstraints(VendorName() != "Redshift");

const auto catalog_filter =
catalog ? std::make_optional(std::string_view(catalog)) : std::nullopt;
Expand Down Expand Up @@ -559,9 +589,9 @@ AdbcStatusCode PostgresConnection::GetObjects(
.ToAdbc(error);
}

auto status = BuildGetObjects(&new_helper, depth, catalog_filter, schema_filter,
auto status = BuildGetObjects(&helper, depth, catalog_filter, schema_filter,
table_filter, column_filter, table_type_filter, out);
RAISE_STATUS(error, new_helper.Close());
RAISE_STATUS(error, helper.Close());
RAISE_STATUS(error, status);

return ADBC_STATUS_OK;
Expand All @@ -573,11 +603,12 @@ AdbcStatusCode PostgresConnection::GetOption(const char* option, char* value,
if (std::strcmp(option, ADBC_CONNECTION_OPTION_CURRENT_CATALOG) == 0) {
output = PQdb(conn_);
} else if (std::strcmp(option, ADBC_CONNECTION_OPTION_CURRENT_DB_SCHEMA) == 0) {
PqResultHelper result_helper{conn_, "SELECT CURRENT_SCHEMA"};
PqResultHelper result_helper{conn_, "SELECT CURRENT_SCHEMA()"};
RAISE_STATUS(error, result_helper.Execute());
auto it = result_helper.begin();
if (it == result_helper.end()) {
SetError(error, "[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA'");
SetError(error,
"[libpq] PostgreSQL returned no rows for 'SELECT CURRENT_SCHEMA()'");
return ADBC_STATUS_INTERNAL;
}
output = (*it)[0].data;
Expand Down Expand Up @@ -989,22 +1020,22 @@ AdbcStatusCode PostgresConnection::GetTableSchema(const char* catalog,
CHECK_NA(INTERNAL, ArrowSchemaSetTypeStruct(uschema.get(), result_helper.NumRows()),
error);

ArrowError na_error;
int row_counter = 0;
for (auto row : result_helper) {
const char* colname = row[0].data;
const Oid pg_oid =
static_cast<uint32_t>(std::strtol(row[1].data, /*str_end=*/nullptr, /*base=*/10));

PostgresType pg_type;
if (type_resolver_->Find(pg_oid, &pg_type, &na_error) != NANOARROW_OK) {
SetError(error, "%s%d%s%s%s%" PRIu32, "Column #", row_counter + 1, " (\"", colname,
"\") has unknown type code ", pg_oid);
if (type_resolver_->FindWithDefault(pg_oid, &pg_type) != NANOARROW_OK) {
SetError(error, "%s%d%s%s%s%" PRIu32, "Error resolving type code for column #",
row_counter + 1, " (\"", colname, "\") with oid ", pg_oid);
final_status = ADBC_STATUS_NOT_IMPLEMENTED;
break;
}
CHECK_NA(INTERNAL,
pg_type.WithFieldName(colname).SetSchema(uschema->children[row_counter]),
pg_type.WithFieldName(colname).SetSchema(uschema->children[row_counter],
std::string(VendorName())),
error);
row_counter++;
}
Expand Down Expand Up @@ -1136,4 +1167,10 @@ AdbcStatusCode PostgresConnection::SetOptionInt(const char* key, int64_t value,
return ADBC_STATUS_NOT_IMPLEMENTED;
}

std::string_view PostgresConnection::VendorName() { return database_->VendorName(); }

const std::array<int, 3>& PostgresConnection::VendorVersion() {
return database_->VendorVersion();
}

} // namespace adbcpq
3 changes: 3 additions & 0 deletions c/driver/postgresql/connection.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#pragma once

#include <array>
#include <cstdint>
#include <memory>

Expand Down Expand Up @@ -73,6 +74,8 @@ class PostgresConnection {
return type_resolver_;
}
bool autocommit() const { return autocommit_; }
std::string_view VendorName();
const std::array<int, 3>& VendorVersion();

private:
std::shared_ptr<PostgresDatabase> database_;
Expand Down
2 changes: 1 addition & 1 deletion c/driver/postgresql/copy/postgres_copy_reader_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class PostgresCopyStreamTester {
public:
ArrowErrorCode Init(const PostgresType& root_type, ArrowError* error = nullptr) {
NANOARROW_RETURN_NOT_OK(reader_.Init(root_type));
NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema(error));
NANOARROW_RETURN_NOT_OK(reader_.InferOutputSchema("PostgreSQL Tester", error));
NANOARROW_RETURN_NOT_OK(reader_.InitFieldReaders(error));
return NANOARROW_OK;
}
Expand Down
Loading

0 comments on commit d03804b

Please sign in to comment.