Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

run tests on CI #18

Merged
merged 9 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 110 additions & 0 deletions .github/pg_container/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
ARG PG_VERSION
FROM postgres:${PG_VERSION}-bookworm

# Install the PostgreSQL extension packages used by the integration tests.
# PG_MAJOR is not set in this file; presumably it is exported by the postgres
# base image -- verify if the base image ever changes.
# The `apt-cache showpkg` call only prints the PostGIS package record to the
# build log. It previously referenced $POSTGIS_MAJOR, which is not defined
# anywhere in this Dockerfile, so it now uses the same "3" as the install lines.
# The apt package lists are removed at the end to keep the image layer small.
RUN apt-get update \
    && apt-cache showpkg postgresql-$PG_MAJOR-postgis-3 \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        postgresql-$PG_MAJOR-postgis-3 \
        postgresql-$PG_MAJOR-postgis-3-scripts \
        postgresql-$PG_MAJOR-pgvector \
        postgresql-$PG_MAJOR-age \
        postgresql-$PG_MAJOR-rational \
    && rm -rf /var/lib/apt/lists/*


# Other available Debian packages for PG extensions:
# postgresql-16-age - Graph database optimized for fast analysis and real-time data processing
# postgresql-16-asn1oid - ASN.1 OID data type for PostgreSQL
# postgresql-16-auto-failover - Postgres high availability support
# postgresql-16-bgw-replstatus - report whether PostgreSQL node is master or standby
# postgresql-16-credcheck - PostgreSQL username/password checks
# postgresql-16-cron - Run periodic jobs in PostgreSQL
# postgresql-16-debversion - Debian version number type for PostgreSQL
# postgresql-16-decoderbufs - logical decoder output plugin to deliver data as Protocol Buffers
# postgresql-16-dirtyread - Read dead but unvacuumed tuples from a PostgreSQL relation
# postgresql-16-extra-window-functions - Extra Window Functions for PostgreSQL
# postgresql-16-first-last-agg - PostgreSQL extension providing first and last aggregate functions
# postgresql-16-h3 - PostgreSQL bindings for H3, a hierarchical hexagonal geospatial indexing system
# postgresql-16-hll - HyperLogLog extension for PostgreSQL
# postgresql-16-http - HTTP client for PostgreSQL, retrieve a web page from inside the database
# postgresql-16-hypopg - PostgreSQL extension adding support for hypothetical indexes.
# postgresql-16-icu-ext - PostgreSQL extension exposing functionality from the ICU library
# postgresql-16-ip4r - IPv4 and IPv6 types for PostgreSQL 16
# postgresql-16-jsquery - PostgreSQL JSON query language with GIN indexing support
# postgresql-16-londiste-sql - SQL infrastructure for Londiste
# postgresql-16-mimeo - specialized, per-table replication between PostgreSQL instances
# postgresql-16-mobilitydb - Open source geospatial trajectory data management & analysis platform
# postgresql-16-mysql-fdw - Postgres 16 Foreign Data Wrapper for MySQL
# postgresql-16-numeral - numeral datatypes for PostgreSQL
# postgresql-16-ogr-fdw - PostgreSQL foreign data wrapper for OGR
# postgresql-16-omnidb - PostgreSQL PL/pgSQL debugger extension for OmniDB
# postgresql-16-oracle-fdw - PostgreSQL Foreign Data Wrapper for Oracle
# postgresql-16-orafce - Oracle support functions for PostgreSQL 16
# postgresql-16-partman - PostgreSQL Partition Manager
# postgresql-16-periods - PERIODs and SYSTEM VERSIONING for PostgreSQL
# postgresql-16-pg-catcheck - Postgres system catalog checker
# postgresql-16-pg-checksums - Activate/deactivate/verify PostgreSQL data checksums
# postgresql-16-pg-fact-loader - Build fact tables asynchronously with Postgres
# postgresql-16-pg-failover-slots - High-availability support for PostgreSQL logical replication
# postgresql-16-pg-hint-plan - support for optimizer hints in PostgreSQL
# postgresql-16-pg-qualstats - PostgreSQL extension to gather statistics about predicates.
# postgresql-16-pg-rrule - RRULE data type for PostgreSQL
# postgresql-16-pg-stat-kcache - PostgreSQL extension to gather per-query kernel statistics.
# postgresql-16-pg-track-settings - PostgreSQL extension tracking of configuration settings
# postgresql-16-pg-wait-sampling - Extension providing statistics about PostgreSQL wait events
# postgresql-16-pgaudit - PostgreSQL Audit Extension
# postgresql-16-pgauditlogtofile - PostgreSQL pgAudit Add-On to redirect audit logs
# postgresql-16-pgextwlist - PostgreSQL Extension Whitelisting
# postgresql-16-pgfaceting - Faceted query acceleration for PostgreSQL using roaring bitmaps
# postgresql-16-pgfincore - set of PostgreSQL functions to manage blocks in memory
# postgresql-16-pgl-ddl-deploy - Transparent DDL replication for PostgreSQL
# postgresql-16-pglogical - Logical Replication Extension for PostgreSQL
# postgresql-16-pglogical-ticker - Have time-based replication delay for pglogical
# postgresql-16-pgmemcache - PostgreSQL interface to memcached
# postgresql-16-pgmp - arbitrary precision integers and rationals for PostgreSQL 16
# postgresql-16-pgpcre - Perl Compatible Regular Expressions (PCRE) extension for PostgreSQL
# postgresql-16-pgpool2 - connection pool server and replication proxy for PostgreSQL - modules
# postgresql-16-pgq-node - Cascaded queueing on top of PgQ
# postgresql-16-pgq3 - Generic queue for PostgreSQL
# postgresql-16-pgrouting - Routing functionality support for PostgreSQL/PostGIS
# postgresql-16-pgrouting-doc - Routing functionality support for PostgreSQL/PostGIS (Documentation)
# postgresql-16-pgrouting-scripts - Routing functionality support for PostgreSQL/PostGIS - SQL scripts
# postgresql-16-pgsphere - Spherical data types for PostgreSQL
# postgresql-16-pgtap - Unit testing framework extension for PostgreSQL 16
# postgresql-16-pgvector - Open-source vector similarity search for Postgres
# postgresql-16-pldebugger - PostgreSQL pl/pgsql Debugger API
# postgresql-16-pljava - Java procedural language for PostgreSQL 16
# postgresql-16-pllua - Lua procedural language for PostgreSQL 16
# postgresql-16-plpgsql-check - plpgsql_check extension for PostgreSQL
# postgresql-16-plprofiler - PostgreSQL PL/pgSQL functions performance profiler
# postgresql-16-plproxy - database partitioning system for PostgreSQL 16
# postgresql-16-plr - Procedural language interface between PostgreSQL and R
# postgresql-16-plsh - PL/sh procedural language for PostgreSQL 16
# postgresql-16-pointcloud - PostgreSQL extension for storing point cloud (LIDAR) data
# postgresql-16-postgis-3 - Geographic objects support for PostgreSQL 16
# postgresql-16-postgis-3-scripts - Geographic objects support for PostgreSQL 16 -- SQL scripts
# postgresql-16-powa - PostgreSQL Workload Analyzer -- PostgreSQL 16 extension
# postgresql-16-prefix - Prefix Range module for PostgreSQL
# postgresql-16-preprepare - pre prepare your PostgreSQL statements server side
# postgresql-16-prioritize - Get and set the nice priorities of PostgreSQL backends
# postgresql-16-q3c - PostgreSQL 16 extension used for indexing the sky
# postgresql-16-rational - Precise fractional arithmetic for PostgreSQL
# postgresql-16-rdkit - Cheminformatics and machine-learning software (PostgreSQL Cartridge)
# postgresql-16-repack - reorganize tables in PostgreSQL databases with minimal locks
# postgresql-16-repmgr - replication manager for PostgreSQL 16
# postgresql-16-roaringbitmap - RoaringBitmap extension for PostgreSQL
# postgresql-16-rum - PostgreSQL RUM access method
# postgresql-16-semver - Semantic version number type for PostgreSQL
# postgresql-16-set-user - PostgreSQL privilege escalation with enhanced logging and control
# postgresql-16-show-plans - Show query plans of currently running PostgreSQL statements
# postgresql-16-similarity - PostgreSQL similarity functions extension
# postgresql-16-slony1-2 - replication system for PostgreSQL: PostgreSQL 16 server plug-in
# postgresql-16-snakeoil - PostgreSQL anti-virus scanner based on ClamAV
# postgresql-16-squeeze - PostgreSQL extension for automatic bloat cleanup
# postgresql-16-statviz - PostgreSQL internal statistics analysis and visualization
# postgresql-16-tablelog - log changes on tables and restore tables to point in time
# postgresql-16-tdigest - t-digest algorithm for on-line accumulation of rank-based statistics
# postgresql-16-tds-fdw - PostgreSQL foreign data wrapper for TDS databases
# postgresql-16-toastinfo - Show storage structure of varlena datatypes in PostgreSQL
# postgresql-16-unit - SI Units for PostgreSQL
# postgresql-16-wal2json - PostgreSQL logical decoding JSON output plugin
38 changes: 38 additions & 0 deletions .github/setupandbuild/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Composite action: installs the stable Rust toolchain for the requested
# target, builds the pg2parquet CLI and copies the binary into ./output_dir.
name: setup_and_build
description: Setups repository, Rust and build pg2parquet

inputs:
  rust_target:
    description: Target triple
    required: true
  rust_profile:
    description: Cargo Profile to use
    default: ci-build
    required: true
runs:
  using: composite
  steps:
    # Pinned to the v1 release tag rather than the mutable master branch so
    # the action code cannot change underneath the workflow.
    - uses: actions-rs/toolchain@v1
      with:
        target: ${{ inputs.rust_target }}
        toolchain: stable
        override: true

    - name: Build
      uses: actions-rs/cargo@v1
      with:
        # Build through `cross` on non-Windows runners for the musl,
        # Windows-GNU, aarch64 and riscv64 targets; plain cargo otherwise.
        use-cross: ${{ runner.os != 'Windows' && (inputs.rust_target == 'x86_64-unknown-linux-musl' || inputs.rust_target == 'x86_64-pc-windows-gnu' || inputs.rust_target == 'aarch64-unknown-linux-gnu' || inputs.rust_target == 'riscv64gc-unknown-linux-gnu') }}
        command: build
        args: --locked --profile=${{ inputs.rust_profile }} --manifest-path=cli/Cargo.toml --target=${{ inputs.rust_target }}
    # Single copy step for all targets: the `dev` profile is looked up in the
    # `debug` directory, any other profile name is used as the directory name
    # as-is, and the Windows-GNU target gets the `.exe` suffix.
    - name: Copy binary
      shell: bash
      run: |
        mkdir -p output_dir
        cp cli/target/${{ inputs.rust_target }}/${{ inputs.rust_profile == 'dev' && 'debug' || inputs.rust_profile }}/pg2parquet${{ inputs.rust_target == 'x86_64-pc-windows-gnu' && '.exe' || '' }} output_dir/
45 changes: 45 additions & 0 deletions .github/test/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Composite action: builds and starts a PostgreSQL container from
# .github/pg_container, then runs the Python integration tests in py-tests
# against the pg2parquet binary passed in via `binary`.
name: test
description: Runs Python integration tests for pg2parquet

inputs:
binary:
description: The pg2parquet compiled binary
pg_version:
default: '16'
description: PostgreSQL version
required: true

runs:
using: composite
steps:
# Build the database image for the requested PostgreSQL version.
- run: cd .github/pg_container && docker build --build-arg PG_VERSION=${{ inputs.pg_version }} -t pg2parquet-postgres .
shell: bash
# Start the server detached, published on host port 5432.
- run: docker run -d --name pg2parquet-postgres1 -p 5432:5432 -e POSTGRES_PASSWORD=postgres pg2parquet-postgres
shell: bash
# Poll with psql until the server answers a query; give up after 120 seconds.
- run: timeout 120s bash -c 'until docker exec pg2parquet-postgres1 psql -U postgres -c "select 1;"; do echo "Waiting for PG startup..."; sleep 0.3; done'
shell: bash
- run: docker exec pg2parquet-postgres1 psql -U postgres -c "CREATE DATABASE pg2parquet_test;"
shell: bash

- uses: actions/setup-python@v4
with:
python-version: 3.12

- uses: abatilo/actions-poetry@v2
with:
poetry-version: 1.8.2

# Install only the test dependencies; py-tests itself is not installed as a package.
- run: poetry install --no-root
working-directory: py-tests
shell: bash

# NOTE(review): the database created above is named pg2parquet_test, but
# PG2PARQUET_TEST_DB_NAME below is set to testdb -- confirm which name the
# test suite actually connects to.
- run: poetry -C py-tests run python -m pytest
shell: bash
env:
PG2PARQUET_TEST_BIN: ${{ inputs.binary }}
PG2PARQUET_TEST_DB_HOST: 127.0.0.1
PG2PARQUET_TEST_DB_PORT: 5432
PG2PARQUET_TEST_DB_NAME: testdb
PG2PARQUET_TEST_DB_USER: postgres
PG2PARQUET_TEST_DB_PASSWORD: postgres

34 changes: 34 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Builds pg2parquet for every target in the matrix and uploads each binary as
# a workflow artifact named release-<target>.
name: Build release binaries
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build-rust:
    runs-on: ${{ matrix.os }}
    strategy:
      # Let the remaining targets finish even if one of them fails.
      fail-fast: false
      matrix:
        rust_target: [x86_64-unknown-linux-gnu, x86_64-unknown-linux-musl, x86_64-pc-windows-gnu, aarch64-unknown-linux-gnu, riscv64gc-unknown-linux-gnu]
        os: [ubuntu-latest]
        # The macOS target is the only one built on a macOS runner.
        include:
          - rust_target: x86_64-apple-darwin
            os: macos-latest
    steps:
      - uses: actions/checkout@v4
      # Lists the checked-out .github tree in the job log.
      - run: find .github
        shell: bash
      - uses: ./.github/setupandbuild
        with:
          rust_target: ${{ matrix.rust_target }}
      - name: Strip binary
        if: matrix.rust_target == 'x86_64-unknown-linux-musl'
        run: |
          strip output_dir/pg2parquet
      # upload-artifact v3 has been retired by GitHub; v4 requires unique
      # artifact names per job, which the per-target name already guarantees.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: release-${{ matrix.rust_target }}
          path: output_dir/pg2parquet${{ matrix.rust_target == 'x86_64-pc-windows-gnu' && '.exe' || '' }}
52 changes: 0 additions & 52 deletions .github/workflows/build.yml

This file was deleted.

36 changes: 36 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Integration-test workflow: for each matrix entry, builds pg2parquet with the
# `dev` profile via ./.github/setupandbuild and runs ./.github/test against the
# selected PostgreSQL version.
name: Tests

on:
workflow_dispatch:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
tests:
name: Integration Tests
runs-on: ${{ matrix.os }}
timeout-minutes: 15
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
target: [x86_64-unknown-linux-gnu, x86_64-unknown-linux-musl]
pg_version: [ '16' ]
# Additional job on top of the combinations above: gnu target against PostgreSQL 12.
include:
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
pg_version: '12' # oldest supported version as of 2024
steps:
- uses: actions/checkout@v3
- name: Set up and build
uses: ./.github/setupandbuild
with:
rust_target: ${{ matrix.target }}
rust_profile: dev
- name: Run Tests
uses: ./.github/test
with:
# NOTE(review): the '.exe' branch cannot trigger with the targets listed in
# this matrix; presumably kept for a future Windows entry.
binary: output_dir/pg2parquet${{ matrix.target == 'x86_64-pc-windows-gnu' && '.exe' || '' }}
pg_version: ${{ matrix.pg_version }}
4 changes: 2 additions & 2 deletions cli/src/appenders/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub fn create_pg_raw_appender<TRow: PgAbstractRow + Clone>(max_dl: i16, max_rl:
a
}

pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone, const TYPE_OID: u32>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<TRow> {
pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<TRow> {
let a = ByteArrayColumnAppender::new(max_dl, max_rl, move |row: &TRow, buffer: &mut Vec<u8>| {
if let Some(value) = row.ab_get::<Option<PgAnyRef>>(column_index) {

Expand All @@ -198,5 +198,5 @@ pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone, const TYPE_OID: u32>(m
a
}
// pub fn create_string_appender<TRow: PgAbstractRow>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<Arc<TRow>> {
// create_pg_raw_appender::<TRow, TYPE_OID>(max_dl, max_rl, column_index)
// create_pg_raw_appender::<TRow>(max_dl, max_rl, column_index)
// }
2 changes: 2 additions & 0 deletions cli/src/postgres_cloner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ fn map_simple_type<TRow: PgAbstractRow + Clone + 'static>(
"bytea" => resolve_primitive::<Vec<u8>, ByteArrayType, _>(name, c, None, None),
"name" | "text" | "xml" | "bpchar" | "varchar" | "citext" =>
resolve_primitive::<String, ByteArrayType, _>(name, c, Some(LogicalType::String), Some(ConvertedType::UTF8)),
// (Box::new(crate::appenders::byte_array::create_pg_raw_appender(c.definition_level + 1, c.repetition_level, c.col_i)),
// ParquetType::primitive_type_builder(name, basic::Type::BYTE_ARRAY).with_logical_type(Some(LogicalType::String)).with_converted_type(ConvertedType::UTF8).build().unwrap()),
"jsonb" | "json" =>
resolve_primitive::<PgRawJsonb, ByteArrayType, _>(name, c, Some(match s.json_handling {
SchemaSettingsJsonHandling::Text => LogicalType::String,
Expand Down
Loading
Loading