Skip to content

Commit

Permalink
Merge pull request #18 from exyi/tmp-ci
Browse files Browse the repository at this point in the history
run tests on CI
  • Loading branch information
exyi authored May 17, 2024
2 parents ffcc01c + dccc83d commit 22088f3
Show file tree
Hide file tree
Showing 12 changed files with 644 additions and 215 deletions.
110 changes: 110 additions & 0 deletions .github/pg_container/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
# PostgreSQL image used by the integration tests, with extra extensions installed.
# PG_VERSION selects the upstream tag: e.g. 16 -> postgres:16-bookworm.
ARG PG_VERSION
FROM postgres:${PG_VERSION}-bookworm

# $PG_MAJOR is not defined in this file; it is expected to come from the
# environment of the base postgres image.
# The `apt-cache showpkg` call is a diagnostic that prints the available
# PostGIS package versions into the build log. It must name the same package
# that is installed below (the previous $POSTGIS_MAJOR variable was never set,
# so the lookup targeted a non-existent package name).
# The apt package lists are removed at the end so they do not end up in the layer.
RUN apt-get update \
    && apt-cache showpkg postgresql-$PG_MAJOR-postgis-3 \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        postgresql-$PG_MAJOR-postgis-3 \
        postgresql-$PG_MAJOR-postgis-3-scripts \
        postgresql-$PG_MAJOR-pgvector \
        postgresql-$PG_MAJOR-age \
        postgresql-$PG_MAJOR-rational \
    && rm -rf /var/lib/apt/lists/*


# Other available Debian packages for PG extensions:
# postgresql-16-age - Graph database optimized for fast analysis and real-time data processing
# postgresql-16-asn1oid - ASN.1 OID data type for PostgreSQL
# postgresql-16-auto-failover - Postgres high availability support
# postgresql-16-bgw-replstatus - report whether PostgreSQL node is master or standby
# postgresql-16-credcheck - PostgreSQL username/password checks
# postgresql-16-cron - Run periodic jobs in PostgreSQL
# postgresql-16-debversion - Debian version number type for PostgreSQL
# postgresql-16-decoderbufs - logical decoder output plugin to deliver data as Protocol Buffers
# postgresql-16-dirtyread - Read dead but unvacuumed tuples from a PostgreSQL relation
# postgresql-16-extra-window-functions - Extra Window Functions for PostgreSQL
# postgresql-16-first-last-agg - PostgreSQL extension providing first and last aggregate functions
# postgresql-16-h3 - PostgreSQL bindings for H3, a hierarchical hexagonal geospatial indexing system
# postgresql-16-hll - HyperLogLog extension for PostgreSQL
# postgresql-16-http - HTTP client for PostgreSQL, retrieve a web page from inside the database
# postgresql-16-hypopg - PostgreSQL extension adding support for hypothetical indexes.
# postgresql-16-icu-ext - PostgreSQL extension exposing functionality from the ICU library
# postgresql-16-ip4r - IPv4 and IPv6 types for PostgreSQL 16
# postgresql-16-jsquery - PostgreSQL JSON query language with GIN indexing support
# postgresql-16-londiste-sql - SQL infrastructure for Londiste
# postgresql-16-mimeo - specialized, per-table replication between PostgreSQL instances
# postgresql-16-mobilitydb - Open source geospatial trajectory data management & analysis platform
# postgresql-16-mysql-fdw - Postgres 16 Foreign Data Wrapper for MySQL
# postgresql-16-numeral - numeral datatypes for PostgreSQL
# postgresql-16-ogr-fdw - PostgreSQL foreign data wrapper for OGR
# postgresql-16-omnidb - PostgreSQL PL/pgSQL debugger extension for OmniDB
# postgresql-16-oracle-fdw - PostgreSQL Foreign Data Wrapper for Oracle
# postgresql-16-orafce - Oracle support functions for PostgreSQL 16
# postgresql-16-partman - PostgreSQL Partition Manager
# postgresql-16-periods - PERIODs and SYSTEM VERSIONING for PostgreSQL
# postgresql-16-pg-catcheck - Postgres system catalog checker
# postgresql-16-pg-checksums - Activate/deactivate/verify PostgreSQL data checksums
# postgresql-16-pg-fact-loader - Build fact tables asynchronously with Postgres
# postgresql-16-pg-failover-slots - High-availability support for PostgreSQL logical replication
# postgresql-16-pg-hint-plan - support for optimizer hints in PostgreSQL
# postgresql-16-pg-qualstats - PostgreSQL extension to gather statistics about predicates.
# postgresql-16-pg-rrule - RRULE data type for PostgreSQL
# postgresql-16-pg-stat-kcache - PostgreSQL extension to gather per-query kernel statistics.
# postgresql-16-pg-track-settings - PostgreSQL extension tracking of configuration settings
# postgresql-16-pg-wait-sampling - Extension providing statistics about PostgreSQL wait events
# postgresql-16-pgaudit - PostgreSQL Audit Extension
# postgresql-16-pgauditlogtofile - PostgreSQL pgAudit Add-On to redirect audit logs
# postgresql-16-pgextwlist - PostgreSQL Extension Whitelisting
# postgresql-16-pgfaceting - Faceted query acceleration for PostgreSQL using roaring bitmaps
# postgresql-16-pgfincore - set of PostgreSQL functions to manage blocks in memory
# postgresql-16-pgl-ddl-deploy - Transparent DDL replication for PostgreSQL
# postgresql-16-pglogical - Logical Replication Extension for PostgreSQL
# postgresql-16-pglogical-ticker - Have time-based replication delay for pglogical
# postgresql-16-pgmemcache - PostgreSQL interface to memcached
# postgresql-16-pgmp - arbitrary precision integers and rationals for PostgreSQL 16
# postgresql-16-pgpcre - Perl Compatible Regular Expressions (PCRE) extension for PostgreSQL
# postgresql-16-pgpool2 - connection pool server and replication proxy for PostgreSQL - modules
# postgresql-16-pgq-node - Cascaded queueing on top of PgQ
# postgresql-16-pgq3 - Generic queue for PostgreSQL
# postgresql-16-pgrouting - Routing functionality support for PostgreSQL/PostGIS
# postgresql-16-pgrouting-doc - Routing functionality support for PostgreSQL/PostGIS (Documentation)
# postgresql-16-pgrouting-scripts - Routing functionality support for PostgreSQL/PostGIS - SQL scripts
# postgresql-16-pgsphere - Spherical data types for PostgreSQL
# postgresql-16-pgtap - Unit testing framework extension for PostgreSQL 16
# postgresql-16-pgvector - Open-source vector similarity search for Postgres
# postgresql-16-pldebugger - PostgreSQL pl/pgsql Debugger API
# postgresql-16-pljava - Java procedural language for PostgreSQL 16
# postgresql-16-pllua - Lua procedural language for PostgreSQL 16
# postgresql-16-plpgsql-check - plpgsql_check extension for PostgreSQL
# postgresql-16-plprofiler - PostgreSQL PL/pgSQL functions performance profiler
# postgresql-16-plproxy - database partitioning system for PostgreSQL 16
# postgresql-16-plr - Procedural language interface between PostgreSQL and R
# postgresql-16-plsh - PL/sh procedural language for PostgreSQL 16
# postgresql-16-pointcloud - PostgreSQL extension for storing point cloud (LIDAR) data
# postgresql-16-postgis-3 - Geographic objects support for PostgreSQL 16
# postgresql-16-postgis-3-scripts - Geographic objects support for PostgreSQL 16 -- SQL scripts
# postgresql-16-powa - PostgreSQL Workload Analyzer -- PostgreSQL 16 extension
# postgresql-16-prefix - Prefix Range module for PostgreSQL
# postgresql-16-preprepare - pre prepare your PostgreSQL statements server side
# postgresql-16-prioritize - Get and set the nice priorities of PostgreSQL backends
# postgresql-16-q3c - PostgreSQL 16 extension used for indexing the sky
# postgresql-16-rational - Precise fractional arithmetic for PostgreSQL
# postgresql-16-rdkit - Cheminformatics and machine-learning software (PostgreSQL Cartridge)
# postgresql-16-repack - reorganize tables in PostgreSQL databases with minimal locks
# postgresql-16-repmgr - replication manager for PostgreSQL 16
# postgresql-16-roaringbitmap - RoaringBitmap extension for PostgreSQL
# postgresql-16-rum - PostgreSQL RUM access method
# postgresql-16-semver - Semantic version number type for PostgreSQL
# postgresql-16-set-user - PostgreSQL privilege escalation with enhanced logging and control
# postgresql-16-show-plans - Show query plans of currently running PostgreSQL statements
# postgresql-16-similarity - PostgreSQL similarity functions extension
# postgresql-16-slony1-2 - replication system for PostgreSQL: PostgreSQL 16 server plug-in
# postgresql-16-snakeoil - PostgreSQL anti-virus scanner based on ClamAV
# postgresql-16-squeeze - PostgreSQL extension for automatic bloat cleanup
# postgresql-16-statviz - PostgreSQL internal statistics analysis and visualization
# postgresql-16-tablelog - log changes on tables and restore tables to point in time
# postgresql-16-tdigest - t-digest algorithm for on-line accumulation of rank-based statistics
# postgresql-16-tds-fdw - PostgreSQL foreign data wrapper for TDS databases
# postgresql-16-toastinfo - Show storage structure of varlena datatypes in PostgreSQL
# postgresql-16-unit - SI Units for PostgreSQL
# postgresql-16-wal2json - PostgreSQL logical decoding JSON output plugin
38 changes: 38 additions & 0 deletions .github/setupandbuild/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Composite action: installs the stable Rust toolchain for the requested target,
# builds the pg2parquet CLI and copies the resulting binary into ./output_dir.
# The caller is responsible for checking out the repository first.
name: setup_and_build
description: Installs the Rust toolchain and builds pg2parquet

inputs:
  rust_target:
    description: Target triple
    required: true
  rust_profile:
    description: Cargo Profile to use
    default: ci-build
    # An input with a default cannot meaningfully be required.
    required: false
runs:
  using: composite
  steps:
    # Pinned to the v1 release line instead of the moving master branch.
    - uses: actions-rs/toolchain@v1
      with:
        target: ${{ inputs.rust_target }}
        toolchain: stable
        override: true

    - name: Build
      uses: actions-rs/cargo@v1
      with:
        # Build through `cross` for the listed targets, but never on Windows runners.
        use-cross: ${{ runner.os != 'Windows' && (inputs.rust_target == 'x86_64-unknown-linux-musl' || inputs.rust_target == 'x86_64-pc-windows-gnu' || inputs.rust_target == 'aarch64-unknown-linux-gnu' || inputs.rust_target == 'riscv64gc-unknown-linux-gnu') }}
        command: build
        args: --locked --profile=${{ inputs.rust_profile }} --manifest-path=cli/Cargo.toml --target=${{ inputs.rust_target }}

    # The `dev` profile is written to the `debug` directory; any other profile
    # is looked up in a directory named after the profile itself.
    # The Windows target produces `pg2parquet.exe`, every other target `pg2parquet`.
    - name: Copy binary
      shell: bash
      run: |
        mkdir -p output_dir
        cp cli/target/${{ inputs.rust_target }}/${{ inputs.rust_profile == 'dev' && 'debug' || inputs.rust_profile }}/pg2parquet${{ inputs.rust_target == 'x86_64-pc-windows-gnu' && '.exe' || '' }} output_dir/
45 changes: 45 additions & 0 deletions .github/test/action.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Composite action: builds and starts the PostgreSQL test container, waits for
# it to accept connections, creates the test database and runs the Python
# integration tests against the given pg2parquet binary.
name: test
description: Runs Python integration tests for pg2parquet

inputs:
  binary:
    description: The pg2parquet compiled binary
    required: true
  pg_version:
    default: '16'
    description: PostgreSQL version
    # An input with a default cannot meaningfully be required.
    required: false

runs:
  using: composite
  steps:
    # Build the image from .github/pg_container for the requested PostgreSQL version.
    - run: cd .github/pg_container && docker build --build-arg PG_VERSION=${{ inputs.pg_version }} -t pg2parquet-postgres .
      shell: bash
    - run: docker run -d --name pg2parquet-postgres1 -p 5432:5432 -e POSTGRES_PASSWORD=postgres pg2parquet-postgres
      shell: bash
    # Poll until the server answers a trivial query; give up after 120 seconds.
    - run: timeout 120s bash -c 'until docker exec pg2parquet-postgres1 psql -U postgres -c "select 1;"; do echo "Waiting for PG startup..."; sleep 0.3; done'
      shell: bash
    - run: docker exec pg2parquet-postgres1 psql -U postgres -c "CREATE DATABASE pg2parquet_test;"
      shell: bash

    - uses: actions/setup-python@v4
      with:
        # Quoted so YAML never interprets the version as a floating point number.
        python-version: '3.12'

    - uses: abatilo/actions-poetry@v2
      with:
        poetry-version: 1.8.2

    - run: poetry install --no-root
      working-directory: py-tests
      shell: bash

    - run: poetry -C py-tests run python -m pytest
      shell: bash
      env:
        PG2PARQUET_TEST_BIN: ${{ inputs.binary }}
        PG2PARQUET_TEST_DB_HOST: 127.0.0.1
        PG2PARQUET_TEST_DB_PORT: 5432
        # Must match the database created by the CREATE DATABASE step above.
        PG2PARQUET_TEST_DB_NAME: pg2parquet_test
        PG2PARQUET_TEST_DB_USER: postgres
        PG2PARQUET_TEST_DB_PASSWORD: postgres

34 changes: 34 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Builds pg2parquet for every supported target and uploads each binary as a
# workflow artifact named release-<target>.
name: Build release binaries
on:
  push:
    branches: [ "main" ]
  pull_request:
    branches: [ "main" ]

jobs:
  build-rust:
    runs-on: ${{ matrix.os }}
    strategy:
      # Keep building the remaining targets when one of them fails.
      fail-fast: false
      matrix:
        rust_target: [x86_64-unknown-linux-gnu, x86_64-unknown-linux-musl, x86_64-pc-windows-gnu, aarch64-unknown-linux-gnu, riscv64gc-unknown-linux-gnu]
        os: [ubuntu-latest]
        include:
          # The macOS build is the only one that does not run on the Ubuntu runner.
          - rust_target: x86_64-apple-darwin
            os: macos-latest
    steps:
      - uses: actions/checkout@v4
      # Debug aid: lists the CI files present in the checkout.
      - run: find .github
        shell: bash
      # No rust_profile is passed, so the action's default profile is used.
      - uses: ./.github/setupandbuild
        with:
          rust_target: ${{ matrix.rust_target }}
      - name: Strip binary
        if: matrix.rust_target == 'x86_64-unknown-linux-musl'
        run: |
          strip output_dir/pg2parquet
      # upload-artifact v3 is deprecated; v4 requires unique artifact names per
      # run, which the per-target name below already guarantees.
      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: release-${{ matrix.rust_target }}
          path: output_dir/pg2parquet${{ matrix.rust_target == 'x86_64-pc-windows-gnu' && '.exe' || '' }}
          # Fail the job instead of silently uploading nothing.
          if-no-files-found: error
52 changes: 0 additions & 52 deletions .github/workflows/build.yml

This file was deleted.

36 changes: 36 additions & 0 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Integration test workflow: builds pg2parquet with the dev profile and runs
# the test action against a PostgreSQL container.
name: Tests

# Runs on manual dispatch, on pushes to main and on pull requests targeting main.
on:
workflow_dispatch:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
tests:
name: Integration Tests
runs-on: ${{ matrix.os }}
timeout-minutes: 15
strategy:
# Let the remaining matrix entries finish when one of them fails.
fail-fast: false
# Both Linux targets are tested against PostgreSQL 16; the include entry adds
# one extra gnu-target run against PostgreSQL 12.
matrix:
os: [ubuntu-latest]
target: [x86_64-unknown-linux-gnu, x86_64-unknown-linux-musl]
pg_version: [ '16' ]
include:
- os: ubuntu-latest
target: x86_64-unknown-linux-gnu
pg_version: '12' # oldest supported version as of 2024
steps:
- uses: actions/checkout@v3
# Local composite action; the binary is later read from output_dir/.
- name: Set up and build
uses: ./.github/setupandbuild
with:
rust_target: ${{ matrix.target }}
rust_profile: dev
- name: Run Tests
uses: ./.github/test
with:
# The '.exe' suffix only applies to the Windows target, which is not part of
# this matrix, so the expression currently always yields an empty suffix.
binary: output_dir/pg2parquet${{ matrix.target == 'x86_64-pc-windows-gnu' && '.exe' || '' }}
pg_version: ${{ matrix.pg_version }}
4 changes: 2 additions & 2 deletions cli/src/appenders/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ pub fn create_pg_raw_appender<TRow: PgAbstractRow + Clone>(max_dl: i16, max_rl:
a
}

pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone, const TYPE_OID: u32>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<TRow> {
pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<TRow> {
let a = ByteArrayColumnAppender::new(max_dl, max_rl, move |row: &TRow, buffer: &mut Vec<u8>| {
if let Some(value) = row.ab_get::<Option<PgAnyRef>>(column_index) {

Expand All @@ -198,5 +198,5 @@ pub fn create_jsonb_appender<TRow: PgAbstractRow + Clone, const TYPE_OID: u32>(m
a
}
// pub fn create_string_appender<TRow: PgAbstractRow>(max_dl: i16, max_rl: i16, column_index: usize) -> impl ColumnAppender<Arc<TRow>> {
// create_pg_raw_appender::<TRow, TYPE_OID>(max_dl, max_rl, column_index)
// create_pg_raw_appender::<TRow>(max_dl, max_rl, column_index)
// }
2 changes: 2 additions & 0 deletions cli/src/postgres_cloner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ fn map_simple_type<TRow: PgAbstractRow + Clone + 'static>(
"bytea" => resolve_primitive::<Vec<u8>, ByteArrayType, _>(name, c, None, None),
"name" | "text" | "xml" | "bpchar" | "varchar" | "citext" =>
resolve_primitive::<String, ByteArrayType, _>(name, c, Some(LogicalType::String), Some(ConvertedType::UTF8)),
// (Box::new(crate::appenders::byte_array::create_pg_raw_appender(c.definition_level + 1, c.repetition_level, c.col_i)),
// ParquetType::primitive_type_builder(name, basic::Type::BYTE_ARRAY).with_logical_type(Some(LogicalType::String)).with_converted_type(ConvertedType::UTF8).build().unwrap()),
"jsonb" | "json" =>
resolve_primitive::<PgRawJsonb, ByteArrayType, _>(name, c, Some(match s.json_handling {
SchemaSettingsJsonHandling::Text => LogicalType::String,
Expand Down
Loading

0 comments on commit 22088f3

Please sign in to comment.