Merge branch 'dev' into pg13-support

paradedb · Oct 6, 2024 · 9109128 · 9109128
2 parents 7bb5037 + e373c70
commit 9109128
Show file tree

Hide file tree

Showing 12 changed files with 44 additions and 101 deletions.
diff --git a/.codespellignore b/.codespellignore
@@ -0,0 +1,2 @@
+crate
+socio-economic
diff --git a/.github/workflows/check-pg_analytics-schema-upgrade.yml b/.github/workflows/check-pg_analytics-schema-upgrade.yml
@@ -84,6 +84,7 @@
 #       - name: Switch to Base git rev and Generate Schema Again
 #         run: |
 #           # Switch to the base git rev
+#           git checkout .
 #           git checkout ${{ github.event.pull_request.base.ref }}
 
 #           # See if we need a different cargo-pgrx and install it if so

diff --git a/.github/workflows/check-typo.yml b/.github/workflows/check-typo.yml
@@ -0,0 +1,32 @@
+# workflows/check-typo.yml
+#
+# Check Typo
+# Check Typo using codespell.
+
+name: Check Typo
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, ready_for_review]
+  workflow_dispatch:
+
+concurrency:
+  group: check-typo-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  check-typo:
+    name: Check Typo using codespell
+    runs-on: depot-ubuntu-latest-2
+    if: github.event.pull_request.draft == false
+
+    steps:
+      - name: Checkout Git Repository
+        uses: actions/checkout@v4
+
+      - name: Check Typo using codespell
+        uses: codespell-project/actions-codespell@v2
+        with:
+          check_filenames: true
+          ignore_words_file: .codespellignore
+          skip: "Cargo.lock"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -37,7 +37,7 @@ All development of ParadeDB is done via Docker and Compose. Our Docker setup is
 
 - The `docker-compose.yml` file pulls the latest published ParadeDB image from DockerHub. It is used for hobby production deployments. We recommend using it to deploy ParadeDB in your own infrastructure.
 
-### Pull Request Worfklow
+### Pull Request Workflow
 
 All changes to ParadeDB happen through Github Pull Requests. Here is the recommended
 flow for making a change:

diff --git a/src/duckdb/csv.rs b/src/duckdb/csv.rs
@@ -24,74 +24,41 @@ use crate::fdw::base::OptionValidator;
 use super::utils;
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum CsvOption {
-    #[strum(serialize = "all_varchar")]
     AllVarchar,
-    #[strum(serialize = "allow_quoted_nulls")]
     AllowQuotedNulls,
-    #[strum(serialize = "auto_detect")]
     AutoDetect,
-    #[strum(serialize = "auto_type_candidates")]
     AutoTypeCandidates,
-    #[strum(serialize = "columns")]
     Columns,
-    #[strum(serialize = "compression")]
     Compression,
-    #[strum(serialize = "dateformat")]
     Dateformat,
-    #[strum(serialize = "decimal_separator")]
     DecimalSeparator,
-    #[strum(serialize = "delim")]
     Delim,
-    #[strum(serialize = "escape")]
     Escape,
-    #[strum(serialize = "filename")]
     Filename,
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "force_not_null")]
     ForceNotNull,
-    #[strum(serialize = "header")]
     Header,
-    #[strum(serialize = "hive_partitioning")]
     HivePartitioning,
-    #[strum(serialize = "hive_types")]
     HiveTypes,
-    #[strum(serialize = "hive_types_autocast")]
     HiveTypesAutocast,
-    #[strum(serialize = "ignore_errors")]
     IgnoreErrors,
-    #[strum(serialize = "max_line_size")]
     MaxLineSize,
-    #[strum(serialize = "names")]
     Names,
-    #[strum(serialize = "new_line")]
     NewLine,
-    #[strum(serialize = "normalize_names")]
     NormalizeNames,
-    #[strum(serialize = "null_padding")]
     NullPadding,
-    #[strum(serialize = "nullstr")]
     Nullstr,
-    #[strum(serialize = "parallel")]
     Parallel,
-    #[strum(serialize = "preserve_casing")]
     PreserveCasing,
-    #[strum(serialize = "quote")]
     Quote,
-    #[strum(serialize = "sample_size")]
     SampleSize,
-    #[strum(serialize = "select")]
     Select,
-    #[strum(serialize = "sep")]
     Sep,
-    #[strum(serialize = "skip")]
     Skip,
-    #[strum(serialize = "timestampformat")]
     Timestampformat,
-    #[strum(serialize = "types")]
     Types,
-    #[strum(serialize = "union_by_name")]
     UnionByName,
 }
 

diff --git a/src/duckdb/delta.rs b/src/duckdb/delta.rs
@@ -21,12 +21,10 @@ use std::collections::HashMap;
 use strum::{AsRefStr, EnumIter};
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum DeltaOption {
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "preserve_casing")]
     PreserveCasing,
-    #[strum(serialize = "select")]
     Select,
 }
 

diff --git a/src/duckdb/iceberg.rs b/src/duckdb/iceberg.rs
@@ -22,14 +22,11 @@ use strum::{AsRefStr, EnumIter};
 use crate::fdw::base::OptionValidator;
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum IcebergOption {
-    #[strum(serialize = "allow_moved_paths")]
     AllowMovedPaths,
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "preserve_casing")]
     PreserveCasing,
-    #[strum(serialize = "select")]
     Select,
 }
 

diff --git a/src/duckdb/json.rs b/src/duckdb/json.rs
@@ -24,40 +24,24 @@ use crate::fdw::base::OptionValidator;
 use super::utils;
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug, Display)]
+#[strum(serialize_all = "snake_case")]
 pub enum JsonOption {
-    #[strum(serialize = "auto_detect")]
     AutoDetect,
-    #[strum(serialize = "columns")]
     Columns,
-    #[strum(serialize = "compression")]
     Compression,
-    #[strum(serialize = "convert_strings_to_integers")]
     ConvertStringsToIntegers,
-    #[strum(serialize = "dateformat")]
     Dateformat,
-    #[strum(serialize = "filename")]
     Filename,
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "format")]
     Format,
-    #[strum(serialize = "hive_partitioning")]
     HivePartitioning,
-    #[strum(serialize = "ignore_errors")]
     IgnoreErrors,
-    #[strum(serialize = "maximum_depth")]
     MaximumDepth,
-    #[strum(serialize = "maximum_object_size")]
     MaximumObjectSize,
-    #[strum(serialize = "records")]
     Records,
-    #[strum(serialize = "sample_size")]
     SampleSize,
-    #[strum(serialize = "select")]
     Select,
-    #[strum(serialize = "timestampformat")]
     Timestampformat,
-    #[strum(serialize = "union_by_name")]
     UnionByName,
 }
 

diff --git a/src/duckdb/parquet.rs b/src/duckdb/parquet.rs
@@ -24,26 +24,19 @@ use crate::fdw::base::OptionValidator;
 use super::utils;
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum ParquetOption {
-    #[strum(serialize = "binary_as_string")]
     BinaryAsString,
+    // Keep the explicit name: snake_case would render `FileName` as `file_name`, but the option is `filename`.
     #[strum(serialize = "filename")]
     FileName,
-    #[strum(serialize = "file_row_number")]
     FileRowNumber,
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "hive_partitioning")]
     HivePartitioning,
-    #[strum(serialize = "hive_types")]
     HiveTypes,
-    #[strum(serialize = "hive_types_autocast")]
     HiveTypesAutocast,
-    #[strum(serialize = "preserve_casing")]
     PreserveCasing,
-    #[strum(serialize = "union_by_name")]
     UnionByName,
-    #[strum(serialize = "select")]
     Select,
     // TODO: EncryptionConfig
 }

diff --git a/src/duckdb/secret.rs b/src/duckdb/secret.rs
@@ -22,53 +22,32 @@ use strum::{AsRefStr, EnumIter};
 use crate::fdw::base::OptionValidator;
 
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum UserMappingOptions {
     // Universal
-    #[strum(serialize = "type")]
     Type,
-    #[strum(serialize = "provider")]
     Provider,
-    #[strum(serialize = "scope")]
     Scope,
-    #[strum(serialize = "chain")]
     Chain,
     // S3/GCS/R2
-    #[strum(serialize = "key_id")]
     KeyId,
-    #[strum(serialize = "secret")]
     Secret,
-    #[strum(serialize = "region")]
     Region,
-    #[strum(serialize = "session_token")]
     SessionToken,
-    #[strum(serialize = "endpoint")]
     Endpoint,
-    #[strum(serialize = "url_style")]
     UrlStyle,
-    #[strum(serialize = "use_ssl")]
     UseSsl,
-    #[strum(serialize = "url_compatibility_mode")]
     UrlCompatibilityMode,
-    #[strum(serialize = "account_id")]
     AccountId,
     // Azure
-    #[strum(serialize = "connection_string")]
     ConnectionString,
-    #[strum(serialize = "account_name")]
     AccountName,
-    #[strum(serialize = "tenant_id")]
     TenantId,
-    #[strum(serialize = "client_id")]
     ClientId,
-    #[strum(serialize = "client_secret")]
     ClientSecret,
-    #[strum(serialize = "client_certificate_path")]
     ClientCertificatePath,
-    #[strum(serialize = "http_proxy")]
     HttpProxy,
-    #[strum(serialize = "proxy_user_name")]
     ProxyUserName,
-    #[strum(serialize = "proxy_password")]
     ProxyPassword,
 }
 

diff --git a/src/duckdb/spatial.rs b/src/duckdb/spatial.rs
@@ -25,24 +25,16 @@ use crate::fdw::base::OptionValidator;
 /// SpatialOption is an enum that represents the options that can be passed to the st_read function.
 /// Reference https://github.com/duckdb/duckdb_spatial/blob/main/docs/functions.md#st_read
 #[derive(EnumIter, AsRefStr, PartialEq, Debug)]
+#[strum(serialize_all = "snake_case")]
 pub enum SpatialOption {
-    #[strum(serialize = "files")]
     Files,
-    #[strum(serialize = "sequential_layer_scan")]
     SequentialLayerScan,
-    #[strum(serialize = "spatial_filter")]
     SpatialFilter,
-    #[strum(serialize = "open_options")]
     OpenOptions,
-    #[strum(serialize = "layer")]
     Layer,
-    #[strum(serialize = "allowed_drivers")]
     AllowedDrivers,
-    #[strum(serialize = "sibling_files")]
     SiblingFiles,
-    #[strum(serialize = "spatial_filter_box")]
     SpatialFilterBox,
-    #[strum(serialize = "keep_wkb")]
     KeepWkb,
 }
 

diff --git a/tests/fixtures/arrow.rs b/tests/fixtures/arrow.rs
@@ -617,7 +617,7 @@ pub fn schema_to_batch(schema: &SchemaRef, rows: &[PgRow]) -> Result<RecordBatch
                 },
                 DataType::Time64(unit) => match unit {
-                    TimeUnit::Second => bail!("arrow time64i does not support seconds"),
-                    TimeUnit::Millisecond => bail!("arrow time64 does not support millseconds"),
+                    TimeUnit::Second => bail!("arrow time64 does not support seconds"),
+                    TimeUnit::Millisecond => bail!("arrow time64 does not support milliseconds"),
                     TimeUnit::Microsecond => Arc::new(Time64MicrosecondArray::from(
                         rows.iter()
                             .map(|row| decode::<Option<NaiveTime>>(field, row))