diff --git a/.github/workflows/backend.yml b/.github/workflows/backend.yml index 7bdcf6b165a..7c07d3dab6d 100644 --- a/.github/workflows/backend.yml +++ b/.github/workflows/backend.yml @@ -393,6 +393,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 0` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -423,6 +424,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 1` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -453,6 +455,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 2` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -483,6 +486,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 3` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -512,6 +516,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 4` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -541,6 +546,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 5` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -570,6 +576,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 6` if [ ! -z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" @@ -600,6 +607,7 @@ jobs: run: | sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 8 7` if [ ! 
-z $sub_modules ]; then + echo $sub_modules ./mvnw -T 1 -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci else echo "sub modules is empty, skipping" diff --git a/docs/en/concept/schema-evolution.md b/docs/en/concept/schema-evolution.md index a5a052042f0..b1db0573870 100644 --- a/docs/en/concept/schema-evolution.md +++ b/docs/en/concept/schema-evolution.md @@ -13,6 +13,7 @@ Now we only support the operation about `add column`、`drop column`、`rename c [Jdbc-Mysql](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/Jdbc.md) [Jdbc-Oracle](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/Jdbc.md) [StarRocks](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/StarRocks.md) +[Paimon](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/Paimon.md#Schema-Evolution) Note: The schema evolution is not support the transform at now. The schema evolution of different types of databases(Oracle-CDC -> Jdbc-Mysql)is currently not supported the default value of the column in ddl. diff --git a/docs/en/connector-v2/sink/Greenplum.md b/docs/en/connector-v2/sink/Greenplum.md index 5ad83f0873e..d6925ba5fa9 100644 --- a/docs/en/connector-v2/sink/Greenplum.md +++ b/docs/en/connector-v2/sink/Greenplum.md @@ -24,7 +24,7 @@ Optional jdbc drivers: - `org.postgresql.Driver` - `com.pivotal.jdbc.GreenplumDriver` -Warn: for license compliance, if you use `GreenplumDriver` the have to provide Greenplum JDBC driver yourself, e.g. copy greenplum-xxx.jar to $SEATNUNNEL_HOME/lib for Standalone. +Warn: for license compliance, if you use `GreenplumDriver` the have to provide Greenplum JDBC driver yourself, e.g. copy greenplum-xxx.jar to $SEATUNNEL_HOME/lib for Standalone. ### url [string] diff --git a/docs/en/connector-v2/sink/Kingbase.md b/docs/en/connector-v2/sink/Kingbase.md index d4a5b8b56d5..eae51632f87 100644 --- a/docs/en/connector-v2/sink/Kingbase.md +++ b/docs/en/connector-v2/sink/Kingbase.md @@ -30,9 +30,9 @@ ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' > working directory
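For context on the `backend.yml` change above: each of the eight integration-test jobs asks `update_modules_check.py sub_update_it_module <it-modules> 8 <index>` for its slice of the changed modules, and the added `echo $sub_modules` simply prints that slice in the CI log before Maven builds it. A minimal Java sketch of the kind of round-robin split such a helper might perform (an assumption for illustration with made-up module names; the actual Python script may partition differently):

```java
import java.util.ArrayList;
import java.util.List;

public class ItModuleSharder {

    // Round-robin assignment of changed IT modules to one of `totalShards` CI jobs.
    // This mirrors the 8-way split driven by `sub_update_it_module <modules> 8 <index>`,
    // but the real update_modules_check.py may partition differently.
    static String shardModules(String commaSeparatedModules, int totalShards, int shardIndex) {
        List<String> picked = new ArrayList<>();
        String[] modules = commaSeparatedModules.split(",");
        for (int i = 0; i < modules.length; i++) {
            if (i % totalShards == shardIndex) {
                picked.add(modules[i].trim());
            }
        }
        // Maven's -pl flag takes a comma-separated module list; an empty result means the shard is skipped.
        return String.join(",", picked);
    }

    public static void main(String[] args) {
        String changed = "connector-jdbc-e2e,connector-kafka-e2e,connector-paimon-e2e"; // illustrative names
        // Roughly what the new `echo $sub_modules` line would print for shard 0 of 8.
        System.out.println(shardModules(changed, 8, 0));
    }
}
```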
-> For example: cp kingbase8-8.6.0.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> For example: cp kingbase8-8.6.0.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/ObsFile.md b/docs/en/connector-v2/sink/ObsFile.md index 249d152f733..ef515c0eb1a 100644 --- a/docs/en/connector-v2/sink/ObsFile.md +++ b/docs/en/connector-v2/sink/ObsFile.md @@ -37,16 +37,16 @@ It only supports hadoop version **2.9.X+**. ## Required Jar List -| jar | supported versions | maven | -|--------------------|-----------------------------|----------------------------------------------------------------------------------------------------------------| -| hadoop-huaweicloud | support version >= 3.1.1.29 | [Download](https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/org/apache/hadoop/hadoop-huaweicloud/) | -| esdk-obs-java | support version >= 3.19.7.3 | [Download](https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/com/huawei/storage/esdk-obs-java/) | -| okhttp | support version >= 3.11.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/) | -| okio | support version >= 1.14.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okio/okio/) | - -> Please download the support list corresponding to 'Maven' and copy them to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory. +| jar | supported versions | maven | +|--------------------|-----------------------------|-------------------------------------------------------------------------------------------------------| +| hadoop-huaweicloud | support version >= 3.1.1.29 | [Download](https://repo.huaweicloud.com/artifactory/sdk_public/org/apache/hadoop/hadoop-huaweicloud/) | +| esdk-obs-java | support version >= 3.19.7.3 | [Download](https://repo.huaweicloud.com/artifactory/sdk_public/com/huawei/storage/esdk-obs-java/) | +| okhttp | support version >= 3.11.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/) | +| okio | support version >= 1.14.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okio/okio/) | + +> Please download the support list corresponding to 'Maven' and copy them to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory. > -> And copy all jars to $SEATNUNNEL_HOME/lib/ +> And copy all jars to $SEATUNNEL_HOME/lib/ ## Options diff --git a/docs/en/connector-v2/sink/OceanBase.md b/docs/en/connector-v2/sink/OceanBase.md index accbbd72cd4..acaaa53253a 100644 --- a/docs/en/connector-v2/sink/OceanBase.md +++ b/docs/en/connector-v2/sink/OceanBase.md @@ -25,8 +25,8 @@ Write data through jdbc. Support Batch mode and Streaming mode, support concurre ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example: cp oceanbase-client-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example: cp oceanbase-client-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/Oracle.md b/docs/en/connector-v2/sink/Oracle.md index d42e3b00fb4..23773dbac23 100644 --- a/docs/en/connector-v2/sink/Oracle.md +++ b/docs/en/connector-v2/sink/Oracle.md @@ -39,9 +39,9 @@ semantics (using XA transaction guarantee). ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATNUNNEL_HOME/lib/
-> To support the i18n character set, copy the orai18n.jar to the $SEATNUNNEL_HOME/lib/ directory. +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATUNNEL_HOME/lib/
+> To support the i18n character set, copy the `orai18n.jar` to the `$SEATUNNEL_HOME/lib/` directory. ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/Paimon.md b/docs/en/connector-v2/sink/Paimon.md index 68c0755cfd3..29598551207 100644 --- a/docs/en/connector-v2/sink/Paimon.md +++ b/docs/en/connector-v2/sink/Paimon.md @@ -66,9 +66,61 @@ The Paimon connector supports writing data to multiple file systems. Currently, If you use the s3 filesystem. You can configure the `fs.s3a.access-key`、`fs.s3a.secret-key`、`fs.s3a.endpoint`、`fs.s3a.path.style.access`、`fs.s3a.aws.credentials.provider` properties in the `paimon.hadoop.conf` option. Besides, the warehouse should start with `s3a://`.
+## Schema Evolution +CDC ingestion supports a limited set of schema changes. The currently supported schema changes are: + +* Adding columns. + +* Modifying columns. More specifically, if you modify a column type, the following changes are supported: + + * altering from a string type (char, varchar, text) to another string type with a longer length, + * altering from a binary type (binary, varbinary, blob) to another binary type with a longer length, + * altering from an integer type (tinyint, smallint, int, bigint) to another integer type with a wider range, + * altering from a floating-point type (float, double) to another floating-point type with a wider range. + + > Note: + > + > If {oldType} and {newType} belong to the same type family but the old type has a higher precision than the new type, the conversion is ignored. + +* Dropping columns. + +* Changing columns.
## Examples +### Schema evolution +```hocon +env { + # You can set engine configuration here + parallelism = 5 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + MySQL-CDC { + server-id = 5652-5657 + username = "st_user_source" + password = "mysqlpw" + table-names = ["shop.products"] + base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop" + debezium = { + include.schema.changes = true + } + } +} + +sink { + Paimon { + warehouse = "file:///tmp/paimon" + database = "mysql_to_paimon" + table = "products" + } +} +``` ### Single table
diff --git a/docs/en/connector-v2/sink/PostgreSql.md b/docs/en/connector-v2/sink/PostgreSql.md index cf4bc2e3ada..a02519977fe 100644 --- a/docs/en/connector-v2/sink/PostgreSql.md +++ b/docs/en/connector-v2/sink/PostgreSql.md @@ -40,9 +40,9 @@ semantics (using XA transaction guarantee). ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
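Returning to the Paimon schema-evolution rules above: a column-type change is accepted when the new type is in the same type family and at least as wide as the old one, while a narrowing change within a family is ignored. A self-contained sketch of that acceptance rule (the type families and their ordering here are illustrative simplifications, not the connector's actual implementation):

```java
import java.util.Arrays;
import java.util.List;

public class TypeWideningCheck {

    // Each inner list is a type family ordered from narrowest to widest.
    private static final List<List<String>> FAMILIES =
            Arrays.asList(
                    Arrays.asList("CHAR", "VARCHAR", "TEXT"),               // string family
                    Arrays.asList("BINARY", "VARBINARY", "BLOB"),           // binary family
                    Arrays.asList("TINYINT", "SMALLINT", "INT", "BIGINT"),  // integer family
                    Arrays.asList("FLOAT", "DOUBLE"));                      // floating-point family

    // True if newType is in the same family as oldType and at least as wide.
    static boolean isSupportedTypeChange(String oldType, String newType) {
        for (List<String> family : FAMILIES) {
            int oldRank = family.indexOf(oldType);
            int newRank = family.indexOf(newType);
            if (oldRank >= 0 && newRank >= 0) {
                return newRank >= oldRank; // narrowing within a family is ignored, per the note above
            }
        }
        return false; // cross-family changes are not listed as supported
    }

    public static void main(String[] args) {
        System.out.println(isSupportedTypeChange("INT", "BIGINT"));  // true
        System.out.println(isSupportedTypeChange("BIGINT", "INT"));  // false, would be ignored
        System.out.println(isSupportedTypeChange("VARCHAR", "INT")); // false
    }
}
```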
-> For example PostgreSQL datasource: cp postgresql-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/
-> If you want to manipulate the GEOMETRY type in PostgreSQL, add postgresql-xxx.jar and postgis-jdbc-xxx.jar to $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example PostgreSQL datasource: cp postgresql-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/
+> If you want to manipulate the GEOMETRY type in PostgreSQL, add postgresql-xxx.jar and postgis-jdbc-xxx.jar to $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/Snowflake.md b/docs/en/connector-v2/sink/Snowflake.md index dd84bcc2cef..10b4d8034ba 100644 --- a/docs/en/connector-v2/sink/Snowflake.md +++ b/docs/en/connector-v2/sink/Snowflake.md @@ -25,8 +25,8 @@ Write data through jdbc. Support Batch mode and Streaming mode, support concurre ## Database dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Snowflake datasource: cp snowflake-connector-java-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Snowflake datasource: cp snowflake-connector-java-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/SqlServer.md b/docs/en/connector-v2/sink/SqlServer.md index 3a03d3a2df8..c68c02db9f5 100644 --- a/docs/en/connector-v2/sink/SqlServer.md +++ b/docs/en/connector-v2/sink/SqlServer.md @@ -43,8 +43,8 @@ semantics (using XA transaction guarantee). ## Database dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example SQL Server datasource: cp mssql-jdbc-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example SQL Server datasource: cp mssql-jdbc-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/sink/Vertica.md b/docs/en/connector-v2/sink/Vertica.md index 04aa77f0e6f..79942183cc9 100644 --- a/docs/en/connector-v2/sink/Vertica.md +++ b/docs/en/connector-v2/sink/Vertica.md @@ -39,8 +39,8 @@ semantics (using XA transaction guarantee). ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Vertica datasource: cp vertica-jdbc-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Vertica datasource: cp vertica-jdbc-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/DB2.md b/docs/en/connector-v2/source/DB2.md index 67fc3151d01..6c8da74a89c 100644 --- a/docs/en/connector-v2/source/DB2.md +++ b/docs/en/connector-v2/source/DB2.md @@ -41,8 +41,8 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example DB2 datasource: cp db2-connector-java-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example DB2 datasource: cp db2-connector-java-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/Greenplum.md b/docs/en/connector-v2/source/Greenplum.md index 28d527bef20..d28fce2cb9f 100644 --- a/docs/en/connector-v2/source/Greenplum.md +++ b/docs/en/connector-v2/source/Greenplum.md @@ -24,7 +24,7 @@ Optional jdbc drivers: - `org.postgresql.Driver` - `com.pivotal.jdbc.GreenplumDriver` -Warn: for license compliance, if you use `GreenplumDriver` the have to provide Greenplum JDBC driver yourself, e.g. copy greenplum-xxx.jar to $SEATNUNNEL_HOME/lib for Standalone. +Warn: for license compliance, if you use `GreenplumDriver` the have to provide Greenplum JDBC driver yourself, e.g. copy greenplum-xxx.jar to $SEATUNNEL_HOME/lib for Standalone. ::: diff --git a/docs/en/connector-v2/source/HiveJdbc.md b/docs/en/connector-v2/source/HiveJdbc.md index 23227aa306f..6a5fdcd7c7a 100644 --- a/docs/en/connector-v2/source/HiveJdbc.md +++ b/docs/en/connector-v2/source/HiveJdbc.md @@ -35,9 +35,9 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' > working directory
-> For example Hive datasource: cp hive-jdbc-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> For example Hive datasource: cp hive-jdbc-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/Jdbc.md b/docs/en/connector-v2/source/Jdbc.md index 2b5897cbaea..b8fbed6d50e 100644 --- a/docs/en/connector-v2/source/Jdbc.md +++ b/docs/en/connector-v2/source/Jdbc.md @@ -8,9 +8,9 @@ Read external data source data through JDBC. :::tip -Warn: for license compliance, you have to provide database driver yourself, copy to `$SEATNUNNEL_HOME/lib/` directory in order to make them work. +Warn: for license compliance, you have to provide database driver yourself, copy to `$SEATUNNEL_HOME/lib/` directory in order to make them work. -e.g. If you use MySQL, should download and copy `mysql-connector-java-xxx.jar` to `$SEATNUNNEL_HOME/lib/`. For Spark/Flink, you should also copy it to `$SPARK_HOME/jars/` or `$FLINK_HOME/lib/`. +e.g. If you use MySQL, should download and copy `mysql-connector-java-xxx.jar` to `$SEATUNNEL_HOME/lib/`. For Spark/Flink, you should also copy it to `$SPARK_HOME/jars/` or `$FLINK_HOME/lib/`. ::: diff --git a/docs/en/connector-v2/source/Kingbase.md b/docs/en/connector-v2/source/Kingbase.md index 877b84445ac..f668937ef5f 100644 --- a/docs/en/connector-v2/source/Kingbase.md +++ b/docs/en/connector-v2/source/Kingbase.md @@ -33,8 +33,8 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example: cp kingbase8-8.6.0.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example: cp kingbase8-8.6.0.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/ObsFile.md b/docs/en/connector-v2/source/ObsFile.md index 4ee1bc15773..b6e662b1986 100644 --- a/docs/en/connector-v2/source/ObsFile.md +++ b/docs/en/connector-v2/source/ObsFile.md @@ -42,16 +42,16 @@ It only supports hadoop version **2.9.X+**. ## Required Jar List -| jar | supported versions | maven | -|--------------------|-----------------------------|----------------------------------------------------------------------------------------------------------------| -| hadoop-huaweicloud | support version >= 3.1.1.29 | [Download](https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/org/apache/hadoop/hadoop-huaweicloud/) | -| esdk-obs-java | support version >= 3.19.7.3 | [Download](https://repo.huaweicloud.com/repository/maven/huaweicloudsdk/com/huawei/storage/esdk-obs-java/) | -| okhttp | support version >= 3.11.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/) | -| okio | support version >= 1.14.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okio/okio/) | - -> Please download the support list corresponding to 'Maven' and copy them to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory. +| jar | supported versions | maven | +|--------------------|-----------------------------|--------------------------------------------------------------------------------------------------------| +| hadoop-huaweicloud | support version >= 3.1.1.29 | [Download](https://repo.huaweicloud.com/artifactory/sdk_public/org/apache/hadoop/hadoop-huaweicloud/) | +| esdk-obs-java | support version >= 3.19.7.3 | [Download](https://repo.huaweicloud.com/artifactory/sdk_public/com/huawei/storage/esdk-obs-java/) | +| okhttp | support version >= 3.11.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okhttp3/okhttp/) | +| okio | support version >= 1.14.0 | [Download](https://repo1.maven.org/maven2/com/squareup/okio/okio/) | + +> Please download the support list corresponding to 'Maven' and copy them to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory. > -> And copy all jars to $SEATNUNNEL_HOME/lib/ +> And copy all jars to $SEATUNNEL_HOME/lib/ ## Options diff --git a/docs/en/connector-v2/source/OceanBase.md b/docs/en/connector-v2/source/OceanBase.md index ee2a9945ee5..ba5d081b8e0 100644 --- a/docs/en/connector-v2/source/OceanBase.md +++ b/docs/en/connector-v2/source/OceanBase.md @@ -29,8 +29,8 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example: cp oceanbase-client-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example: cp oceanbase-client-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/Oracle-CDC.md b/docs/en/connector-v2/source/Oracle-CDC.md index dad52faa0b1..8e5c332bef0 100644 --- a/docs/en/connector-v2/source/Oracle-CDC.md +++ b/docs/en/connector-v2/source/Oracle-CDC.md @@ -39,12 +39,12 @@ So, you can not set this property named `log.mining.continuous.mine` in the debe #### For Spark/Flink Engine > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8) has been placed in directory `${SEATUNNEL_HOME}/plugins/`. -> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATNUNNEL_HOME/plugins/` directory. +> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATUNNEL_HOME/plugins/` directory. #### For SeaTunnel Zeta Engine > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8) has been placed in directory `${SEATUNNEL_HOME}/lib/`. -> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATNUNNEL_HOME/lib/` directory. +> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATUNNEL_HOME/lib/` directory. ### Enable Oracle Logminer diff --git a/docs/en/connector-v2/source/Oracle.md b/docs/en/connector-v2/source/Oracle.md index 1756a76db1d..847371b9c80 100644 --- a/docs/en/connector-v2/source/Oracle.md +++ b/docs/en/connector-v2/source/Oracle.md @@ -17,12 +17,12 @@ Read external data source data through JDBC. ### For Spark/Flink Engine > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8) has been placed in directory `${SEATUNNEL_HOME}/plugins/`. -> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATNUNNEL_HOME/plugins/` directory. +> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATUNNEL_HOME/plugins/` directory. ### For SeaTunnel Zeta Engine > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/com.oracle.database.jdbc/ojdbc8) has been placed in directory `${SEATUNNEL_HOME}/lib/`. -> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATNUNNEL_HOME/lib/` directory. +> 2. To support the i18n character set, copy the `orai18n.jar` to the `$SEATUNNEL_HOME/lib/` directory. ## Key Features @@ -43,9 +43,9 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATNUNNEL_HOME/lib/
-> To support the i18n character set, copy the orai18n.jar to the $SEATNUNNEL_HOME/lib/ directory. +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Oracle datasource: cp ojdbc8-xxxxxx.jar $SEATUNNEL_HOME/lib/
+> To support the i18n character set, copy the orai18n.jar to the $SEATUNNEL_HOME/lib/ directory. ## Data Type Mapping diff --git a/docs/en/connector-v2/source/PostgreSQL-CDC.md b/docs/en/connector-v2/source/PostgreSQL-CDC.md index 21afa42f701..0efdceed49b 100644 --- a/docs/en/connector-v2/source/PostgreSQL-CDC.md +++ b/docs/en/connector-v2/source/PostgreSQL-CDC.md @@ -40,7 +40,7 @@ describes how to set up the Postgre CDC connector to run SQL queries against Pos > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/org.postgresql/postgresql) has been placed in directory `${SEATUNNEL_HOME}/lib/`. -Please download and put PostgreSQL driver in `${SEATUNNEL_HOME}/lib/` dir. For example: cp postgresql-xxx.jar `$SEATNUNNEL_HOME/lib/` +Please download and put PostgreSQL driver in `${SEATUNNEL_HOME}/lib/` dir. For example: cp postgresql-xxx.jar `$SEATUNNEL_HOME/lib/` > Here are the steps to enable CDC (Change Data Capture) in PostgreSQL: diff --git a/docs/en/connector-v2/source/PostgreSQL.md b/docs/en/connector-v2/source/PostgreSQL.md index d383b113c2e..7e303b9d353 100644 --- a/docs/en/connector-v2/source/PostgreSQL.md +++ b/docs/en/connector-v2/source/PostgreSQL.md @@ -42,9 +42,9 @@ Read external data source data through JDBC. ## Database Dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example PostgreSQL datasource: cp postgresql-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/
-> If you want to manipulate the GEOMETRY type in PostgreSQL, add postgresql-xxx.jar and postgis-jdbc-xxx.jar to $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example PostgreSQL datasource: cp postgresql-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/
+> If you want to manipulate the GEOMETRY type in PostgreSQL, add postgresql-xxx.jar and postgis-jdbc-xxx.jar to $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/Redshift.md b/docs/en/connector-v2/source/Redshift.md index 8da5ea9391d..be92bcbf7dc 100644 --- a/docs/en/connector-v2/source/Redshift.md +++ b/docs/en/connector-v2/source/Redshift.md @@ -38,8 +38,8 @@ Read external data source data through JDBC. ## Database dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Redshift datasource: cp RedshiftJDBC42-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Redshift datasource: cp RedshiftJDBC42-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/Snowflake.md b/docs/en/connector-v2/source/Snowflake.md index 3e8163d758d..2aa67b273a1 100644 --- a/docs/en/connector-v2/source/Snowflake.md +++ b/docs/en/connector-v2/source/Snowflake.md @@ -31,8 +31,8 @@ Read external data source data through JDBC. ## Database dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example Snowflake datasource: cp snowflake-connector-java-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example Snowflake datasource: cp snowflake-connector-java-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ > ## Data Type Mapping diff --git a/docs/en/connector-v2/source/SqlServer.md b/docs/en/connector-v2/source/SqlServer.md index 2905f45fbdb..e25c8ef406b 100644 --- a/docs/en/connector-v2/source/SqlServer.md +++ b/docs/en/connector-v2/source/SqlServer.md @@ -45,8 +45,8 @@ Read external data source data through JDBC. ## Database dependency -> Please download the support list corresponding to 'Maven' and copy it to the '$SEATNUNNEL_HOME/plugins/jdbc/lib/' working directory
-> For example SQL Server datasource: cp mssql-jdbc-xxx.jar $SEATNUNNEL_HOME/plugins/jdbc/lib/ +> Please download the support list corresponding to 'Maven' and copy it to the '$SEATUNNEL_HOME/plugins/jdbc/lib/' working directory
+> For example SQL Server datasource: cp mssql-jdbc-xxx.jar $SEATUNNEL_HOME/plugins/jdbc/lib/ ## Data Type Mapping diff --git a/docs/en/connector-v2/source/TiDB-CDC.md b/docs/en/connector-v2/source/TiDB-CDC.md index 1cce8ec3ac2..4b725c246e2 100644 --- a/docs/en/connector-v2/source/TiDB-CDC.md +++ b/docs/en/connector-v2/source/TiDB-CDC.md @@ -40,7 +40,7 @@ describes how to set up the TiDB CDC connector to snapshot data and capture stre > 1. You need to ensure that the [jdbc driver jar package](https://mvnrepository.com/artifact/mysql/mysql-connector-java) and the [tikv-client-java jar package](https://mvnrepository.com/artifact/org.tikv/tikv-client-java/3.2.0) has been placed in directory `${SEATUNNEL_HOME}/lib/`. -Please download and put Mysql driver and tikv-java-client in `${SEATUNNEL_HOME}/lib/` dir. For example: cp mysql-connector-java-xxx.jar `$SEATNUNNEL_HOME/lib/` +Please download and put Mysql driver and tikv-java-client in `${SEATUNNEL_HOME}/lib/` dir. For example: cp mysql-connector-java-xxx.jar `$SEATUNNEL_HOME/lib/` ## Data Type Mapping diff --git a/docs/sidebars.js b/docs/sidebars.js index 3257181b11a..cd5f61213e7 100644 --- a/docs/sidebars.js +++ b/docs/sidebars.js @@ -192,11 +192,17 @@ const sidebars = { "label": "SeaTunnel Engine", "items": [ "seatunnel-engine/about", - "seatunnel-engine/download-seatunnel", - "seatunnel-engine/deployment", - "seatunnel-engine/local-mode-deployment", - "seatunnel-engine/hybrid-cluster-deployment", - "seatunnel-engine/separated-cluster-deployment", + { + "type": "category", + "label": "Deployment", + "items": [ + "seatunnel-engine/download-seatunnel", + "seatunnel-engine/deployment", + "seatunnel-engine/local-mode-deployment", + "seatunnel-engine/hybrid-cluster-deployment", + "seatunnel-engine/separated-cluster-deployment" + ] + }, "seatunnel-engine/savepoint", "seatunnel-engine/checkpoint-storage", "seatunnel-engine/engine-jar-storage-mode", diff --git a/docs/zh/concept/schema-evolution.md b/docs/zh/concept/schema-evolution.md index bb1a2564ef3..f8770abed56 100644 --- a/docs/zh/concept/schema-evolution.md +++ b/docs/zh/concept/schema-evolution.md @@ -12,6 +12,7 @@ [Jdbc-Mysql](https://github.com/apache/seatunnel/blob/dev/docs/zh/connector-v2/sink/Jdbc.md) [Jdbc-Oracle](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/Jdbc.md) [StarRocks](https://github.com/apache/seatunnel/blob/dev/docs/en/connector-v2/sink/StarRocks.md) +[Paimon](https://github.com/apache/seatunnel/blob/dev/docs/zh/connector-v2/sink/Paimon.md#模式演变) 注意: 目前模式演进不支持transform。不同类型数据库(Oracle-CDC -> Jdbc-Mysql)的模式演进目前不支持ddl中列的默认值。 diff --git a/docs/zh/connector-v2/sink/Paimon.md b/docs/zh/connector-v2/sink/Paimon.md index 09f4e63fbfc..4d83dcb6c76 100644 --- a/docs/zh/connector-v2/sink/Paimon.md +++ b/docs/zh/connector-v2/sink/Paimon.md @@ -65,8 +65,61 @@ Paimon连接器支持向多文件系统写入数据。目前支持的文件系 如果您使用s3文件系统。您可以配置`fs.s3a.access-key `, `fs.s3a.secret-key`, `fs.s3a.endpoint`, `fs.s3a.path.style.access`, `fs.s3a.aws.credentials`。在`paimon.hadoop.conf`选项中设置提供程序的属性。 除此之外,warehouse应该以`s3a://`开头。 +## 模式演变 +Cdc采集支持有限数量的模式更改。目前支持的模式更改包括: + +* 添加列。 + +* 修改列。更具体地说,如果修改列类型,则支持以下更改: + + * 将字符串类型(char、varchar、text)更改为另一种长度更长的字符串类型, + * 将二进制类型(binary, varbinary, blob)更改为另一种长度更长的二进制类型, + * 将整数类型(tinyint, smallint, int, bigint)更改为另一种范围更大的整数类型, + * 将浮点类型(float、double)更改为另一种范围更大的浮点类型, + +> 注意: +> +> 如果{oldType}和{newType}属于同一个类型族,但旧类型的精度高于新类型。忽略这个转换。 + +* 删除列。 + +* 更改列。 + ## 示例 +### 模式演变 +```hocon +env { + # You can set engine configuration here + parallelism = 5 + job.mode = "STREAMING" + 
checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + MySQL-CDC { + server-id = 5652-5657 + username = "st_user_source" + password = "mysqlpw" + table-names = ["shop.products"] + base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop" + debezium = { + include.schema.changes = true + } + } +} + +sink { + Paimon { + warehouse = "file:///tmp/paimon" + database = "mysql_to_paimon" + table = "products" + } +} +``` + ### 单表 ```hocon diff --git a/docs/zh/connector-v2/source/TiDB-CDC.md b/docs/zh/connector-v2/source/TiDB-CDC.md index a2f4ba21af4..bf06dbb4750 100644 --- a/docs/zh/connector-v2/source/TiDB-CDC.md +++ b/docs/zh/connector-v2/source/TiDB-CDC.md @@ -39,7 +39,7 @@ TiDB-CDC连接器允许从 TiDB 数据库读取快照数据和增量数据。本 > 1. 你需要确保 [jdbc 驱动 jar 包](https:/mvnrepository.com/artifact/mysql/mysql-connector-java) 和 [tikv-client-java jar 包](https:/mvnrepository.com/artifact/org.tikv/tikv-client-java/3.2.0) 已经放在目录 `${SEATUNNEL_HOME}/lib/` . -请下载Mysql驱动和tikv-java-client并将其放在`${SEATUNNEL_HOME}/lib/`目录中。例如:cp mysql-connector-java-xxx.jar`$SEATNUNNEL_HOME/lib/` +请下载Mysql驱动和tikv-java-client并将其放在`${SEATUNNEL_HOME}/lib/`目录中。例如:cp mysql-connector-java-xxx.jar`$SEATUNNEL_HOME/lib/` ## 数据类型映射 diff --git a/pom.xml b/pom.xml index ee528ff743f..2664393a782 100644 --- a/pom.xml +++ b/pom.xml @@ -803,6 +803,9 @@ org.apache.maven.plugins maven-dependency-plugin ${maven-dependency-plugin.version} + + true + org.codehaus.mojo diff --git a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java index 7d1a19c8859..a715280c1f0 100644 --- a/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java +++ b/seatunnel-api/src/main/java/org/apache/seatunnel/api/sink/multitablesink/MultiTableWriterRunnable.java @@ -44,8 +44,9 @@ public MultiTableWriterRunnable( @Override public void run() { while (true) { + SeaTunnelRow row = null; try { - SeaTunnelRow row = queue.poll(100, TimeUnit.MILLISECONDS); + row = queue.poll(100, TimeUnit.MILLISECONDS); if (row == null) { continue; } @@ -71,7 +72,8 @@ public void run() { throwable = e; break; } catch (Throwable e) { - log.error("MultiTableWriterRunnable error", e); + log.error( + String.format("MultiTableWriterRunnable error when write row %s", row), e); throwable = e; break; } diff --git a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/RsyncFileTransfer.java b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/RsyncFileTransfer.java index 478af2f6647..793dd6cf15c 100644 --- a/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/RsyncFileTransfer.java +++ b/seatunnel-connectors-v2/connector-clickhouse/src/main/java/org/apache/seatunnel/connectors/seatunnel/clickhouse/sink/file/RsyncFileTransfer.java @@ -97,7 +97,7 @@ public void transferAndChown(String sourcePath, String targetPath) { rsyncCommand.add("-e"); rsyncCommand.add(sshParameter); rsyncCommand.add(sourcePath); - rsyncCommand.add(String.format("root@%s:%s", host, targetPath)); + rsyncCommand.add(String.format("%s@%s:%s", user, host, targetPath)); log.info("Generate rsync command: {}", String.join(" ", rsyncCommand)); ProcessBuilder processBuilder = new ProcessBuilder("bash", "-c", 
String.join(" ", rsyncCommand)); diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/JdbcColumnConverter.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/JdbcColumnConverter.java index 664141b450b..1184e5e59e5 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/JdbcColumnConverter.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/JdbcColumnConverter.java @@ -88,6 +88,7 @@ public static List convert(DatabaseMetaData metadata, TablePath tablePat int columnSize = columnsResultSet.getInt("COLUMN_SIZE"); int decimalDigits = columnsResultSet.getInt("DECIMAL_DIGITS"); int nullable = columnsResultSet.getInt("NULLABLE"); + String comment = columnsResultSet.getString("REMARKS"); Column column = convert( @@ -96,7 +97,8 @@ public static List convert(DatabaseMetaData metadata, TablePath tablePat nativeType, nullable, columnSize, - decimalDigits); + decimalDigits, + comment); columns.add(column); } } @@ -110,7 +112,7 @@ public static Column convert(ResultSetMetaData metadata, int index) throws SQLEx int isNullable = metadata.isNullable(index); int precision = metadata.getPrecision(index); int scale = metadata.getScale(index); - return convert(columnName, jdbcType, nativeType, isNullable, precision, scale); + return convert(columnName, jdbcType, nativeType, isNullable, precision, scale, null); } public static Column convert( @@ -119,7 +121,8 @@ public static Column convert( String nativeType, int isNullable, int precision, - int scale) + int scale, + String comment) throws SQLException { int columnLength = precision; long longColumnLength = precision; @@ -206,7 +209,7 @@ public static Column convert( columnLength, isNullable != ResultSetMetaData.columnNoNulls, null, - null, + comment, nativeType, false, false, diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectTypeMapper.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectTypeMapper.java index 0b87f7b0d97..45da6d7611d 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectTypeMapper.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/dialect/JdbcDialectTypeMapper.java @@ -98,6 +98,7 @@ default List mappingColumn( int columnSize = rs.getInt("COLUMN_SIZE"); int decimalDigits = rs.getInt("DECIMAL_DIGITS"); int nullable = rs.getInt("NULLABLE"); + String comment = rs.getString("REMARKS"); BasicTypeDefine typeDefine = BasicTypeDefine.builder() @@ -109,6 +110,7 @@ default List mappingColumn( .precision((long) columnSize) .scale(decimalDigits) .nullable(nullable == DatabaseMetaData.columnNullable) + .comment(comment) .build(); columns.add(mappingColumn(typeDefine)); } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java index 88e658fc382..8b7f15f3647 100644 --- 
a/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/main/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatement.java @@ -17,6 +17,8 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.executor; +import org.apache.seatunnel.shade.com.google.common.annotations.VisibleForTesting; + import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -47,6 +49,8 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkArgument; import static org.apache.seatunnel.shade.com.google.common.base.Preconditions.checkNotNull; @@ -669,29 +673,26 @@ public static FieldNamedPreparedStatement prepareStatement( connection.prepareStatement(parsedSQL), indexMapping); } - private static String parseNamedStatement(String sql, Map> paramMap) { - StringBuilder parsedSql = new StringBuilder(); - int fieldIndex = 1; // SQL statement parameter index starts from 1 - int length = sql.length(); - for (int i = 0; i < length; i++) { - char c = sql.charAt(i); - if (':' == c) { - int j = i + 1; - while (j < length && Character.isJavaIdentifierPart(sql.charAt(j))) { - j++; - } - String parameterName = sql.substring(i + 1, j); - checkArgument( - !parameterName.isEmpty(), - "Named parameters in SQL statement must not be empty."); - paramMap.computeIfAbsent(parameterName, n -> new ArrayList<>()).add(fieldIndex); - fieldIndex++; - i = j - 1; - parsedSql.append('?'); - } else { - parsedSql.append(c); - } + @VisibleForTesting + public static String parseNamedStatement(String sql, Map> paramMap) { + Pattern pattern = + Pattern.compile(":([\\p{L}\\p{Nl}\\p{Nd}\\p{Pc}\\$\\-\\.@%&*#~!?^+=<>|]+)"); + Matcher matcher = pattern.matcher(sql); + + StringBuffer result = new StringBuffer(); + int fieldIndex = 1; + + while (matcher.find()) { + String parameterName = matcher.group(1); + checkArgument( + !parameterName.isEmpty(), + "Named parameters in SQL statement must not be empty."); + paramMap.computeIfAbsent(parameterName, n -> new ArrayList<>()).add(fieldIndex++); + matcher.appendReplacement(result, "?"); } - return parsedSql.toString(); + + matcher.appendTail(result); + + return result.toString(); } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtilsTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtilsTest.java index 25f256fc04e..7fb8741f056 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtilsTest.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/CatalogUtilsTest.java @@ -17,9 +17,15 @@ package org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.utils; +import org.apache.seatunnel.api.table.catalog.Column; import org.apache.seatunnel.api.table.catalog.ConstraintKey; +import org.apache.seatunnel.api.table.catalog.PhysicalColumn; import org.apache.seatunnel.api.table.catalog.PrimaryKey; import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import 
org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.type.BasicType; +import org.apache.seatunnel.connectors.seatunnel.jdbc.internal.dialect.JdbcDialectTypeMapper; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; @@ -44,4 +50,37 @@ void testConstraintKeysNameWithOutSpecialChar() throws SQLException { new TestDatabaseMetaData(), TablePath.of("test.test")); Assertions.assertEquals("testfdawe_", constraintKeys.get(0).getConstraintName()); } + + @Test + void testGetCommentWithJdbcDialectTypeMapper() throws SQLException { + TableSchema tableSchema = + CatalogUtils.getTableSchema( + new TestDatabaseMetaData(), + TablePath.of("test.test"), + new JdbcDialectTypeMapper() { + @Override + public Column mappingColumn(BasicTypeDefine typeDefine) { + return JdbcDialectTypeMapper.super.mappingColumn(typeDefine); + } + }); + Assertions.assertEquals("id comment", tableSchema.getColumns().get(0).getComment()); + + TableSchema tableSchema2 = + CatalogUtils.getTableSchema( + new TestDatabaseMetaData(), + TablePath.of("test.test"), + new JdbcDialectTypeMapper() { + @Override + public Column mappingColumn(BasicTypeDefine typeDefine) { + return PhysicalColumn.of( + typeDefine.getName(), + BasicType.VOID_TYPE, + typeDefine.getLength(), + typeDefine.isNullable(), + typeDefine.getScale(), + typeDefine.getComment()); + } + }); + Assertions.assertEquals("id comment", tableSchema2.getColumns().get(0).getComment()); + } } diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/TestDatabaseMetaData.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/TestDatabaseMetaData.java index c0ea1c911e8..b7f60851388 100644 --- a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/TestDatabaseMetaData.java +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/catalog/utils/TestDatabaseMetaData.java @@ -660,7 +660,20 @@ public ResultSet getTableTypes() throws SQLException { public ResultSet getColumns( String catalog, String schemaPattern, String tableNamePattern, String columnNamePattern) throws SQLException { - return null; + List> value = new ArrayList<>(); + value.add( + new HashMap() { + { + put("COLUMN_NAME", "id"); + put("DATA_TYPE", 1); + put("TYPE_NAME", "INT"); + put("COLUMN_SIZE", 11); + put("DECIMAL_DIGITS", 0); + put("NULLABLE", 0); + put("REMARKS", "id comment"); + } + }); + return new TestResultSet(value); } @Override diff --git a/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatementTest.java b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatementTest.java new file mode 100644 index 00000000000..b393c844eef --- /dev/null +++ b/seatunnel-connectors-v2/connector-jdbc/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/internal/executor/FieldNamedPreparedStatementTest.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.jdbc.internal.executor; + +import org.junit.jupiter.api.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class FieldNamedPreparedStatementTest { + + private static final String[] SPECIAL_FILEDNAMES = + new String[] { + "USER@TOKEN", + "字段%名称", + "field_name", + "field.name", + "field-name", + "$fieldName", + "field&key", + "field*value", + "field#1", + "field~test", + "field!data", + "field?question", + "field^caret", + "field+add", + "field=value", + "fieldmax", + "field|pipe" + }; + + @Test + public void testParseNamedStatementWithSpecialCharacters() { + String sql = + "INSERT INTO `nhp_emr_ws`.`cm_prescriptiondetails_cs` (`USER@TOKEN`, `字段%名称`, `field_name`, `field.name`, `field-name`, `$fieldName`, `field&key`, `field*value`, `field#1`, `field~test`, `field!data`, `field?question`, `field^caret`, `field+add`, `field=value`, `fieldmax`, `field|pipe`) VALUES (:USER@TOKEN, :字段%名称, :field_name, :field.name, :field-name, :$fieldName, :field&key, :field*value, :field#1, :field~test, :field!data, :field?question, :field^caret, :field+add, :field=value, :fieldmax, :field|pipe) ON DUPLICATE KEY UPDATE `USER@TOKEN`=VALUES(`USER@TOKEN`), `字段%名称`=VALUES(`字段%名称`), `field_name`=VALUES(`field_name`), `field.name`=VALUES(`field.name`), `field-name`=VALUES(`field-name`), `$fieldName`=VALUES(`$fieldName`), `field&key`=VALUES(`field&key`), `field*value`=VALUES(`field*value`), `field#1`=VALUES(`field#1`), `field~test`=VALUES(`field~test`), `field!data`=VALUES(`field!data`), `field?question`=VALUES(`field?question`), `field^caret`=VALUES(`field^caret`), `field+add`=VALUES(`field+add`), `field=value`=VALUES(`field=value`), `fieldmax`=VALUES(`fieldmax`), `field|pipe`=VALUES(`field|pipe`)"; + + String exceptPreparedstatement = + "INSERT INTO `nhp_emr_ws`.`cm_prescriptiondetails_cs` (`USER@TOKEN`, `字段%名称`, `field_name`, `field.name`, `field-name`, `$fieldName`, `field&key`, `field*value`, `field#1`, `field~test`, `field!data`, `field?question`, `field^caret`, `field+add`, `field=value`, `fieldmax`, `field|pipe`) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
ON DUPLICATE KEY UPDATE `USER@TOKEN`=VALUES(`USER@TOKEN`), `字段%名称`=VALUES(`字段%名称`), `field_name`=VALUES(`field_name`), `field.name`=VALUES(`field.name`), `field-name`=VALUES(`field-name`), `$fieldName`=VALUES(`$fieldName`), `field&key`=VALUES(`field&key`), `field*value`=VALUES(`field*value`), `field#1`=VALUES(`field#1`), `field~test`=VALUES(`field~test`), `field!data`=VALUES(`field!data`), `field?question`=VALUES(`field?question`), `field^caret`=VALUES(`field^caret`), `field+add`=VALUES(`field+add`), `field=value`=VALUES(`field=value`), `fieldmax`=VALUES(`fieldmax`), `field|pipe`=VALUES(`field|pipe`)"; + + Map> paramMap = new HashMap<>(); + String actualSQL = FieldNamedPreparedStatement.parseNamedStatement(sql, paramMap); + assertEquals(exceptPreparedstatement, actualSQL); + for (int i = 0; i < SPECIAL_FILEDNAMES.length; i++) { + assertTrue(paramMap.containsKey(SPECIAL_FILEDNAMES[i])); + assertEquals(i + 1, paramMap.get(SPECIAL_FILEDNAMES[i]).get(0)); + } + } + + @Test + public void testParseNamedStatement() { + String sql = "UPDATE table SET col1 = :param1, col2 = :param1 WHERE col3 = :param2"; + Map> paramMap = new HashMap<>(); + String expectedSQL = "UPDATE table SET col1 = ?, col2 = ? WHERE col3 = ?"; + + String actualSQL = FieldNamedPreparedStatement.parseNamedStatement(sql, paramMap); + + assertEquals(expectedSQL, actualSQL); + assertTrue(paramMap.containsKey("param1")); + assertTrue(paramMap.containsKey("param2")); + assertEquals(1, paramMap.get("param1").get(0).intValue()); + assertEquals(2, paramMap.get("param1").get(1).intValue()); + assertEquals(3, paramMap.get("param2").get(0).intValue()); + } + + @Test + public void testParseNamedStatementWithNoNamedParameters() { + String sql = "SELECT * FROM table"; + Map> paramMap = new HashMap<>(); + String expectedSQL = "SELECT * FROM table"; + + String actualSQL = FieldNamedPreparedStatement.parseNamedStatement(sql, paramMap); + + assertEquals(expectedSQL, actualSQL); + assertTrue(paramMap.isEmpty()); + } +} diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaInternalProducer.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaInternalProducer.java index 515610e9dd0..33d2caeb933 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaInternalProducer.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaInternalProducer.java @@ -55,11 +55,17 @@ public void initTransactions() { @Override public void beginTransaction() throws ProducerFencedException { + if (log.isDebugEnabled()) { + log.debug("KafkaInternalProducer.beginTransaction. " + this.transactionalId); + } super.beginTransaction(); } @Override public void commitTransaction() throws ProducerFencedException { + if (log.isDebugEnabled()) { + log.debug("KafkaInternalProducer.commitTransaction." + this.transactionalId); + } super.commitTransaction(); } @@ -69,7 +75,18 @@ public void abortTransaction() throws ProducerFencedException { } public void setTransactionalId(String transactionalId) { + if (log.isDebugEnabled()) { + log.debug( + "KafkaInternalProducer.abortTransaction. Target transactionalId=" + + transactionalId); + } if (!transactionalId.equals(this.transactionalId)) { + if (log.isDebugEnabled()) { + log.debug( + "KafkaInternalProducer.abortTransaction. 
Current transactionalId={} not match target transactionalId={}", + this.transactionalId, + transactionalId); + } Object transactionManager = getTransactionManager(); synchronized (transactionManager) { ReflectionUtils.setField(transactionManager, "transactionalId", transactionalId); @@ -97,7 +114,7 @@ public long getProducerId() { return (long) ReflectionUtils.getField(producerIdAndEpoch, "producerId").get(); } - public void resumeTransaction(long producerId, short epoch) { + public void resumeTransaction(long producerId, short epoch, boolean txnStarted) { log.info( "Attempting to resume transaction {} with producerId {} and epoch {}", @@ -125,10 +142,15 @@ public void resumeTransaction(long producerId, short epoch) { transitionTransactionManagerStateTo(transactionManager, "READY"); transitionTransactionManagerStateTo(transactionManager, "IN_TRANSACTION"); - ReflectionUtils.setField(transactionManager, "transactionStarted", true); + ReflectionUtils.setField(transactionManager, "transactionStarted", txnStarted); } } + public boolean isTxnStarted() { + Object transactionManager = getTransactionManager(); + return (boolean) ReflectionUtils.getField(transactionManager, "transactionStarted").get(); + } + private static Object createProducerIdAndEpoch(long producerId, short epoch) { try { Field field = diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkCommitter.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkCommitter.java index ed4e2808091..4be9fba709b 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkCommitter.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaSinkCommitter.java @@ -48,7 +48,7 @@ public List commit(List commitInfos) { for (KafkaCommitInfo commitInfo : commitInfos) { String transactionId = commitInfo.getTransactionId(); if (log.isDebugEnabled()) { - log.debug("Committing transaction {}", transactionId); + log.debug("Committing transaction {}, commitInfo {}", transactionId, commitInfo); } KafkaProducer producer = getProducer(commitInfo); producer.commitTransaction(); @@ -87,7 +87,8 @@ public void abort(List commitInfos) { new KafkaInternalProducer<>( commitInfo.getKafkaProperties(), commitInfo.getTransactionId()); } - kafkaProducer.resumeTransaction(commitInfo.getProducerId(), commitInfo.getEpoch()); + kafkaProducer.resumeTransaction( + commitInfo.getProducerId(), commitInfo.getEpoch(), commitInfo.isTxnStarted()); return kafkaProducer; } } diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaTransactionSender.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaTransactionSender.java index 213bb9db575..1f92bcd5b0d 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaTransactionSender.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/sink/KafkaTransactionSender.java @@ -27,6 +27,7 @@ import lombok.extern.slf4j.Slf4j; +import java.time.Duration; import java.util.List; import java.util.Optional; import java.util.Properties; @@ -46,6 +47,7 @@ public class KafkaTransactionSender implements KafkaProduceSender { 
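The Kafka changes here thread a `txnStarted` flag through the commit info so that a resumed transaction restores the producer's real `transactionStarted` state, and the `KafkaTransactionSender` hunk that follows keeps the prepared transaction open when the writer shuts down (per the comment added to its `close()` method, a plain `close()` would abort it). A minimal kafka-clients sketch of the writer side of that two-phase pattern, assuming a local broker and illustrative topic/transactional-id values:

```java
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.time.Duration;
import java.util.Properties;

public class TwoPhaseKafkaWriteSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // assumption: local broker
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("transactional.id", "st-demo-txn-0");   // illustrative transactional id

        KafkaProducer<String, String> producer = new KafkaProducer<>(props);
        producer.initTransactions();
        producer.beginTransaction();
        producer.send(new ProducerRecord<>("demo-topic", "key", "value"));
        producer.flush();

        // The writer stops here; the transaction is committed later by a separate committer.
        // Closing with a zero timeout avoids aborting the still-open transaction on shutdown,
        // which is the behavior the KafkaTransactionSender change below works around.
        producer.close(Duration.ZERO);
    }
}
```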
private String transactionId; private final String transactionPrefix; private final Properties kafkaProperties; + private int recordNumInTransaction = 0; public KafkaTransactionSender(String transactionPrefix, Properties kafkaProperties) { this.transactionPrefix = transactionPrefix; @@ -55,6 +57,7 @@ public KafkaTransactionSender(String transactionPrefix, Properties kafkaProperti @Override public void send(ProducerRecord producerRecord) { kafkaProducer.send(producerRecord); + recordNumInTransaction++; } @Override @@ -62,6 +65,7 @@ public void beginTransaction(String transactionId) { this.transactionId = transactionId; this.kafkaProducer = getTransactionProducer(kafkaProperties, transactionId); kafkaProducer.beginTransaction(); + recordNumInTransaction = 0; } @Override @@ -71,7 +75,8 @@ public Optional prepareCommit() { transactionId, kafkaProperties, this.kafkaProducer.getProducerId(), - this.kafkaProducer.getEpoch()); + this.kafkaProducer.getEpoch(), + this.kafkaProducer.isTxnStarted()); return Optional.of(kafkaCommitInfo); } @@ -108,6 +113,10 @@ public void abortTransaction(long checkpointId) { @Override public List snapshotState(long checkpointId) { + if (recordNumInTransaction == 0) { + // KafkaSinkCommitter does not support emptyTransaction, so we commit here. + kafkaProducer.commitTransaction(); + } return Lists.newArrayList( new KafkaSinkState( transactionId, transactionPrefix, checkpointId, kafkaProperties)); @@ -117,7 +126,9 @@ public List snapshotState(long checkpointId) { public void close() { if (kafkaProducer != null) { kafkaProducer.flush(); - kafkaProducer.close(); + // kafkaProducer will abort the transaction if you call close() without a duration arg + // which will cause an exception when Committer commit the transaction later. + kafkaProducer.close(Duration.ZERO); } } diff --git a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/state/KafkaCommitInfo.java b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/state/KafkaCommitInfo.java index 99cc3aaf3c4..82ef8af4d3c 100644 --- a/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/state/KafkaCommitInfo.java +++ b/seatunnel-connectors-v2/connector-kafka/src/main/java/org/apache/seatunnel/connectors/seatunnel/kafka/state/KafkaCommitInfo.java @@ -31,4 +31,5 @@ public class KafkaCommitInfo implements Serializable { private final Properties kafkaProperties; private final long producerId; private final short epoch; + private final boolean txnStarted; } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java index 30753584701..6883a47ce5c 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/catalog/PaimonCatalog.java @@ -17,6 +17,7 @@ package org.apache.seatunnel.connectors.seatunnel.paimon.catalog; +import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.table.catalog.Catalog; import org.apache.seatunnel.api.table.catalog.CatalogTable; @@ -29,14 +30,17 @@ import 
org.apache.seatunnel.api.table.catalog.exception.TableAlreadyExistException; import org.apache.seatunnel.api.table.catalog.exception.TableNotExistException; import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.common.constants.PluginType; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonConfig; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonSinkConfig; import org.apache.seatunnel.connectors.seatunnel.paimon.exception.PaimonConnectorErrorCode; import org.apache.seatunnel.connectors.seatunnel.paimon.exception.PaimonConnectorException; +import org.apache.seatunnel.connectors.seatunnel.paimon.sink.PaimonSink; import org.apache.seatunnel.connectors.seatunnel.paimon.utils.SchemaUtil; import org.apache.paimon.catalog.Identifier; import org.apache.paimon.schema.Schema; +import org.apache.paimon.schema.SchemaChange; import org.apache.paimon.table.FileStoreTable; import org.apache.paimon.table.Table; import org.apache.paimon.types.DataField; @@ -53,13 +57,15 @@ import java.util.stream.Collectors; import java.util.stream.IntStream; +import static org.apache.seatunnel.api.table.factory.FactoryUtil.discoverFactory; + @Slf4j public class PaimonCatalog implements Catalog, PaimonTable { private static final String DEFAULT_DATABASE = "default"; - private String catalogName; - private ReadonlyConfig readonlyConfig; - private PaimonCatalogLoader paimonCatalogLoader; + private final String catalogName; + private final ReadonlyConfig readonlyConfig; + private final PaimonCatalogLoader paimonCatalogLoader; private org.apache.paimon.catalog.Catalog catalog; public PaimonCatalog(String catalogName, ReadonlyConfig readonlyConfig) { @@ -297,4 +303,44 @@ private void resolveException(Exception e) { } throw new CatalogException("An unexpected error occurred", e); } + + // -------------------------------------------------------------------------------------------- + // SPI load paimon catalog + // -------------------------------------------------------------------------------------------- + + public static PaimonCatalog loadPaimonCatalog(ReadonlyConfig readonlyConfig) { + org.apache.seatunnel.api.table.factory.CatalogFactory catalogFactory = + discoverFactory( + Thread.currentThread().getContextClassLoader(), + org.apache.seatunnel.api.table.factory.CatalogFactory.class, + PaimonSink.PLUGIN_NAME); + if (catalogFactory == null) { + throw new PaimonConnectorException( + SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, + String.format( + "PluginName: %s, PluginType: %s, Message: %s", + PaimonSink.PLUGIN_NAME, + PluginType.SINK, + "Cannot find paimon catalog factory")); + } + return (PaimonCatalog) + catalogFactory.createCatalog(catalogFactory.factoryIdentifier(), readonlyConfig); + } + + // -------------------------------------------------------------------------------------------- + // alterTable + // -------------------------------------------------------------------------------------------- + + public void alterTable( + Identifier identifier, SchemaChange schemaChange, boolean ignoreIfNotExists) { + try { + catalog.alterTable(identifier, schemaChange, ignoreIfNotExists); + } catch (org.apache.paimon.catalog.Catalog.TableNotExistException e) { + throw new CatalogException("TableNotExistException: {}", e); + } catch (org.apache.paimon.catalog.Catalog.ColumnAlreadyExistException e) { + throw new CatalogException("ColumnAlreadyExistException: {}", e); + } catch (org.apache.paimon.catalog.Catalog.ColumnNotExistException e) { + throw new
CatalogException("ColumnNotExistException: {}", e); + } + } } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java index 86828c9a587..d657810c95b 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSink.java @@ -18,7 +18,6 @@ package org.apache.seatunnel.connectors.seatunnel.paimon.sink; import org.apache.seatunnel.api.common.JobContext; -import org.apache.seatunnel.api.common.SeaTunnelAPIErrorCode; import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.serialization.DefaultSerializer; import org.apache.seatunnel.api.serialization.Serializer; @@ -28,13 +27,13 @@ import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportMultiTableSink; import org.apache.seatunnel.api.sink.SupportSaveMode; +import org.apache.seatunnel.api.sink.SupportSchemaEvolutionSink; import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.schema.SchemaChangeType; import org.apache.seatunnel.api.table.type.SeaTunnelRow; -import org.apache.seatunnel.api.table.type.SeaTunnelRowType; -import org.apache.seatunnel.common.constants.PluginType; +import org.apache.seatunnel.connectors.seatunnel.paimon.catalog.PaimonCatalog; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonHadoopConfiguration; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonSinkConfig; -import org.apache.seatunnel.connectors.seatunnel.paimon.exception.PaimonConnectorException; import org.apache.seatunnel.connectors.seatunnel.paimon.handler.PaimonSaveModeHandler; import org.apache.seatunnel.connectors.seatunnel.paimon.security.PaimonSecurityContext; import org.apache.seatunnel.connectors.seatunnel.paimon.sink.commit.PaimonAggregatedCommitInfo; @@ -45,11 +44,10 @@ import org.apache.paimon.table.Table; import java.io.IOException; +import java.util.Arrays; import java.util.List; import java.util.Optional; -import static org.apache.seatunnel.api.table.factory.FactoryUtil.discoverFactory; - public class PaimonSink implements SeaTunnelSink< SeaTunnelRow, @@ -57,16 +55,15 @@ public class PaimonSink PaimonCommitInfo, PaimonAggregatedCommitInfo>, SupportSaveMode, + SupportMultiTableSink, SupportLoadTable, - SupportMultiTableSink { + SupportSchemaEvolutionSink { private static final long serialVersionUID = 1L; public static final String PLUGIN_NAME = "Paimon"; - private SeaTunnelRowType seaTunnelRowType; - - private Table table; + private Table paimonTable; private JobContext jobContext; @@ -82,7 +79,6 @@ public PaimonSink(ReadonlyConfig readonlyConfig, CatalogTable catalogTable) { this.readonlyConfig = readonlyConfig; this.paimonSinkConfig = new PaimonSinkConfig(readonlyConfig); this.catalogTable = catalogTable; - this.seaTunnelRowType = catalogTable.getSeaTunnelRowType(); this.paimonHadoopConfiguration = PaimonSecurityContext.loadHadoopConfig(paimonSinkConfig); } @@ -95,8 +91,9 @@ public String getPluginName() { public PaimonSinkWriter createWriter(SinkWriter.Context context) throws IOException { return new PaimonSinkWriter( context, - table, - seaTunnelRowType, + readonlyConfig, + catalogTable, + 
paimonTable, jobContext, paimonSinkConfig, paimonHadoopConfiguration); @@ -106,7 +103,7 @@ public PaimonSinkWriter createWriter(SinkWriter.Context context) throws IOExcept public Optional> createAggregatedCommitter() throws IOException { return Optional.of( - new PaimonAggregatedCommitter(table, jobContext, paimonHadoopConfiguration)); + new PaimonAggregatedCommitter(paimonTable, jobContext, paimonHadoopConfiguration)); } @Override @@ -114,8 +111,9 @@ public SinkWriter restoreWriter SinkWriter.Context context, List states) throws IOException { return new PaimonSinkWriter( context, - table, - seaTunnelRowType, + readonlyConfig, + catalogTable, + paimonTable, states, jobContext, paimonSinkConfig, @@ -139,39 +137,33 @@ public void setJobContext(JobContext jobContext) { @Override public Optional getSaveModeHandler() { - org.apache.seatunnel.api.table.factory.CatalogFactory catalogFactory = - discoverFactory( - Thread.currentThread().getContextClassLoader(), - org.apache.seatunnel.api.table.factory.CatalogFactory.class, - "Paimon"); - if (catalogFactory == null) { - throw new PaimonConnectorException( - SeaTunnelAPIErrorCode.CONFIG_VALIDATION_FAILED, - String.format( - "PluginName: %s, PluginType: %s, Message: %s", - getPluginName(), - PluginType.SINK, - "Cannot find paimon catalog factory")); - } - org.apache.seatunnel.api.table.catalog.Catalog catalog = - catalogFactory.createCatalog(catalogFactory.factoryIdentifier(), readonlyConfig); + PaimonCatalog paimonCatalog = PaimonCatalog.loadPaimonCatalog(readonlyConfig); return Optional.of( new PaimonSaveModeHandler( this, paimonSinkConfig.getSchemaSaveMode(), paimonSinkConfig.getDataSaveMode(), - catalog, + paimonCatalog, catalogTable, null)); } @Override public void setLoadTable(Table table) { - this.table = table; + this.paimonTable = table; } @Override public Optional getWriteCatalogTable() { return Optional.ofNullable(catalogTable); } + + @Override + public List supports() { + return Arrays.asList( + SchemaChangeType.ADD_COLUMN, + SchemaChangeType.DROP_COLUMN, + SchemaChangeType.RENAME_COLUMN, + SchemaChangeType.UPDATE_COLUMN); + } } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java index e57e62c9814..b208a916bb3 100644 --- a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/PaimonSinkWriter.java @@ -18,11 +18,18 @@ package org.apache.seatunnel.connectors.seatunnel.paimon.sink; import org.apache.seatunnel.api.common.JobContext; +import org.apache.seatunnel.api.configuration.ReadonlyConfig; import org.apache.seatunnel.api.sink.SinkWriter; import org.apache.seatunnel.api.sink.SupportMultiTableSinkWriter; +import org.apache.seatunnel.api.sink.SupportSchemaEvolutionSinkWriter; +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent; +import org.apache.seatunnel.api.table.schema.handler.TableSchemaChangeEventDispatcher; import org.apache.seatunnel.api.table.type.SeaTunnelRow; import org.apache.seatunnel.api.table.type.SeaTunnelRowType; import org.apache.seatunnel.common.utils.SeaTunnelException; +import 
org.apache.seatunnel.connectors.seatunnel.paimon.catalog.PaimonCatalog; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonHadoopConfiguration; import org.apache.seatunnel.connectors.seatunnel.paimon.config.PaimonSinkConfig; import org.apache.seatunnel.connectors.seatunnel.paimon.exception.PaimonConnectorErrorCode; @@ -30,6 +37,7 @@ import org.apache.seatunnel.connectors.seatunnel.paimon.security.PaimonSecurityContext; import org.apache.seatunnel.connectors.seatunnel.paimon.sink.bucket.PaimonBucketAssigner; import org.apache.seatunnel.connectors.seatunnel.paimon.sink.commit.PaimonCommitInfo; +import org.apache.seatunnel.connectors.seatunnel.paimon.sink.schema.handler.AlterPaimonTableSchemaEventHandler; import org.apache.seatunnel.connectors.seatunnel.paimon.sink.state.PaimonSinkState; import org.apache.seatunnel.connectors.seatunnel.paimon.utils.JobContextUtil; import org.apache.seatunnel.connectors.seatunnel.paimon.utils.RowConverter; @@ -66,69 +74,79 @@ @Slf4j public class PaimonSinkWriter implements SinkWriter, - SupportMultiTableSinkWriter { + SupportMultiTableSinkWriter, + SupportSchemaEvolutionSinkWriter { private String commitUser = UUID.randomUUID().toString(); - private final FileStoreTable table; + private FileStoreTable paimonFileStoretable; - private final WriteBuilder tableWriteBuilder; + private WriteBuilder tableWriteBuilder; - private final TableWrite tableWrite; + private TableWrite tableWrite; - private List committables = new ArrayList<>(); + private final List committables = new ArrayList<>(); - private final SeaTunnelRowType seaTunnelRowType; + private SeaTunnelRowType seaTunnelRowType; private final SinkWriter.Context context; private final JobContext jobContext; - private final TableSchema tableSchema; + private org.apache.seatunnel.api.table.catalog.TableSchema sourceTableSchema; + + private TableSchema sinkPaimonTableSchema; private PaimonBucketAssigner bucketAssigner; private final boolean dynamicBucket; + private final PaimonCatalog paimonCatalog; + + private final TablePath paimonTablePath; + + private final PaimonSinkConfig paimonSinkConfig; + + private final TableSchemaChangeEventDispatcher TABLE_SCHEMACHANGER = + new TableSchemaChangeEventDispatcher(); + public PaimonSinkWriter( Context context, - Table table, - SeaTunnelRowType seaTunnelRowType, + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + Table paimonFileStoretable, JobContext jobContext, PaimonSinkConfig paimonSinkConfig, PaimonHadoopConfiguration paimonHadoopConfiguration) { - this.table = (FileStoreTable) table; + this.sourceTableSchema = catalogTable.getTableSchema(); + this.seaTunnelRowType = this.sourceTableSchema.toPhysicalRowDataType(); + this.paimonTablePath = catalogTable.getTablePath(); + this.paimonCatalog = PaimonCatalog.loadPaimonCatalog(readonlyConfig); + this.paimonCatalog.open(); + this.paimonFileStoretable = (FileStoreTable) paimonFileStoretable; CoreOptions.ChangelogProducer changelogProducer = - this.table.coreOptions().changelogProducer(); + this.paimonFileStoretable.coreOptions().changelogProducer(); if (Objects.nonNull(paimonSinkConfig.getChangelogProducer()) && changelogProducer != paimonSinkConfig.getChangelogProducer()) { log.warn( "configured the props named 'changelog-producer' which is not compatible with the options in table , so it will use the table's 'changelog-producer'"); } - String changelogTmpPath = paimonSinkConfig.getChangelogTmpPath(); - this.tableWriteBuilder = - JobContextUtil.isBatchJob(jobContext) - ? 
this.table.newBatchWriteBuilder() - : this.table.newStreamWriteBuilder(); - this.tableWrite = - tableWriteBuilder - .newWrite() - .withIOManager(IOManager.create(splitPaths(changelogTmpPath))); - this.seaTunnelRowType = seaTunnelRowType; + this.paimonSinkConfig = paimonSinkConfig; + this.sinkPaimonTableSchema = this.paimonFileStoretable.schema(); this.context = context; this.jobContext = jobContext; - this.tableSchema = this.table.schema(); - BucketMode bucketMode = this.table.bucketMode(); + this.newTableWrite(); + BucketMode bucketMode = this.paimonFileStoretable.bucketMode(); this.dynamicBucket = BucketMode.DYNAMIC == bucketMode || BucketMode.GLOBAL_DYNAMIC == bucketMode; - int bucket = ((FileStoreTable) table).coreOptions().bucket(); + int bucket = ((FileStoreTable) paimonFileStoretable).coreOptions().bucket(); if (bucket == -1 && BucketMode.UNAWARE == bucketMode) { log.warn("Append only table currently do not support dynamic bucket"); } if (dynamicBucket) { this.bucketAssigner = new PaimonBucketAssigner( - table, + paimonFileStoretable, this.context.getNumberOfParallelSubtasks(), this.context.getIndexOfSubtask()); } @@ -137,16 +155,18 @@ public PaimonSinkWriter( public PaimonSinkWriter( Context context, - Table table, - SeaTunnelRowType seaTunnelRowType, + ReadonlyConfig readonlyConfig, + CatalogTable catalogTable, + Table paimonFileStoretable, List states, JobContext jobContext, PaimonSinkConfig paimonSinkConfig, PaimonHadoopConfiguration paimonHadoopConfiguration) { this( context, - table, - seaTunnelRowType, + readonlyConfig, + catalogTable, + paimonFileStoretable, jobContext, paimonSinkConfig, paimonHadoopConfiguration); @@ -177,7 +197,8 @@ public PaimonSinkWriter( @Override public void write(SeaTunnelRow element) throws IOException { - InternalRow rowData = RowConverter.reconvert(element, seaTunnelRowType, tableSchema); + InternalRow rowData = + RowConverter.reconvert(element, seaTunnelRowType, sinkPaimonTableSchema); try { PaimonSecurityContext.runSecured( () -> { @@ -197,6 +218,40 @@ public void write(SeaTunnelRow element) throws IOException { } } + @Override + public void applySchemaChange(SchemaChangeEvent event) throws IOException { + this.sourceTableSchema = + new AlterPaimonTableSchemaEventHandler( + sourceTableSchema, + paimonCatalog, + sinkPaimonTableSchema, + paimonTablePath) + .apply(event); + reOpenTableWrite(); + } + + private void reOpenTableWrite() { + this.seaTunnelRowType = this.sourceTableSchema.toPhysicalRowDataType(); + this.paimonFileStoretable = (FileStoreTable) paimonCatalog.getPaimonTable(paimonTablePath); + this.sinkPaimonTableSchema = this.paimonFileStoretable.schema(); + this.newTableWrite(); + } + + private void newTableWrite() { + this.tableWriteBuilder = + JobContextUtil.isBatchJob(jobContext) + ? 
this.paimonFileStoretable.newBatchWriteBuilder() + : this.paimonFileStoretable.newStreamWriteBuilder(); + TableWrite oldTableWrite = this.tableWrite; + this.tableWrite = + tableWriteBuilder + .newWrite() + .withIOManager( + IOManager.create( + splitPaths(paimonSinkConfig.getChangelogTmpPath()))); + tableWriteClose(oldTableWrite); + } + @Override public Optional prepareCommit() throws IOException { return Optional.empty(); @@ -237,22 +292,29 @@ public void abortPrepare() {} @Override public void close() throws IOException { try { - if (Objects.nonNull(tableWrite)) { - try { - tableWrite.close(); - } catch (Exception e) { - log.error("Failed to close table writer in paimon sink writer.", e); - throw new SeaTunnelException(e); - } - } + tableWriteClose(this.tableWrite); } finally { committables.clear(); + if (Objects.nonNull(paimonCatalog)) { + paimonCatalog.close(); + } + } + } + + private void tableWriteClose(TableWrite tableWrite) { + if (Objects.nonNull(tableWrite)) { + try { + tableWrite.close(); + } catch (Exception e) { + log.error("Failed to close table writer in paimon sink writer.", e); + throw new SeaTunnelException(e); + } } } private boolean waitCompaction() { CoreOptions.ChangelogProducer changelogProducer = - this.table.coreOptions().changelogProducer(); + this.paimonFileStoretable.coreOptions().changelogProducer(); return changelogProducer == CoreOptions.ChangelogProducer.LOOKUP || changelogProducer == CoreOptions.ChangelogProducer.FULL_COMPACTION; } diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFields.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFields.java new file mode 100644 index 00000000000..43f4e5e98f6 --- /dev/null +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFields.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.connectors.seatunnel.paimon.sink.schema; + +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.DataTypeChecks; +import org.apache.paimon.types.DataTypeRoot; + +import java.util.Arrays; +import java.util.List; + +public class UpdatedDataFields { + private static final List STRING_TYPES = + Arrays.asList(DataTypeRoot.CHAR, DataTypeRoot.VARCHAR); + private static final List BINARY_TYPES = + Arrays.asList(DataTypeRoot.BINARY, DataTypeRoot.VARBINARY); + private static final List INTEGER_TYPES = + Arrays.asList( + DataTypeRoot.TINYINT, + DataTypeRoot.SMALLINT, + DataTypeRoot.INTEGER, + DataTypeRoot.BIGINT); + private static final List FLOATING_POINT_TYPES = + Arrays.asList(DataTypeRoot.FLOAT, DataTypeRoot.DOUBLE); + + private static final List DECIMAL_TYPES = Arrays.asList(DataTypeRoot.DECIMAL); + + private static final List TIMESTAMP_TYPES = + Arrays.asList(DataTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE); + + public static ConvertAction canConvert(DataType oldType, DataType newType) { + if (oldType.equalsIgnoreNullable(newType)) { + return ConvertAction.CONVERT; + } + + int oldIdx = STRING_TYPES.indexOf(oldType.getTypeRoot()); + int newIdx = STRING_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return DataTypeChecks.getLength(oldType) <= DataTypeChecks.getLength(newType) + ? ConvertAction.CONVERT + : ConvertAction.IGNORE; + } + + oldIdx = BINARY_TYPES.indexOf(oldType.getTypeRoot()); + newIdx = BINARY_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return DataTypeChecks.getLength(oldType) <= DataTypeChecks.getLength(newType) + ? ConvertAction.CONVERT + : ConvertAction.IGNORE; + } + + oldIdx = INTEGER_TYPES.indexOf(oldType.getTypeRoot()); + newIdx = INTEGER_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return oldIdx <= newIdx ? ConvertAction.CONVERT : ConvertAction.IGNORE; + } + + oldIdx = FLOATING_POINT_TYPES.indexOf(oldType.getTypeRoot()); + newIdx = FLOATING_POINT_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return oldIdx <= newIdx ? ConvertAction.CONVERT : ConvertAction.IGNORE; + } + + oldIdx = DECIMAL_TYPES.indexOf(oldType.getTypeRoot()); + newIdx = DECIMAL_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return DataTypeChecks.getPrecision(newType) <= DataTypeChecks.getPrecision(oldType) + && DataTypeChecks.getScale(newType) <= DataTypeChecks.getScale(oldType) + ? ConvertAction.IGNORE + : ConvertAction.CONVERT; + } + + oldIdx = TIMESTAMP_TYPES.indexOf(oldType.getTypeRoot()); + newIdx = TIMESTAMP_TYPES.indexOf(newType.getTypeRoot()); + if (oldIdx >= 0 && newIdx >= 0) { + return DataTypeChecks.getPrecision(oldType) <= DataTypeChecks.getPrecision(newType) + ? ConvertAction.CONVERT + : ConvertAction.IGNORE; + } + + return ConvertAction.EXCEPTION; + } + + /** + * Return type of {@link UpdatedDataFields#canConvert(DataType, DataType)}. This enum indicates + * the action to perform. + */ + public enum ConvertAction { + + /** {@code oldType} can be converted to {@code newType}. */ + CONVERT, + + /** + * {@code oldType} and {@code newType} belongs to the same type family, but old type has + * higher precision than new type. Ignore this convert request. + */ + IGNORE, + + /** + * {@code oldType} and {@code newType} belongs to different type family. Throw an exception + * indicating that this convert request cannot be handled. 
+ */ + EXCEPTION + } +} diff --git a/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/handler/AlterPaimonTableSchemaEventHandler.java b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/handler/AlterPaimonTableSchemaEventHandler.java new file mode 100644 index 00000000000..1872641c1e4 --- /dev/null +++ b/seatunnel-connectors-v2/connector-paimon/src/main/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/handler/AlterPaimonTableSchemaEventHandler.java @@ -0,0 +1,174 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.connectors.seatunnel.paimon.sink.schema.handler; + +import org.apache.seatunnel.api.table.catalog.Column; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.api.table.schema.event.AlterTableAddColumnEvent; +import org.apache.seatunnel.api.table.schema.event.AlterTableChangeColumnEvent; +import org.apache.seatunnel.api.table.schema.event.AlterTableColumnEvent; +import org.apache.seatunnel.api.table.schema.event.AlterTableColumnsEvent; +import org.apache.seatunnel.api.table.schema.event.AlterTableDropColumnEvent; +import org.apache.seatunnel.api.table.schema.event.AlterTableModifyColumnEvent; +import org.apache.seatunnel.api.table.schema.event.SchemaChangeEvent; +import org.apache.seatunnel.api.table.schema.handler.TableSchemaChangeEventDispatcher; +import org.apache.seatunnel.connectors.seatunnel.paimon.catalog.PaimonCatalog; +import org.apache.seatunnel.connectors.seatunnel.paimon.data.PaimonTypeMapper; + +import org.apache.commons.lang3.StringUtils; +import org.apache.paimon.catalog.Identifier; +import org.apache.paimon.schema.SchemaChange; +import org.apache.paimon.types.DataField; +import org.apache.paimon.types.DataType; +import org.apache.paimon.utils.Preconditions; + +import lombok.extern.slf4j.Slf4j; + +import static org.apache.seatunnel.connectors.seatunnel.paimon.sink.schema.UpdatedDataFields.canConvert; + +@Slf4j +public class AlterPaimonTableSchemaEventHandler { + + private final TableSchemaChangeEventDispatcher TABLESCHEMACHANGER = + new TableSchemaChangeEventDispatcher(); + + private final TableSchema sourceTableSchema; + + private final PaimonCatalog paimonCatalog; + + private final org.apache.paimon.schema.TableSchema sinkPaimonTableSchema; + + private final TablePath paimonTablePath; + + public AlterPaimonTableSchemaEventHandler( + TableSchema sourceTableSchema, + PaimonCatalog paimonCatalog, + org.apache.paimon.schema.TableSchema sinkPaimonTableSchema, + TablePath 
paimonTablePath) { + this.sourceTableSchema = sourceTableSchema; + this.paimonCatalog = paimonCatalog; + this.sinkPaimonTableSchema = sinkPaimonTableSchema; + this.paimonTablePath = paimonTablePath; + } + + public TableSchema apply(SchemaChangeEvent event) { + TableSchema newSchema = TABLESCHEMACHANGER.reset(sourceTableSchema).apply(event); + if (event instanceof AlterTableColumnsEvent) { + for (AlterTableColumnEvent columnEvent : ((AlterTableColumnsEvent) event).getEvents()) { + applySingleSchemaChangeEvent(columnEvent); + } + } else if (event instanceof AlterTableColumnEvent) { + applySingleSchemaChangeEvent(event); + } else { + throw new UnsupportedOperationException("Unsupported alter table event: " + event); + } + return newSchema; + } + + private void applySingleSchemaChangeEvent(SchemaChangeEvent event) { + Identifier identifier = + Identifier.create( + paimonTablePath.getDatabaseName(), paimonTablePath.getTableName()); + if (event instanceof AlterTableAddColumnEvent) { + AlterTableAddColumnEvent alterTableAddColumnEvent = (AlterTableAddColumnEvent) event; + Column column = alterTableAddColumnEvent.getColumn(); + String afterColumnName = alterTableAddColumnEvent.getAfterColumn(); + SchemaChange.Move move = + StringUtils.isBlank(afterColumnName) + ? null + : SchemaChange.Move.after(column.getName(), afterColumnName); + BasicTypeDefine reconvertColumn = PaimonTypeMapper.INSTANCE.reconvert(column); + SchemaChange schemaChange = + SchemaChange.addColumn( + column.getName(), + reconvertColumn.getNativeType(), + column.getComment(), + move); + paimonCatalog.alterTable(identifier, schemaChange, false); + } else if (event instanceof AlterTableDropColumnEvent) { + String columnName = ((AlterTableDropColumnEvent) event).getColumn(); + paimonCatalog.alterTable(identifier, SchemaChange.dropColumn(columnName), true); + } else if (event instanceof AlterTableModifyColumnEvent) { + Column column = ((AlterTableModifyColumnEvent) event).getColumn(); + String afterColumn = ((AlterTableModifyColumnEvent) event).getAfterColumn(); + updateColumn(column, column.getName(), identifier, afterColumn); + } else if (event instanceof AlterTableChangeColumnEvent) { + Column column = ((AlterTableChangeColumnEvent) event).getColumn(); + String afterColumn = ((AlterTableChangeColumnEvent) event).getAfterColumn(); + String oldColumn = ((AlterTableChangeColumnEvent) event).getOldColumn(); + updateColumn(column, oldColumn, identifier, afterColumn); + if (!column.getName().equals(oldColumn)) { + paimonCatalog.alterTable( + identifier, SchemaChange.renameColumn(oldColumn, column.getName()), false); + } + } else { + throw new UnsupportedOperationException("Unsupported alter table event: " + event); + } + } + + private void updateColumn( + Column newColumn, String oldColumnName, Identifier identifier, String afterTheColumn) { + BasicTypeDefine reconvertColumn = PaimonTypeMapper.INSTANCE.reconvert(newColumn); + int idx = sinkPaimonTableSchema.fieldNames().indexOf(oldColumnName); + Preconditions.checkState( + idx >= 0, + "Field name " + oldColumnName + " does not exist in table. 
This is unexpected."); + DataType newDataType = reconvertColumn.getNativeType(); + DataField dataField = sinkPaimonTableSchema.fields().get(idx); + DataType oldDataType = dataField.type(); + switch (canConvert(oldDataType, newDataType)) { + case CONVERT: + paimonCatalog.alterTable( + identifier, + SchemaChange.updateColumnType(oldColumnName, newDataType), + false); + break; + case IGNORE: + log.warn( + "old: {{}-{}} and new: {{}-{}} belong to the same type family, but the old type has higher precision than the new type. Ignoring this convert request.", + dataField.name(), + oldDataType, + reconvertColumn.getName(), + newDataType); + break; + case EXCEPTION: + throw new UnsupportedOperationException( + String.format( + "Cannot convert field %s from type %s to %s of Paimon table %s.", + oldColumnName, oldDataType, newDataType, identifier.getFullName())); + } + if (StringUtils.isNotBlank(afterTheColumn)) { + paimonCatalog.alterTable( + identifier, + SchemaChange.updateColumnPosition( + SchemaChange.Move.after(oldColumnName, afterTheColumn)), + false); + } + String comment = newColumn.getComment(); + if (StringUtils.isNotBlank(comment)) { + paimonCatalog.alterTable( + identifier, SchemaChange.updateColumnComment(oldColumnName, comment), false); + } + paimonCatalog.alterTable( + identifier, + SchemaChange.updateColumnNullability(oldColumnName, newColumn.isNullable()), + false); + } +} diff --git a/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFieldsTest.java b/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFieldsTest.java new file mode 100644 index 00000000000..27c9cf35ed0 --- /dev/null +++ b/seatunnel-connectors-v2/connector-paimon/src/test/java/org/apache/seatunnel/connectors/seatunnel/paimon/sink/schema/UpdatedDataFieldsTest.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.seatunnel.connectors.seatunnel.paimon.sink.schema; + +import org.apache.paimon.types.BigIntType; +import org.apache.paimon.types.DecimalType; +import org.apache.paimon.types.DoubleType; +import org.apache.paimon.types.FloatType; +import org.apache.paimon.types.IntType; +import org.apache.paimon.types.SmallIntType; +import org.apache.paimon.types.TimestampType; +import org.apache.paimon.types.VarCharType; + +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +public class UpdatedDataFieldsTest { + @Test + public void testCanConvertString() { + VarCharType oldVarchar = new VarCharType(true, 10); + VarCharType biggerLengthVarchar = new VarCharType(true, 20); + VarCharType smallerLengthVarchar = new VarCharType(true, 5); + IntType intType = new IntType(); + + UpdatedDataFields.ConvertAction convertAction; + convertAction = UpdatedDataFields.canConvert(oldVarchar, biggerLengthVarchar); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.CONVERT, convertAction); + convertAction = UpdatedDataFields.canConvert(oldVarchar, smallerLengthVarchar); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.IGNORE, convertAction); + convertAction = UpdatedDataFields.canConvert(oldVarchar, intType); + + Assertions.assertEquals(UpdatedDataFields.ConvertAction.EXCEPTION, convertAction); + } + + @Test + public void testCanConvertNumber() { + IntType oldType = new IntType(); + BigIntType bigintType = new BigIntType(); + SmallIntType smallintType = new SmallIntType(); + + FloatType floatType = new FloatType(); + + UpdatedDataFields.ConvertAction convertAction; + convertAction = UpdatedDataFields.canConvert(oldType, bigintType); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.CONVERT, convertAction); + convertAction = UpdatedDataFields.canConvert(oldType, smallintType); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.IGNORE, convertAction); + convertAction = UpdatedDataFields.canConvert(oldType, floatType); + + Assertions.assertEquals(UpdatedDataFields.ConvertAction.EXCEPTION, convertAction); + } + + @Test + public void testCanConvertDecimal() { + DecimalType oldType = new DecimalType(20, 9); + DecimalType biggerRangeType = new DecimalType(30, 10); + DecimalType smallerRangeType = new DecimalType(10, 3); + DoubleType doubleType = new DoubleType(); + + UpdatedDataFields.ConvertAction convertAction = null; + convertAction = UpdatedDataFields.canConvert(oldType, biggerRangeType); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.CONVERT, convertAction); + convertAction = UpdatedDataFields.canConvert(oldType, smallerRangeType); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.IGNORE, convertAction); + convertAction = UpdatedDataFields.canConvert(oldType, doubleType); + + Assertions.assertEquals(UpdatedDataFields.ConvertAction.EXCEPTION, convertAction); + } + + @Test + public void testCanConvertTimestamp() { + TimestampType oldType = new TimestampType(true, 3); + TimestampType biggerLengthTimestamp = new TimestampType(true, 5); + TimestampType smallerLengthTimestamp = new TimestampType(true, 2); + VarCharType varCharType = new VarCharType(); + + UpdatedDataFields.ConvertAction convertAction; + convertAction = UpdatedDataFields.canConvert(oldType, biggerLengthTimestamp); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.CONVERT, convertAction); + convertAction = UpdatedDataFields.canConvert(oldType, smallerLengthTimestamp); + Assertions.assertEquals(UpdatedDataFields.ConvertAction.IGNORE, convertAction); + 
convertAction = UpdatedDataFields.canConvert(oldType, varCharType); + + Assertions.assertEquals(UpdatedDataFields.ConvertAction.EXCEPTION, convertAction); + } +} diff --git a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/util/SchemaUtils.java b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/util/SchemaUtils.java index a4a654dd2b2..4e3a5890859 100644 --- a/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/util/SchemaUtils.java +++ b/seatunnel-connectors-v2/connector-starrocks/src/main/java/org/apache/seatunnel/connectors/seatunnel/starrocks/util/SchemaUtils.java @@ -236,7 +236,7 @@ public static boolean columnExists(Connection connection, TablePath tablePath, S String selectColumnSQL = String.format( "SELECT %s FROM %s WHERE 1 != 1", - quoteIdentifier(column), tablePath.getTableName()); + quoteIdentifier(column), tablePath.getFullName()); try (Statement statement = connection.createStatement()) { return statement.execute(selectColumnSQL); } catch (SQLException e) { diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/SparkCommandArgsTest.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/SparkCommandArgsTest.java index da997e33559..213dc0179a4 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/SparkCommandArgsTest.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/SparkCommandArgsTest.java @@ -25,19 +25,12 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnJre; -import org.junit.jupiter.api.condition.JRE; import java.io.FileNotFoundException; import java.net.URISyntaxException; import static org.apache.seatunnel.api.common.CommonOptions.PLUGIN_NAME; -@DisabledOnJre( - value = JRE.JAVA_11, - disabledReason = - "We should update apache common lang3 version to 3.8 to avoid NPE, " - + "see https://github.com/apache/commons-lang/commit/50ce8c44e1601acffa39f5568f0fc140aade0564") public class SparkCommandArgsTest { @Test public void testExecuteClientCommandArgsWithPluginName() diff --git a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/multitable/MultiTableSinkTest.java b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/multitable/MultiTableSinkTest.java index 61ed44515c7..41b4285391a 100644 --- a/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/multitable/MultiTableSinkTest.java +++ b/seatunnel-core/seatunnel-spark-starter/seatunnel-spark-3-starter/src/test/java/org/apache/seatunnel/core/starter/spark/multitable/MultiTableSinkTest.java @@ -27,8 +27,6 @@ import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Order; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnJre; -import org.junit.jupiter.api.condition.JRE; import lombok.extern.slf4j.Slf4j; @@ -45,11 +43,6 @@ public class MultiTableSinkTest { @Test - @DisabledOnJre( - value = JRE.JAVA_11, - disabledReason = - "We 
should update apache common lang3 version to 3.8 to avoid NPE, " - + "see https://github.com/apache/commons-lang/commit/50ce8c44e1601acffa39f5568f0fc140aade0564") public void testMultiTableSink() throws FileNotFoundException, URISyntaxException, CommandException { String configurePath = "/config/fake_to_inmemory_multi_table.conf"; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveIT.java index bfa83dfb3b9..5307b134b05 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveIT.java @@ -228,8 +228,15 @@ private void executeJob(TestContainer container, String job1, String job2) } @TestTemplate + public void testFakeSinkHive(TestContainer container) throws Exception { + executeJob(container, "/fake_to_hive.conf", "/hive_to_assert.conf"); + } + + @TestTemplate + @Disabled( + "[HDFS/COS/OSS/S3] is not available in CI, if you want to run this test, please set up your own environment in the test case file, hadoop_hive_conf_path_local and ip below}") public void testFakeSinkHiveOnHDFS(TestContainer container) throws Exception { - executeJob(container, "/fake_to_hive_on_hdfs.conf", "/hive_on_hdfs_to_assert.conf"); + // TODO Add the test case for Hive on HDFS } @TestTemplate diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveKerberosIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveKerberosIT.java index c2fca452fa8..dd666dd710a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveKerberosIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/java/org/apache/seatunnel/e2e/connector/hive/HiveKerberosIT.java @@ -257,6 +257,10 @@ public void tearDown() throws Exception { log.info(hiveServerContainer.execInContainer("cat", "/tmp/hive/hive.log").getStdout()); hiveServerContainer.close(); } + if (kerberosContainer != null) { + kerberosContainer.close(); + } + super.tearDown(); } private void initializeConnection() @@ -288,27 +292,27 @@ private void executeJob(TestContainer container, String job1, String job2) } @Test - public void testFakeSinkHiveOnHDFS() throws Exception { + public void testFakeSinkHive() throws Exception { copyAbsolutePathToContainer("/tmp/hive.keytab", "/tmp/hive.keytab"); copyFileToContainer("/kerberos/krb5.conf", "/tmp/krb5.conf"); copyFileToContainer("/kerberos/hive-site.xml", "/tmp/hive-site.xml"); Container.ExecResult fakeToHiveWithKerberosResult = - executeJob("/fake_to_hive_on_hdfs_with_kerberos.conf"); + executeJob("/fake_to_hive_with_kerberos.conf"); Assertions.assertEquals(0, fakeToHiveWithKerberosResult.getExitCode()); Container.ExecResult hiveToAssertWithKerberosResult = - executeJob("/hive_on_hdfs_to_assert_with_kerberos.conf"); + executeJob("/hive_to_assert_with_kerberos.conf"); Assertions.assertEquals(0, hiveToAssertWithKerberosResult.getExitCode()); - Container.ExecResult fakeToHiveResult = executeJob("/fake_to_hive_on_hdfs.conf"); + Container.ExecResult fakeToHiveResult = 
executeJob("/fake_to_hive.conf"); Assertions.assertEquals(1, fakeToHiveResult.getExitCode()); Assertions.assertTrue( fakeToHiveResult .getStderr() .contains("Get hive table information from hive metastore service failed")); - Container.ExecResult hiveToAssertResult = executeJob("/hive_on_hdfs_to_assert.conf"); + Container.ExecResult hiveToAssertResult = executeJob("/hive_to_assert.conf"); Assertions.assertEquals(1, hiveToAssertResult.getExitCode()); Assertions.assertTrue( hiveToAssertResult @@ -316,6 +320,13 @@ public void testFakeSinkHiveOnHDFS() throws Exception { .contains("Get hive table information from hive metastore service failed")); } + @TestTemplate + @Disabled( + "[HDFS/COS/OSS/S3] is not available in CI, if you want to run this test, please set up your own environment in the test case file, hadoop_hive_conf_path_local and ip below}") + public void testFakeSinkHiveOnHDFS(TestContainer container) throws Exception { + // TODO Add the test case for Hive on HDFS + } + @TestTemplate @Disabled( "[HDFS/COS/OSS/S3] is not available in CI, if you want to run this test, please set up your own environment in the test case file, hadoop_hive_conf_path_local and ip below}") diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_on_hdfs.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_on_hdfs.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_on_hdfs_with_kerberos.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_with_kerberos.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_on_hdfs_with_kerberos.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/fake_to_hive_with_kerberos.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_to_assert.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_to_assert.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_to_assert_with_kerberos.conf similarity index 100% rename from seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_on_hdfs_to_assert_with_kerberos.conf rename to seatunnel-e2e/seatunnel-connector-v2-e2e/connector-hive-e2e/src/test/resources/hive_to_assert_with_kerberos.conf diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java index 26181669fc0..feac8d11cae 100644 --- 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/java/org/apache/seatunnel/connectors/seatunnel/jdbc/JdbcMysqlIT.java @@ -104,7 +104,7 @@ public class JdbcMysqlIT extends AbstractJdbcIT { private static final String CREATE_SQL = "CREATE TABLE IF NOT EXISTS %s\n" + "(\n" - + " `c_bit_1` bit(1) DEFAULT NULL,\n" + + " `c-bit_1` bit(1) DEFAULT NULL,\n" + " `c_bit_8` bit(8) DEFAULT NULL,\n" + " `c_bit_16` bit(16) DEFAULT NULL,\n" + " `c_bit_32` bit(32) DEFAULT NULL,\n" @@ -191,7 +191,7 @@ protected void checkResult( String executeKey, TestContainer container, Container.ExecResult execResult) { String[] fieldNames = new String[] { - "c_bit_1", + "c-bit_1", "c_bit_8", "c_bit_16", "c_bit_32", @@ -249,7 +249,7 @@ String driverUrl() { Pair> initTestData() { String[] fieldNames = new String[] { - "c_bit_1", + "c-bit_1", "c_bit_8", "c_bit_16", "c_bit_32", diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.conf index a781c8c3f2d..45febb436f4 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.conf @@ -46,7 +46,7 @@ sink { user = "root" password = "Abc!@#135_seatunnel" - query = """insert into sink (c_bit_1, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, + query = """insert into sink (`c-bit_1`, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, c_mediumint, c_mediumint_unsigned, c_int, c_integer, c_bigint, c_bigint_unsigned, c_decimal, c_decimal_unsigned, c_float, c_float_unsigned, c_double, c_double_unsigned, c_char, c_tinytext, c_mediumtext, c_text, c_varchar, c_json, c_longtext, c_date, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.sql index 84f049bec11..4b0240e3fe0 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.sql +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink.sql @@ -51,7 +51,7 @@ CREATE TABLE sink_table WITH ( INSERT INTO sink_table - SELECT c_bit_1, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, + SELECT `c-bit_1`, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, c_mediumint, c_mediumint_unsigned, c_int, c_integer, c_bigint, c_bigint_unsigned, c_decimal, c_decimal_unsigned, c_float, c_float_unsigned, c_double, c_double_unsigned, c_char, c_tinytext, c_mediumtext, c_text, c_varchar, c_json, c_longtext, c_date, diff --git 
a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.conf index 48474c6dfad..e21c75992c9 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.conf @@ -45,7 +45,7 @@ sink { user = "root" password = "Abc!@#135_seatunnel" connection_check_timeout_sec = 100 - query = """insert into sink (c_bit_1, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, + query = """insert into sink (`c-bit_1`, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, c_mediumint, c_mediumint_unsigned, c_int, c_integer, c_bigint, c_bigint_unsigned, c_decimal, c_decimal_unsigned, c_float, c_float_unsigned, c_double, c_double_unsigned, c_char, c_tinytext, c_mediumtext, c_text, c_varchar, c_json, c_longtext, c_date, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.sql index bc032b9c22e..33a273f341f 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.sql +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel.sql @@ -49,7 +49,7 @@ CREATE TABLE sink_table WITH ( CREATE TABLE temp1 AS - SELECT c_bit_1, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, + SELECT `c-bit_1`, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, c_mediumint, c_mediumint_unsigned, c_int, c_integer, c_bigint, c_bigint_unsigned, c_decimal, c_decimal_unsigned, c_float, c_float_unsigned, c_double, c_double_unsigned, c_char, c_tinytext, c_mediumtext, c_text, c_varchar, c_json, c_longtext, c_date, @@ -58,4 +58,4 @@ CREATE TABLE temp1 AS INSERT INTO sink_table SELECT * FROM temp1; - + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel_upper_lower.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel_upper_lower.conf index cd486d4c4ef..b6b942af18a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel_upper_lower.conf +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-jdbc-e2e/connector-jdbc-e2e-part-1/src/test/resources/jdbc_mysql_source_and_sink_parallel_upper_lower.conf @@ -46,7 +46,7 @@ sink { user = "root" password = "Abc!@#135_seatunnel" connection_check_timeout_sec = 100 - query = """insert into sink (c_bit_1, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, 
c_smallint, c_smallint_unsigned, + query = """insert into sink (`c-bit_1`, c_bit_8, c_bit_16, c_bit_32, c_bit_64, c_boolean, c_tinyint, c_tinyint_unsigned, c_smallint, c_smallint_unsigned, c_mediumint, c_mediumint_unsigned, c_int, c_integer, c_bigint, c_bigint_unsigned, c_decimal, c_decimal_unsigned, c_float, c_float_unsigned, c_double, c_double_unsigned, c_char, c_tinytext, c_mediumtext, c_text, c_varchar, c_json, c_longtext, c_date, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java index 986e5f9f2e5..f9483fd65f3 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/java/org/apache/seatunnel/e2e/connector/kafka/KafkaIT.java @@ -60,6 +60,7 @@ import org.apache.kafka.clients.producer.KafkaProducer; import org.apache.kafka.clients.producer.ProducerConfig; import org.apache.kafka.clients.producer.ProducerRecord; +import org.apache.kafka.common.IsolationLevel; import org.apache.kafka.common.TopicPartition; import org.apache.kafka.common.serialization.ByteArrayDeserializer; import org.apache.kafka.common.serialization.ByteArraySerializer; @@ -97,11 +98,14 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.IntStream; import java.util.stream.Stream; +import static org.awaitility.Awaitility.await; + @Slf4j public class KafkaIT extends TestSuiteBase implements TestResource { private static final String KAFKA_IMAGE_NAME = "confluentinc/cp-kafka:7.0.9"; @@ -752,6 +756,94 @@ public void testKafkaProtobufToAssert(TestContainer container) }); } + @TestTemplate + @DisabledOnContainer( + type = EngineType.SPARK, + value = {}) + public void testKafkaToKafkaExactlyOnceOnStreaming(TestContainer container) + throws InterruptedException { + String producerTopic = "kafka_topic_exactly_once_1"; + String consumerTopic = "kafka_topic_exactly_once_2"; + String sourceData = "Seatunnel Exactly Once Example"; + for (int i = 0; i < 10; i++) { + ProducerRecord record = + new ProducerRecord<>(producerTopic, null, sourceData.getBytes()); + producer.send(record); + producer.flush(); + } + Long endOffset = 0L; + try (KafkaConsumer consumer = new KafkaConsumer<>(kafkaConsumerConfig())) { + consumer.subscribe(Arrays.asList(producerTopic)); + Map offsets = + consumer.endOffsets(Arrays.asList(new TopicPartition(producerTopic, 0))); + endOffset = offsets.entrySet().iterator().next().getValue(); + } + // async execute + CompletableFuture.supplyAsync( + () -> { + try { + container.executeJob("/kafka/kafka_to_kafka_exactly_once_streaming.conf"); + } catch (Exception e) { + log.error("Commit task exception: " + e.getMessage()); + throw new RuntimeException(e); + } + return null; + }); + TimeUnit.MINUTES.sleep(5); + // wait for the data to be written to Kafka + Long finalEndOffset = endOffset; + await().atMost(5, TimeUnit.MINUTES) + .pollInterval(5000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertTrue( + checkData(consumerTopic, finalEndOffset, sourceData))); + } + + @TestTemplate + public void testKafkaToKafkaExactlyOnceOnBatch(TestContainer container) +
throws InterruptedException, IOException { + String producerTopic = "kafka_topic_exactly_once_1"; + String consumerTopic = "kafka_topic_exactly_once_2"; + String sourceData = "Seatunnel Exactly Once Example"; + for (int i = 0; i < 10; i++) { + ProducerRecord record = + new ProducerRecord<>(producerTopic, null, sourceData.getBytes()); + producer.send(record); + producer.flush(); + } + Long endOffset; + try (KafkaConsumer consumer = new KafkaConsumer<>(kafkaConsumerConfig())) { + consumer.subscribe(Arrays.asList(producerTopic)); + Map offsets = + consumer.endOffsets(Arrays.asList(new TopicPartition(producerTopic, 0))); + endOffset = offsets.entrySet().iterator().next().getValue(); + } + Container.ExecResult execResult = + container.executeJob("/kafka/kafka_to_kafka_exactly_once_batch.conf"); + Assertions.assertEquals(0, execResult.getExitCode()); + // wait for data written to kafka + Assertions.assertTrue(checkData(consumerTopic, endOffset, sourceData)); + } + + // Compare the values of data fields obtained from consumers + private boolean checkData(String topicName, long endOffset, String data) { + List listData = getKafkaConsumerListData(topicName, endOffset); + if (listData.isEmpty() || listData.size() != endOffset) { + log.error( + "testKafkaToKafkaExactlyOnce get data size is not expect,get consumer data size {}", + listData.size()); + return false; + } + for (String value : listData) { + if (!data.equals(value)) { + log.error("testKafkaToKafkaExactlyOnce get data value is not expect"); + return false; + } + } + return true; + } + private @NotNull DefaultSeaTunnelRowSerializer getDefaultSeaTunnelRowSerializer( String topic, SeaTunnelRowType seaTunnelRowType, ReadonlyConfig readonlyConfig) { // Create serializer @@ -934,6 +1026,10 @@ private Properties kafkaConsumerConfig() { props.put( ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, OffsetResetStrategy.EARLIEST.toString().toLowerCase()); + // exactly once semantics must set config read_commit + props.put( + ConsumerConfig.ISOLATION_LEVEL_CONFIG, + IsolationLevel.READ_COMMITTED.name().toLowerCase()); props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class); return props; @@ -1067,6 +1163,24 @@ private List getKafkaConsumerListData(String topicName) { return data; } + private List getKafkaConsumerListData(String topicName, long endOffset) { + List data = new ArrayList<>(); + try (KafkaConsumer consumer = new KafkaConsumer<>(kafkaConsumerConfig())) { + consumer.subscribe(Arrays.asList(topicName)); + Long lastProcessedOffset = -1L; + do { + ConsumerRecords records = consumer.poll(Duration.ofMillis(100)); + for (ConsumerRecord record : records) { + if (lastProcessedOffset < record.offset()) { + data.add(record.value()); + } + lastProcessedOffset = record.offset(); + } + } while (lastProcessedOffset < endOffset - 1); + } + return data; + } + private List getKafkaSTRow(String topicName, ConsumerRecordConverter converter) { List data = new ArrayList<>(); try (KafkaConsumer consumer = diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_batch.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_batch.conf new file mode 100644 index 00000000000..9965f65d924 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_batch.conf @@ 
-0,0 +1,42 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +env { + parallelism = 1 + job.mode = "BATCH" + } + +source { + Kafka { + bootstrap.servers = "kafkaCluster:9092" + topic = "kafka_topic_exactly_once_1" + # The default format is json, which is optional + format = text + start_mode = earliest + } + +} +transform {} + + +sink{ + kafka { + format = text + topic = "kafka_topic_exactly_once_2" + bootstrap.servers = "kafkaCluster:9092" + semantics = EXACTLY_ONCE + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_streaming.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_streaming.conf new file mode 100644 index 00000000000..6d039972682 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-kafka-e2e/src/test/resources/kafka/kafka_to_kafka_exactly_once_streaming.conf @@ -0,0 +1,44 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +env { + parallelism = 1 + job.mode = "STREAMING" + checkpoint.interval = 5000 + checkpoint.timeout = 60000 + } + +source { + Kafka { + bootstrap.servers = "kafkaCluster:9092" + topic = "kafka_topic_exactly_once_1" + # The default format is json, which is optional + format = text + start_mode = earliest + } + +} +transform {} + + +sink{ + kafka { + format = text + topic = "kafka_topic_exactly_once_2" + bootstrap.servers = "kafkaCluster:9092" + semantics = EXACTLY_ONCE + } +} \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/pom.xml b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/pom.xml index 71784966f81..c1ce438be3a 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/pom.xml +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/pom.xml @@ -30,6 +30,18 @@ 8.5.6 + + + + org.apache.seatunnel + connector-jdbc + ${project.version} + pom + import + + + + @@ -77,6 +89,34 @@ ${project.version} test + + + org.apache.seatunnel + connector-cdc-mysql + ${project.version} + test + + + + org.apache.seatunnel + connector-cdc-mysql + ${project.version} + test-jar + test + + + + org.testcontainers + mysql + ${testcontainer.version} + + + + mysql + mysql-connector-java + test + + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/AbstractPaimonIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/AbstractPaimonIT.java new file mode 100644 index 00000000000..98323e9df15 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/AbstractPaimonIT.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.e2e.connector.paimon; + +import org.apache.seatunnel.common.utils.FileUtils; +import org.apache.seatunnel.core.starter.utils.CompressionUtils; +import org.apache.seatunnel.e2e.common.TestSuiteBase; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; + +import org.apache.commons.compress.archivers.ArchiveException; +import org.apache.paimon.catalog.Catalog; +import org.apache.paimon.catalog.CatalogContext; +import org.apache.paimon.catalog.CatalogFactory; +import org.apache.paimon.catalog.Identifier; +import org.apache.paimon.options.Options; +import org.apache.paimon.table.Table; + +import lombok.extern.slf4j.Slf4j; + +import java.io.File; +import java.io.IOException; + +@Slf4j +public abstract class AbstractPaimonIT extends TestSuiteBase { + + protected static String CATALOG_ROOT_DIR = "/tmp/"; + protected static final String NAMESPACE = "paimon"; + protected static final String NAMESPACE_TAR = "paimon.tar.gz"; + protected static final String CATALOG_DIR = CATALOG_ROOT_DIR + NAMESPACE + "/"; + protected static final String TARGET_TABLE = "st_test"; + protected static final String FAKE_TABLE1 = "FakeTable1"; + protected static final String FAKE_DATABASE1 = "FakeDatabase1"; + protected static final String FAKE_TABLE2 = "FakeTable1"; + protected static final String FAKE_DATABASE2 = "FakeDatabase2"; + protected String CATALOG_ROOT_DIR_WIN = "C:/Users/"; + protected String CATALOG_DIR_WIN = CATALOG_ROOT_DIR_WIN + NAMESPACE + "/"; + protected boolean isWindows; + protected boolean changeLogEnabled = false; + + protected final ContainerExtendedFactory containerExtendedFactory = + container -> { + if (isWindows) { + FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR); + FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + "paimon.tar"); + FileUtils.createNewDir(CATALOG_ROOT_DIR_WIN); + } else { + FileUtils.deleteFile(CATALOG_ROOT_DIR + NAMESPACE_TAR); + FileUtils.createNewDir(CATALOG_DIR); + } + + container.execInContainer( + "sh", + "-c", + "cd " + + CATALOG_ROOT_DIR + + " && tar -czvf " + + NAMESPACE_TAR + + " " + + NAMESPACE); + container.copyFileFromContainer( + CATALOG_ROOT_DIR + NAMESPACE_TAR, + (isWindows ? 
CATALOG_ROOT_DIR_WIN : CATALOG_ROOT_DIR) + NAMESPACE_TAR); + if (isWindows) { + extractFilesWin(); + } else { + extractFiles(); + } + }; + + private void extractFiles() { + ProcessBuilder processBuilder = new ProcessBuilder(); + processBuilder.command( + "sh", "-c", "cd " + CATALOG_ROOT_DIR + " && tar -zxvf " + NAMESPACE_TAR); + try { + Process process = processBuilder.start(); + // wait command completed + int exitCode = process.waitFor(); + if (exitCode == 0) { + log.info("Extract files successful."); + } else { + log.error("Extract files failed with exit code " + exitCode); + } + } catch (IOException | InterruptedException e) { + e.printStackTrace(); + } + } + + private void extractFilesWin() { + try { + CompressionUtils.unGzip( + new File(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR), new File(CATALOG_ROOT_DIR_WIN)); + CompressionUtils.unTar( + new File(CATALOG_ROOT_DIR_WIN + "paimon.tar"), new File(CATALOG_ROOT_DIR_WIN)); + } catch (IOException | ArchiveException e) { + throw new RuntimeException(e); + } + } + + protected Table getTable(String dbName, String tbName) { + try { + return getCatalog().getTable(getIdentifier(dbName, tbName)); + } catch (Catalog.TableNotExistException e) { + // do something + throw new RuntimeException("table not exist"); + } + } + + private Identifier getIdentifier(String dbName, String tbName) { + return Identifier.create(dbName, tbName); + } + + private Catalog getCatalog() { + Options options = new Options(); + if (isWindows) { + options.set("warehouse", CATALOG_DIR_WIN); + } else { + options.set("warehouse", "file://" + CATALOG_DIR); + } + return CatalogFactory.createCatalog(CatalogContext.create(options)); + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java index 05d64679317..0453facae5c 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkCDCIT.java @@ -17,26 +17,16 @@ package org.apache.seatunnel.e2e.connector.paimon; -import org.apache.seatunnel.common.utils.FileUtils; import org.apache.seatunnel.common.utils.SeaTunnelException; -import org.apache.seatunnel.core.starter.utils.CompressionUtils; import org.apache.seatunnel.e2e.common.TestResource; -import org.apache.seatunnel.e2e.common.TestSuiteBase; -import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; import org.apache.seatunnel.e2e.common.container.EngineType; import org.apache.seatunnel.e2e.common.container.TestContainer; import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; import org.apache.seatunnel.e2e.common.util.JobIdGenerator; -import org.apache.commons.compress.archivers.ArchiveException; import org.apache.commons.lang3.StringUtils; import org.apache.paimon.CoreOptions; -import org.apache.paimon.catalog.Catalog; -import org.apache.paimon.catalog.CatalogContext; -import org.apache.paimon.catalog.CatalogFactory; -import org.apache.paimon.catalog.Identifier; import org.apache.paimon.data.InternalRow; -import org.apache.paimon.options.Options; import org.apache.paimon.reader.RecordReader; import org.apache.paimon.table.FileStoreTable; import org.apache.paimon.table.Table; @@ -56,8 +46,6 @@ 
import lombok.extern.slf4j.Slf4j; -import java.io.File; -import java.io.IOException; import java.time.LocalDate; import java.util.ArrayList; import java.util.Arrays; @@ -74,21 +62,7 @@ disabledReason = "Spark and Flink engine can not auto create paimon table on worker node in local file(e.g flink tm) by savemode feature which can lead error") @Slf4j -public class PaimonSinkCDCIT extends TestSuiteBase implements TestResource { - - private static String CATALOG_ROOT_DIR = "/tmp/"; - private static final String NAMESPACE = "paimon"; - private static final String NAMESPACE_TAR = "paimon.tar.gz"; - private static final String CATALOG_DIR = CATALOG_ROOT_DIR + NAMESPACE + "/"; - private static final String TARGET_TABLE = "st_test"; - private static final String FAKE_TABLE1 = "FakeTable1"; - private static final String FAKE_DATABASE1 = "FakeDatabase1"; - private static final String FAKE_TABLE2 = "FakeTable1"; - private static final String FAKE_DATABASE2 = "FakeDatabase2"; - private String CATALOG_ROOT_DIR_WIN = "C:/Users/"; - private String CATALOG_DIR_WIN = CATALOG_ROOT_DIR_WIN + NAMESPACE + "/"; - private boolean isWindows; - private boolean changeLogEnabled = false; +public class PaimonSinkCDCIT extends AbstractPaimonIT implements TestResource { @BeforeAll @Override @@ -676,66 +650,7 @@ public void testChangelogFullCompaction(TestContainer container) throws Exceptio voidCompletableFuture.cancel(true); } - protected final ContainerExtendedFactory containerExtendedFactory = - container -> { - if (isWindows) { - FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR); - FileUtils.deleteFile(CATALOG_ROOT_DIR_WIN + "paimon.tar"); - FileUtils.createNewDir(CATALOG_ROOT_DIR_WIN); - } else { - FileUtils.deleteFile(CATALOG_ROOT_DIR + NAMESPACE_TAR); - FileUtils.createNewDir(CATALOG_DIR); - } - - container.execInContainer( - "sh", - "-c", - "cd " - + CATALOG_ROOT_DIR - + " && tar -czvf " - + NAMESPACE_TAR - + " " - + NAMESPACE); - container.copyFileFromContainer( - CATALOG_ROOT_DIR + NAMESPACE_TAR, - (isWindows ? 
CATALOG_ROOT_DIR_WIN : CATALOG_ROOT_DIR) + NAMESPACE_TAR); - if (isWindows) { - extractFilesWin(); - } else { - extractFiles(); - } - }; - - private void extractFiles() { - ProcessBuilder processBuilder = new ProcessBuilder(); - processBuilder.command( - "sh", "-c", "cd " + CATALOG_ROOT_DIR + " && tar -zxvf " + NAMESPACE_TAR); - try { - Process process = processBuilder.start(); - // wait command completed - int exitCode = process.waitFor(); - if (exitCode == 0) { - log.info("Extract files successful."); - } else { - log.error("Extract files failed with exit code " + exitCode); - } - } catch (IOException | InterruptedException e) { - e.printStackTrace(); - } - } - - private void extractFilesWin() { - try { - CompressionUtils.unGzip( - new File(CATALOG_ROOT_DIR_WIN + NAMESPACE_TAR), new File(CATALOG_ROOT_DIR_WIN)); - CompressionUtils.unTar( - new File(CATALOG_ROOT_DIR_WIN + "paimon.tar"), new File(CATALOG_ROOT_DIR_WIN)); - } catch (IOException | ArchiveException e) { - throw new RuntimeException(e); - } - } - - private List loadPaimonData(String dbName, String tbName) throws Exception { + protected List loadPaimonData(String dbName, String tbName) throws Exception { FileStoreTable table = (FileStoreTable) getTable(dbName, tbName); ReadBuilder readBuilder = table.newReadBuilder(); TableScan.Plan plan = readBuilder.newScan().plan(); @@ -785,28 +700,4 @@ private List loadPaimonData(String dbName, String tbName) throws E "=========================================================================================="); return result; } - - protected Table getTable(String dbName, String tbName) { - try { - return getCatalog().getTable(getIdentifier(dbName, tbName)); - } catch (Catalog.TableNotExistException e) { - // do something - throw new RuntimeException("table not exist"); - } - } - - private Identifier getIdentifier(String dbName, String tbName) { - return Identifier.create(dbName, tbName); - } - - private Catalog getCatalog() { - Options options = new Options(); - if (isWindows) { - options.set("warehouse", CATALOG_DIR_WIN); - } else { - options.set("warehouse", "file://" + CATALOG_DIR); - } - Catalog catalog = CatalogFactory.createCatalog(CatalogContext.create(options)); - return catalog; - } } diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkWithSchemaEvolutionIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkWithSchemaEvolutionIT.java new file mode 100644 index 00000000000..0b9f6993297 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/java/org/apache/seatunnel/e2e/connector/paimon/PaimonSinkWithSchemaEvolutionIT.java @@ -0,0 +1,502 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.e2e.connector.paimon; + +import org.apache.seatunnel.api.table.catalog.CatalogTable; +import org.apache.seatunnel.api.table.catalog.TablePath; +import org.apache.seatunnel.api.table.catalog.TableSchema; +import org.apache.seatunnel.api.table.converter.BasicTypeDefine; +import org.apache.seatunnel.common.utils.JdbcUrlUtil; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlContainer; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.MySqlVersion; +import org.apache.seatunnel.connectors.seatunnel.cdc.mysql.testutils.UniqueDatabase; +import org.apache.seatunnel.connectors.seatunnel.jdbc.catalog.mysql.MySqlCatalog; +import org.apache.seatunnel.connectors.seatunnel.paimon.data.PaimonTypeMapper; +import org.apache.seatunnel.e2e.common.TestResource; +import org.apache.seatunnel.e2e.common.container.ContainerExtendedFactory; +import org.apache.seatunnel.e2e.common.container.EngineType; +import org.apache.seatunnel.e2e.common.container.TestContainer; +import org.apache.seatunnel.e2e.common.junit.DisabledOnContainer; +import org.apache.seatunnel.e2e.common.junit.TestContainerExtension; + +import org.apache.paimon.data.BinaryString; +import org.apache.paimon.data.Decimal; +import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.predicate.Predicate; +import org.apache.paimon.predicate.PredicateBuilder; +import org.apache.paimon.reader.RecordReader; +import org.apache.paimon.table.FileStoreTable; +import org.apache.paimon.table.source.ReadBuilder; +import org.apache.paimon.table.source.TableRead; +import org.apache.paimon.table.source.TableScan; +import org.apache.paimon.types.DataField; +import org.apache.paimon.types.DataType; + +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.MethodOrderer; +import org.junit.jupiter.api.TestMethodOrder; +import org.junit.jupiter.api.TestTemplate; +import org.testcontainers.containers.Container; +import org.testcontainers.containers.output.Slf4jLogConsumer; +import org.testcontainers.lifecycle.Startables; +import org.testcontainers.shaded.org.apache.commons.lang3.tuple.ImmutableTriple; +import org.testcontainers.utility.DockerLoggerFactory; + +import lombok.SneakyThrows; +import lombok.extern.slf4j.Slf4j; +import net.sf.jsqlparser.schema.Column; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import static org.awaitility.Awaitility.await; + +@Slf4j +@TestMethodOrder(MethodOrderer.OrderAnnotation.class) +@DisabledOnContainer( + value = {}, + type = {EngineType.SPARK, EngineType.FLINK}, + disabledReason = + "Currently SPARK do not support cdc. 
In addition, currently only the zeta engine supports schema evolution for pr https://github.com/apache/seatunnel/pull/5125.") +public class PaimonSinkWithSchemaEvolutionIT extends AbstractPaimonIT implements TestResource { + + private static final String MYSQL_DATABASE = "shop"; + private static final String SOURCE_TABLE = "products"; + + private static final String MYSQL_HOST = "mysql_cdc_e2e"; + private static final String MYSQL_USER_NAME = "mysqluser"; + private static final String MYSQL_USER_PASSWORD = "mysqlpw"; + + private static final String QUERY = "select * from %s.%s"; + + private static final MySqlContainer MYSQL_CONTAINER = createMySqlContainer(MySqlVersion.V8_0); + + private final UniqueDatabase shopDatabase = + new UniqueDatabase( + MYSQL_CONTAINER, MYSQL_DATABASE, "mysqluser", "mysqlpw", MYSQL_DATABASE); + + private static MySqlContainer createMySqlContainer(MySqlVersion version) { + return new MySqlContainer(version) + .withConfigurationOverride("docker/server-gtids/my.cnf") + .withSetupSQL("docker/setup.sql") + .withNetwork(NETWORK) + .withNetworkAliases(MYSQL_HOST) + .withDatabaseName(MYSQL_DATABASE) + .withUsername(MYSQL_USER_NAME) + .withPassword(MYSQL_USER_PASSWORD) + .withLogConsumer( + new Slf4jLogConsumer(DockerLoggerFactory.getLogger("mysql-docker-image"))); + } + + private String driverUrl() { + return "https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/8.0.32/mysql-connector-j-8.0.32.jar"; + } + + @TestContainerExtension + protected final ContainerExtendedFactory extendedFactory = + container -> { + Container.ExecResult extraCommands = + container.execInContainer( + "bash", + "-c", + "mkdir -p /tmp/seatunnel/plugins/MySQL-CDC/lib && cd /tmp/seatunnel/plugins/MySQL-CDC/lib && wget " + + driverUrl()); + Assertions.assertEquals(0, extraCommands.getExitCode(), extraCommands.getStderr()); + }; + + @BeforeAll + @Override + public void startUp() throws Exception { + log.info("The second stage: Starting Mysql containers..."); + Startables.deepStart(Stream.of(MYSQL_CONTAINER)).join(); + log.info("Mysql Containers are started"); + shopDatabase.createAndInitialize(); + log.info("Mysql ddl execution is complete"); + } + + @AfterAll + @Override + public void tearDown() { + if (MYSQL_CONTAINER != null) { + MYSQL_CONTAINER.close(); + } + } + + @TestTemplate + public void testMysqlCdcSinkPaimonWithSchemaChange(TestContainer container) throws Exception { + String jobConfigFile = "/mysql_cdc_to_paimon_with_schema_change.conf"; + CompletableFuture.runAsync( + () -> { + try { + container.executeJob(jobConfigFile); + } catch (Exception e) { + log.error("Commit task exception :" + e.getMessage()); + throw new RuntimeException(e); + } + }); + + // Waiting for auto create sink table + Thread.sleep(15000); + + await().atMost(30, TimeUnit.SECONDS) + .untilAsserted( + () -> { + // copy paimon to local + container.executeExtraCommands(containerExtendedFactory); + Assertions.assertIterableEquals( + queryMysql(String.format(QUERY, MYSQL_DATABASE, SOURCE_TABLE)), + queryPaimon(null, 0, Integer.MAX_VALUE)); + }); + + // Case 1: Add columns with data at same time + shopDatabase.setTemplateName("add_columns").createAndInitialize(); + // Because the paimon is not supported default value, so when the source table add columns + // with default value at same time, the history data in paimon has no value. 
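+ // Illustrative sketch, reusing this class's own queryPaimon helper and id 110 (a row inserted
+ // before the ALTER TABLE in add_columns.sql): queryPaimon(new String[] {"id", "add_column1"}, 110, 110)
+ // is expected to read back null for add_column1, which is why the id ranges verified below only
+ // project the original columns for rows written before the new columns were added.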
+ List> idRangesWithFiledProjection1 = + getIdRangesWithFiledProjectionImmutableTriplesCase1(); + vertifySchemaAndData(container, idRangesWithFiledProjection1); + + // Case 2: Drop columns with data at same time + shopDatabase.setTemplateName("drop_columns").createAndInitialize(); + List> idRangesWithFiledProjection2 = + getIdRangesWithFiledProjectionImmutableTriplesCase2(); + vertifySchemaAndData(container, idRangesWithFiledProjection2); + + // Case 3: Change columns with data at same time + shopDatabase.setTemplateName("change_columns").createAndInitialize(); + List> idRangesWithFiledProjection3 = + getIdRangesWithFiledProjectionImmutableTriplesCase3(); + vertifySchemaAndData(container, idRangesWithFiledProjection3); + + // Case 4: Modify columns with data at same time + shopDatabase.setTemplateName("modify_columns").createAndInitialize(); + List> idRangesWithFiledProjection4 = + getIdRangesWithFiledProjectionImmutableTriplesCase4(); + vertifySchemaAndData(container, idRangesWithFiledProjection4); + } + + private List> + getIdRangesWithFiledProjectionImmutableTriplesCase4() { + List> newIdRangesWithFiledProjection = + getIdRangesWithFiledProjectionImmutableTriplesCase3(); + newIdRangesWithFiledProjection.add( + ImmutableTriple.of( + new String[] {"id", "name", "description", "weight", "add_column"}, + 164, + 172)); + return newIdRangesWithFiledProjection; + } + + private List> + getIdRangesWithFiledProjectionImmutableTriplesCase3() { + String changeColumnNameBefore = "add_column2"; + String changeColumnNameAfter = "add_column"; + List> + idRangesWithFiledProjectionImmutableTriplesCase2 = + getIdRangesWithFiledProjectionImmutableTriplesCase2(); + List> newIdRangesWithFiledProjection = + idRangesWithFiledProjectionImmutableTriplesCase2.stream() + .map( + immutableTriple -> + ImmutableTriple.of( + Arrays.stream(immutableTriple.getLeft()) + .map( + column -> + column.equals( + changeColumnNameBefore) + ? 
changeColumnNameAfter + : column) + .toArray(String[]::new), + immutableTriple.getMiddle(), + immutableTriple.getRight())) + .collect(Collectors.toList()); + newIdRangesWithFiledProjection.add( + ImmutableTriple.of( + new String[] {"id", "name", "description", "weight", "add_column"}, + 155, + 163)); + return newIdRangesWithFiledProjection; + } + + private List> + getIdRangesWithFiledProjectionImmutableTriplesCase2() { + List dropColumnNames = + Arrays.asList("add_column4", "add_column6", "add_column1", "add_column3"); + List> + idRangesWithFiledProjectionImmutableTriplesCase1 = + getIdRangesWithFiledProjectionImmutableTriplesCase1(); + List> newIdRangesWithFiledProjection = + idRangesWithFiledProjectionImmutableTriplesCase1.stream() + .map( + immutableTriple -> + ImmutableTriple.of( + Arrays.stream(immutableTriple.getLeft()) + .filter( + column -> + !dropColumnNames.contains( + column)) + .toArray(String[]::new), + immutableTriple.getMiddle(), + immutableTriple.getRight())) + .collect(Collectors.toList()); + + newIdRangesWithFiledProjection.add( + ImmutableTriple.of( + new String[] {"id", "name", "description", "weight", "add_column2"}, + 137, + 154)); + return newIdRangesWithFiledProjection; + } + + private static List> + getIdRangesWithFiledProjectionImmutableTriplesCase1() { + return new ArrayList>() { + { + add( + ImmutableTriple.of( + new String[] {"id", "name", "description", "weight"}, 0, 118)); + add( + ImmutableTriple.of( + new String[] { + "id", + "name", + "description", + "weight", + "add_column1", + "add_column2" + }, + 119, + 127)); + add( + ImmutableTriple.of( + new String[] { + "id", + "name", + "description", + "weight", + "add_column1", + "add_column2", + "add_column3", + "add_column4" + }, + 128, + 136)); + add( + ImmutableTriple.of( + new String[] { + "id", + "add_column6", + "name", + "description", + "weight", + "add_column1", + "add_column2", + "add_column3", + "add_column4" + }, + 173, + 181)); + } + }; + } + + private void vertifySchemaAndData( + TestContainer container, + List> idRangesWithFiledProjection) { + await().atMost(30, TimeUnit.SECONDS) + .untilAsserted( + () -> { + // copy paimon to local + container.executeExtraCommands(containerExtendedFactory); + // 1. Vertify the schema + vertifySchema(); + + // 2. 
Vertify the data + idRangesWithFiledProjection.forEach( + idRange -> + Assertions.assertIterableEquals( + queryMysql( + String.format( + "select " + + String.join( + ",", + Arrays.asList( + idRange + .getLeft())) + + " from %s.%s where id >= %s and id <= %s", + MYSQL_DATABASE, + SOURCE_TABLE, + idRange.getMiddle(), + idRange.getRight())), + queryPaimon( + idRange.getLeft(), + idRange.getMiddle(), + idRange.getRight()))); + }); + } + + private void vertifySchema() { + try (MySqlCatalog mySqlCatalog = + new MySqlCatalog( + "mysql", + MYSQL_USER_NAME, + MYSQL_USER_PASSWORD, + JdbcUrlUtil.getUrlInfo(MYSQL_CONTAINER.getJdbcUrl()))) { + mySqlCatalog.open(); + CatalogTable mySqlCatalogTable = + mySqlCatalog.getTable(TablePath.of(MYSQL_DATABASE, SOURCE_TABLE)); + TableSchema tableSchemaInMysql = mySqlCatalogTable.getTableSchema(); + + List columns = + tableSchemaInMysql.getColumns(); + FileStoreTable table = (FileStoreTable) getTable("mysql_to_paimon", "products"); + List fields = table.schema().fields(); + + Assertions.assertEquals(fields.size(), columns.size()); + for (int i = 0; i < columns.size(); i++) { + BasicTypeDefine paimonTypeDefine = + PaimonTypeMapper.INSTANCE.reconvert(columns.get(i)); + DataField dataField = fields.get(i); + Assertions.assertEquals(paimonTypeDefine.getName(), dataField.name()); + Assertions.assertEquals( + dataField.type().getTypeRoot(), + paimonTypeDefine.getNativeType().getTypeRoot()); + } + } + } + + private int getColumnIndex(PredicateBuilder builder, Column column) { + int index = builder.indexOf(column.getColumnName()); + if (index == -1) { + throw new IllegalArgumentException( + String.format("The column named [%s] is not exists", column.getColumnName())); + } + return index; + } + + @SneakyThrows + protected List> queryPaimon( + String[] projectionFiles, int lowerBound, int upperBound) { + FileStoreTable table = (FileStoreTable) getTable("mysql_to_paimon", "products"); + Predicate finalPredicate = getPredicateWithBound(lowerBound, upperBound, table); + ReadBuilder readBuilder = table.newReadBuilder().withFilter(finalPredicate); + List fields = table.schema().fields(); + if (projectionFiles != null && projectionFiles.length > 0) { + readBuilder.withProjection( + getProjectionIndex(table.schema().fieldNames(), projectionFiles)); + fields = + table.schema().fields().stream() + .filter( + dataField -> + Arrays.asList(projectionFiles) + .contains(dataField.name())) + .collect(Collectors.toList()); + } + TableScan.Plan plan = readBuilder.newScan().plan(); + TableRead tableRead = readBuilder.newRead(); + + List> results = new ArrayList<>(); + try (RecordReader reader = tableRead.executeFilter().createReader(plan)) { + List finalFields = fields; + reader.forEachRemaining( + row -> { + List rowRecords = new ArrayList<>(); + for (int i = 0; i < finalFields.size(); i++) { + Object fieldOrNull = + InternalRow.createFieldGetter(finalFields.get(i).type(), i) + .getFieldOrNull(row); + if (fieldOrNull instanceof BinaryString) { + fieldOrNull = ((BinaryString) fieldOrNull).toString(); + } else if (fieldOrNull instanceof Timestamp) { + fieldOrNull = ((Timestamp) fieldOrNull).toSQLTimestamp(); + } else if (fieldOrNull instanceof Decimal) { + fieldOrNull = ((Decimal) fieldOrNull).toBigDecimal(); + } + rowRecords.add(fieldOrNull); + } + results.add(rowRecords); + }); + } + return results; + } + + private Predicate getPredicateWithBound(int lowerBound, int upperBound, FileStoreTable table) { + PredicateBuilder lowerBoundPredicateBuilder = new PredicateBuilder(table.rowType()); + 
Predicate lowerBoundPredicate = + lowerBoundPredicateBuilder.greaterOrEqual( + getColumnIndex(lowerBoundPredicateBuilder, new Column("id")), lowerBound); + + PredicateBuilder upperBoundPredicateBuilder = new PredicateBuilder(table.rowType()); + Predicate upperBoundPredicate = + upperBoundPredicateBuilder.lessOrEqual( + getColumnIndex(upperBoundPredicateBuilder, new Column("id")), upperBound); + + return PredicateBuilder.and(lowerBoundPredicate, upperBoundPredicate); + } + + private int[] getProjectionIndex(List actualFieldNames, String[] projectionFieldNames) { + return Arrays.stream(projectionFieldNames) + .mapToInt( + projectionFieldName -> { + int index = actualFieldNames.indexOf(projectionFieldName); + if (index == -1) { + throw new IllegalArgumentException( + "column " + projectionFieldName + " does not exist."); + } + return index; + }) + .toArray(); + } + + private Connection getJdbcConnection() throws SQLException { + return DriverManager.getConnection( + MYSQL_CONTAINER.getJdbcUrl(), + MYSQL_CONTAINER.getUsername(), + MYSQL_CONTAINER.getPassword()); + } + + private List> queryMysql(String sql) { + try (Connection connection = getJdbcConnection()) { + ResultSet resultSet = connection.createStatement().executeQuery(sql); + List> result = new ArrayList<>(); + int columnCount = resultSet.getMetaData().getColumnCount(); + while (resultSet.next()) { + ArrayList objects = new ArrayList<>(); + for (int i = 1; i <= columnCount; i++) { + objects.add(resultSet.getObject(i)); + } + result.add(objects); + } + return result; + } catch (SQLException e) { + throw new RuntimeException(e); + } + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/add_columns.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/add_columns.sql new file mode 100644 index 00000000000..d08c19c8ca4 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/add_columns.sql @@ -0,0 +1,82 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; +INSERT INTO products +VALUES (110,"scooter","Small 2-wheel scooter",3.14), + (111,"car battery","12V car battery",8.1), + (112,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (113,"hammer","12oz carpenter's hammer",0.75), + (114,"hammer","14oz carpenter's hammer",0.875), + (115,"hammer","16oz carpenter's hammer",1.0), + (116,"rocks","box of assorted rocks",5.3), + (117,"jacket","water resistent black wind breaker",0.1), + (118,"spare tire","24 inch spare tire",22.2); +update products set name = 'dailai' where id = 101; +delete from products where id = 102; + +alter table products ADD COLUMN add_column1 varchar(64) not null default 'yy',ADD COLUMN add_column2 int not null default 1; + +update products set name = 'dailai' where id = 110; +insert into products +values (119,"scooter","Small 2-wheel scooter",3.14,'xx',1), + (120,"car battery","12V car battery",8.1,'xx',2), + (121,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3), + (122,"hammer","12oz carpenter's hammer",0.75,'xx',4), + (123,"hammer","14oz carpenter's hammer",0.875,'xx',5), + (124,"hammer","16oz carpenter's hammer",1.0,'xx',6), + (125,"rocks","box of assorted rocks",5.3,'xx',7), + (126,"jacket","water resistent black wind breaker",0.1,'xx',8), + (127,"spare tire","24 inch spare tire",22.2,'xx',9); +delete from products where id = 118; + +alter table products ADD COLUMN add_column3 float not null default 1.1; +alter table products ADD COLUMN add_column4 timestamp not null default current_timestamp(); + +delete from products where id = 113; +insert into products +values (128,"scooter","Small 2-wheel scooter",3.14,'xx',1,1.1,'2023-02-02 09:09:09'), + (129,"car battery","12V car battery",8.1,'xx',2,1.2,'2023-02-02 09:09:09'), + (130,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3,1.3,'2023-02-02 09:09:09'), + (131,"hammer","12oz carpenter's hammer",0.75,'xx',4,1.4,'2023-02-02 09:09:09'), + (132,"hammer","14oz carpenter's hammer",0.875,'xx',5,1.5,'2023-02-02 09:09:09'), + (133,"hammer","16oz carpenter's hammer",1.0,'xx',6,1.6,'2023-02-02 09:09:09'), + (134,"rocks","box of assorted rocks",5.3,'xx',7,1.7,'2023-02-02 09:09:09'), + (135,"jacket","water resistent black wind breaker",0.1,'xx',8,1.8,'2023-02-02 09:09:09'), + (136,"spare tire","24 inch spare tire",22.2,'xx',9,1.9,'2023-02-02 09:09:09'); +update products set name = 'dailai' where id = 135; + +alter table products ADD COLUMN add_column6 varchar(64) not null default 'ff' after id; +delete from products where id = 115; +insert into products +values (173,'tt',"scooter","Small 2-wheel scooter",3.14,'xx',1,1.1,'2023-02-02 09:09:09'), + (174,'tt',"car battery","12V car battery",8.1,'xx',2,1.2,'2023-02-02 09:09:09'), + (175,'tt',"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3,1.3,'2023-02-02 09:09:09'), + (176,'tt',"hammer","12oz carpenter's hammer",0.75,'xx',4,1.4,'2023-02-02 09:09:09'), + (177,'tt',"hammer","14oz carpenter's hammer",0.875,'xx',5,1.5,'2023-02-02 09:09:09'), + (178,'tt',"hammer","16oz carpenter's hammer",1.0,'xx',6,1.6,'2023-02-02 09:09:09'), + (179,'tt',"rocks","box of assorted 
rocks",5.3,'xx',7,1.7,'2023-02-02 09:09:09'), + (180,'tt',"jacket","water resistent black wind breaker",0.1,'xx',8,1.8,'2023-02-02 09:09:09'), + (181,'tt',"spare tire","24 inch spare tire",22.2,'xx',9,1.9,'2023-02-02 09:09:09'); + +-- add column for irrelevant table +ALTER TABLE products_on_hand ADD COLUMN add_column5 varchar(64) not null default 'yy'; + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/change_columns.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/change_columns.sql new file mode 100644 index 00000000000..a17f9a0a936 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/change_columns.sql @@ -0,0 +1,36 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; + +alter table products change add_column2 add_column int default 1 not null; +delete from products where id < 155; +insert into products +values (155,"scooter","Small 2-wheel scooter",3.14,1), + (156,"car battery","12V car battery",8.1,2), + (157,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,3), + (158,"hammer","12oz carpenter's hammer",0.75,4), + (159,"hammer","14oz carpenter's hammer",0.875,5), + (160,"hammer","16oz carpenter's hammer",1.0,6), + (161,"rocks","box of assorted rocks",5.3,7), + (162,"jacket","water resistent black wind breaker",0.1,8), + (163,"spare tire","24 inch spare tire",22.2,9); + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/drop_columns.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/drop_columns.sql new file mode 100644 index 00000000000..5c3b7d1f549 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/drop_columns.sql @@ -0,0 +1,50 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; + +alter table products drop column add_column4,drop column add_column6; +insert into products +values (137,"scooter","Small 2-wheel scooter",3.14,'xx',1,1.1), + (138,"car battery","12V car battery",8.1,'xx',2,1.2), + (139,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3,1.3), + (140,"hammer","12oz carpenter's hammer",0.75,'xx',4,1.4), + (141,"hammer","14oz carpenter's hammer",0.875,'xx',5,1.5), + (142,"hammer","16oz carpenter's hammer",1.0,'xx',6,1.6), + (143,"rocks","box of assorted rocks",5.3,'xx',7,1.7), + (144,"jacket","water resistent black wind breaker",0.1,'xx',8,1.8), + (145,"spare tire","24 inch spare tire",22.2,'xx',9,1.9); +update products set name = 'dailai' where id in (140,141,142); +delete from products where id < 137; + + +alter table products drop column add_column1,drop column add_column3; +insert into products +values (146,"scooter","Small 2-wheel scooter",3.14,1), + (147,"car battery","12V car battery",8.1,2), + (148,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,3), + (149,"hammer","12oz carpenter's hammer",0.75,4), + (150,"hammer","14oz carpenter's hammer",0.875,5), + (151,"hammer","16oz carpenter's hammer",1.0,6), + (152,"rocks","box of assorted rocks",5.3,7), + (153,"jacket","water resistent black wind breaker",0.1,8), + (154,"spare tire","24 inch spare tire",22.2,9); +update products set name = 'dailai' where id > 143; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/inventory.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/inventory.sql new file mode 100644 index 00000000000..a0a225981f3 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/inventory.sql @@ -0,0 +1,94 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: inventory +-- ---------------------------------------------------------------------------------------------------------------- +-- Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'SeaTunnel', + description VARCHAR(512), + weight FLOAT +); +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products +VALUES (default,"scooter","Small 2-wheel scooter",3.14), + (default,"car battery","12V car battery",8.1), + (default,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (default,"hammer","12oz carpenter's hammer",0.75), + (default,"hammer","14oz carpenter's hammer",0.875), + (default,"hammer","16oz carpenter's hammer",1.0), + (default,"rocks","box of assorted rocks",5.3), + (default,"jacket","water resistent black wind breaker",0.1), + (default,"spare tire","24 inch spare tire",22.2); + +-- Create and populate the products on hand using multiple inserts +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL, + FOREIGN KEY (product_id) REFERENCES products(id) +); + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); + +-- Create some customers ... +CREATE TABLE customers ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + first_name VARCHAR(255) NOT NULL, + last_name VARCHAR(255) NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE KEY +) AUTO_INCREMENT=1001; + + +INSERT INTO customers +VALUES (default,"Sally","Thomas","sally.thomas@acme.com"), + (default,"George","Bailey","gbailey@foobar.com"), + (default,"Edward","Walker","ed@walker.com"), + (default,"Anne","Kretchmar","annek@noanswer.org"); + +-- Create some very simple orders +CREATE TABLE orders ( + order_number INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + order_date DATE NOT NULL, + purchaser INTEGER NOT NULL, + quantity INTEGER NOT NULL, + product_id INTEGER NOT NULL, + FOREIGN KEY order_customer (purchaser) REFERENCES customers(id), + FOREIGN KEY ordered_product (product_id) REFERENCES products(id) +) AUTO_INCREMENT = 10001; + +INSERT INTO orders +VALUES (default, '2016-01-16', 1001, 1, 102), + (default, '2016-01-17', 1002, 2, 105), + (default, '2016-02-18', 1004, 3, 109), + (default, '2016-02-19', 1002, 2, 106), + (default, '16-02-21', 1003, 1, 107); + +CREATE TABLE category ( + id INT NOT NULL AUTO_INCREMENT PRIMARY KEY, + category_name VARCHAR(255) +); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/modify_columns.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/modify_columns.sql new file mode 100644 index 00000000000..ab64c47567b --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/modify_columns.sql @@ -0,0 +1,36 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. 
See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; + +alter table products modify name longtext null; +delete from products where id < 155; +insert into products +values (164,"scooter","Small 2-wheel scooter",3.14,1), + (165,"car battery","12V car battery",8.1,2), + (166,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,3), + (167,"hammer","12oz carpenter's hammer",0.75,4), + (168,"hammer","14oz carpenter's hammer",0.875,5), + (169,"hammer","16oz carpenter's hammer",1.0,6), + (170,"rocks","box of assorted rocks",5.3,7), + (171,"jacket","water resistent black wind breaker",0.1,8), + (172,"spare tire","24 inch spare tire",22.2,9); + diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/mysql_cdc.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/mysql_cdc.sql new file mode 100644 index 00000000000..25d7abca0da --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/mysql_cdc.sql @@ -0,0 +1,683 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: inventory +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `mysql_cdc`; + +use mysql_cdc; +-- Create a mysql data source table +CREATE TABLE mysql_cdc_e2e_source_table +( + `id` int NOT NULL AUTO_INCREMENT, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) collate gbk_bin DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY uniq_key_f (`id`, `f_int`, `f_bigint`) USING BTREE +) ENGINE = InnoDB + AUTO_INCREMENT = 2 + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table2 +( + `id` int NOT NULL AUTO_INCREMENT, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT 
NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL, + PRIMARY KEY (`id`), + UNIQUE KEY uniq_key_f (`id`, `f_int`, `f_bigint`) USING BTREE +) ENGINE = InnoDB + AUTO_INCREMENT = 2 + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table_no_primary_key +( + `id` int NOT NULL, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table_1_custom_primary_key +( + `id` int NOT NULL, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` 
year DEFAULT NULL +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table_2_custom_primary_key +( + `id` int NOT NULL, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_sink_table +( + `id` int NOT NULL AUTO_INCREMENT, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` int DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE = InnoDB + AUTO_INCREMENT = 2 + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +truncate table mysql_cdc_e2e_source_table; +truncate table mysql_cdc_e2e_source_table2; +truncate table 
mysql_cdc_e2e_source_table_no_primary_key; +truncate table mysql_cdc_e2e_source_table_1_custom_primary_key; +truncate table mysql_cdc_e2e_source_table_2_custom_primary_key; +truncate table mysql_cdc_e2e_sink_table; + +INSERT INTO mysql_cdc_e2e_source_table ( id, f_binary, f_blob, f_long_varbinary, f_longblob, f_tinyblob, f_varbinary, f_smallint, + f_smallint_unsigned, f_mediumint, f_mediumint_unsigned, f_int, f_int_unsigned, f_integer, + f_integer_unsigned, f_bigint, f_bigint_unsigned, f_numeric, f_decimal, f_float, f_double, + f_double_precision, f_longtext, f_mediumtext, f_text, f_tinytext, f_varchar, f_date, f_datetime, + f_timestamp, f_bit1, f_bit64, f_char, f_enum, f_mediumblob, f_long_varchar, f_real, f_time, + f_tinyint, f_tinyint_unsigned, f_json, f_year ) +VALUES ( 1, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, + 0x74696E79626C6F62, 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, + 123456789, 987654321, 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', + 'This is a text field', 'This is a tiny text field', '中文测试', '2022-04-27', '2022-04-27 14:30:00', + '2023-04-27 11:08:40', 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 12.345, '14:30:00', -128, 255, '{ "key": "value" }', 2022 ), + ( 2, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, + 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 112.345, '14:30:00', -128, 22, '{ "key": "value" }', 2013 ), + ( 3, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, 123, + 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', 112.345, + '14:30:00', -128, 22, '{ "key": "value" }', 2021 ); + +INSERT INTO mysql_cdc_e2e_source_table2 ( id, f_binary, f_blob, f_long_varbinary, f_longblob, f_tinyblob, f_varbinary, f_smallint, + f_smallint_unsigned, f_mediumint, f_mediumint_unsigned, 
f_int, f_int_unsigned, f_integer, + f_integer_unsigned, f_bigint, f_bigint_unsigned, f_numeric, f_decimal, f_float, f_double, + f_double_precision, f_longtext, f_mediumtext, f_text, f_tinytext, f_varchar, f_date, f_datetime, + f_timestamp, f_bit1, f_bit64, f_char, f_enum, f_mediumblob, f_long_varchar, f_real, f_time, + f_tinyint, f_tinyint_unsigned, f_json, f_year ) +VALUES ( 1, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, + 0x74696E79626C6F62, 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, + 123456789, 987654321, 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', + 'This is a text field', 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', + '2023-04-27 11:08:40', 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 12.345, '14:30:00', -128, 255, '{ "key": "value" }', 2022 ), + ( 2, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, + 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 112.345, '14:30:00', -128, 22, '{ "key": "value" }', 2013 ), + ( 3, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, 123, + 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', 112.345, + '14:30:00', -128, 22, '{ "key": "value" }', 2021 ); + +INSERT INTO mysql_cdc_e2e_source_table_no_primary_key ( id, f_binary, f_blob, f_long_varbinary, f_longblob, f_tinyblob, f_varbinary, f_smallint, + f_smallint_unsigned, f_mediumint, f_mediumint_unsigned, f_int, f_int_unsigned, f_integer, + f_integer_unsigned, f_bigint, f_bigint_unsigned, f_numeric, f_decimal, f_float, f_double, + f_double_precision, f_longtext, f_mediumtext, f_text, f_tinytext, f_varchar, f_date, f_datetime, + f_timestamp, f_bit1, f_bit64, f_char, f_enum, f_mediumblob, f_long_varchar, f_real, f_time, + f_tinyint, f_tinyint_unsigned, f_json, f_year ) 
+VALUES ( 1, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, + 0x74696E79626C6F62, 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, + 123456789, 987654321, 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', + 'This is a text field', 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', + '2023-04-27 11:08:40', 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 12.345, '14:30:00', -128, 255, '{ "key": "value" }', 2022 ), + ( 2, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, + 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 112.345, '14:30:00', -128, 22, '{ "key": "value" }', 2013 ), + ( 3, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, 123, + 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', 112.345, + '14:30:00', -128, 22, '{ "key": "value" }', 2021 ); + +INSERT INTO mysql_cdc_e2e_source_table_1_custom_primary_key ( id, f_binary, f_blob, f_long_varbinary, f_longblob, f_tinyblob, f_varbinary, f_smallint, + f_smallint_unsigned, f_mediumint, f_mediumint_unsigned, f_int, f_int_unsigned, f_integer, + f_integer_unsigned, f_bigint, f_bigint_unsigned, f_numeric, f_decimal, f_float, f_double, + f_double_precision, f_longtext, f_mediumtext, f_text, f_tinytext, f_varchar, f_date, f_datetime, + f_timestamp, f_bit1, f_bit64, f_char, f_enum, f_mediumblob, f_long_varchar, f_real, f_time, + f_tinyint, f_tinyint_unsigned, f_json, f_year ) +VALUES ( 1, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, + 0x74696E79626C6F62, 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, + 123456789, 
987654321, 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', + 'This is a text field', 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', + '2023-04-27 11:08:40', 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 12.345, '14:30:00', -128, 255, '{ "key": "value" }', 2022 ), + ( 2, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, + 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 112.345, '14:30:00', -128, 22, '{ "key": "value" }', 2013 ), + ( 3, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, 123, + 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', 112.345, + '14:30:00', -128, 22, '{ "key": "value" }', 2021 ); + +INSERT INTO mysql_cdc_e2e_source_table_2_custom_primary_key ( id, f_binary, f_blob, f_long_varbinary, f_longblob, f_tinyblob, f_varbinary, f_smallint, + f_smallint_unsigned, f_mediumint, f_mediumint_unsigned, f_int, f_int_unsigned, f_integer, + f_integer_unsigned, f_bigint, f_bigint_unsigned, f_numeric, f_decimal, f_float, f_double, + f_double_precision, f_longtext, f_mediumtext, f_text, f_tinytext, f_varchar, f_date, f_datetime, + f_timestamp, f_bit1, f_bit64, f_char, f_enum, f_mediumblob, f_long_varchar, f_real, f_time, + f_tinyint, f_tinyint_unsigned, f_json, f_year ) +VALUES ( 1, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, + 0x74696E79626C6F62, 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, + 123456789, 987654321, 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', + 'This is a text field', 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', + '2023-04-27 11:08:40', 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 
0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 12.345, '14:30:00', -128, 255, '{ "key": "value" }', 2022 ), + ( 2, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, + 123, 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', + 112.345, '14:30:00', -128, 22, '{ "key": "value" }', 2013 ), + ( 3, 0x61626374000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000, + 0x68656C6C6F, 0x18000000789C0BC9C82C5600A244859CFCBC7485B2C4A2A4CCBCC4A24A00697308D4, NULL, 0x74696E79626C6F62, + 0x48656C6C6F20776F726C64, 12345, 54321, 123456, 654321, 1234567, 7654321, 1234567, 7654321, 123456789, 987654321, 123, + 789, 12.34, 56.78, 90.12, 'This is a long text field', 'This is a medium text field', 'This is a text field', + 'This is a tiny text field', 'This is a varchar field', '2022-04-27', '2022-04-27 14:30:00', '2023-04-27 11:08:40', + 1, b'0101010101010101010101010101010101010101010101010101010101010101', 'C', 'enum2', + 0x1B000000789C0BC9C82C5600A24485DCD494CCD25C85A49CFC2485B4CCD49C140083FF099A, 'This is a long varchar field', 112.345, + '14:30:00', -128, 22, '{ "key": "value" }', 2021 ); + +CREATE DATABASE IF NOT EXISTS `mysql_cdc2`; + +use mysql_cdc2; +-- Create a mysql data source table +CREATE TABLE mysql_cdc_e2e_source_table +( + `id` int NOT NULL AUTO_INCREMENT, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + 
`f_year` year DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE = InnoDB + AUTO_INCREMENT = 2 + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table2 +( + `id` int NOT NULL AUTO_INCREMENT, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE = InnoDB + AUTO_INCREMENT = 2 + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE mysql_cdc_e2e_source_table_1_custom_primary_key +( + `id` int NOT NULL, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; + +CREATE TABLE 
mysql_cdc_e2e_source_table_2_custom_primary_key +( + `id` int NOT NULL, + `f_binary` binary(64) DEFAULT NULL, + `f_blob` blob, + `f_long_varbinary` mediumblob, + `f_longblob` longblob, + `f_tinyblob` tinyblob, + `f_varbinary` varbinary(100) DEFAULT NULL, + `f_smallint` smallint DEFAULT NULL, + `f_smallint_unsigned` smallint unsigned DEFAULT NULL, + `f_mediumint` mediumint DEFAULT NULL, + `f_mediumint_unsigned` mediumint unsigned DEFAULT NULL, + `f_int` int DEFAULT NULL, + `f_int_unsigned` int unsigned DEFAULT NULL, + `f_integer` int DEFAULT NULL, + `f_integer_unsigned` int unsigned DEFAULT NULL, + `f_bigint` bigint DEFAULT NULL, + `f_bigint_unsigned` bigint unsigned DEFAULT NULL, + `f_numeric` decimal(10, 0) DEFAULT NULL, + `f_decimal` decimal(10, 0) DEFAULT NULL, + `f_float` float DEFAULT NULL, + `f_double` double DEFAULT NULL, + `f_double_precision` double DEFAULT NULL, + `f_longtext` longtext, + `f_mediumtext` mediumtext, + `f_text` text, + `f_tinytext` tinytext, + `f_varchar` varchar(100) DEFAULT NULL, + `f_date` date DEFAULT NULL, + `f_datetime` datetime DEFAULT NULL, + `f_timestamp` timestamp NULL DEFAULT NULL, + `f_bit1` bit(1) DEFAULT NULL, + `f_bit64` bit(64) DEFAULT NULL, + `f_char` char(1) DEFAULT NULL, + `f_enum` enum ('enum1','enum2','enum3') DEFAULT NULL, + `f_mediumblob` mediumblob, + `f_long_varchar` mediumtext, + `f_real` double DEFAULT NULL, + `f_time` time DEFAULT NULL, + `f_tinyint` tinyint DEFAULT NULL, + `f_tinyint_unsigned` tinyint unsigned DEFAULT NULL, + `f_json` json DEFAULT NULL, + `f_year` year DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE = InnoDB + DEFAULT CHARSET = utf8mb4 + COLLATE = utf8mb4_0900_ai_ci; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/shop.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/shop.sql new file mode 100644 index 00000000000..f97d5852f3a --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/ddl/shop.sql @@ -0,0 +1,78 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. 
+-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; + +drop table if exists products; +-- Create and populate our products using a single insert with many rows +CREATE TABLE products ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'SeaTunnel', + description VARCHAR(512), + weight FLOAT +); + +drop table if exists mysql_cdc_e2e_sink_table_with_schema_change; +CREATE TABLE if not exists mysql_cdc_e2e_sink_table_with_schema_change ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'SeaTunnel', + description VARCHAR(512), + weight FLOAT +); + +drop table if exists mysql_cdc_e2e_sink_table_with_schema_change_exactly_once; +CREATE TABLE if not exists mysql_cdc_e2e_sink_table_with_schema_change_exactly_once ( + id INTEGER NOT NULL AUTO_INCREMENT PRIMARY KEY, + name VARCHAR(255) NOT NULL DEFAULT 'SeaTunnel', + description VARCHAR(512), + weight FLOAT +); + +ALTER TABLE products AUTO_INCREMENT = 101; + +INSERT INTO products +VALUES (101,"scooter","Small 2-wheel scooter",3.14), + (102,"car battery","12V car battery",8.1), + (103,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8), + (104,"hammer","12oz carpenter's hammer",0.75), + (105,"hammer","14oz carpenter's hammer",0.875), + (106,"hammer","16oz carpenter's hammer",1.0), + (107,"rocks","box of assorted rocks",5.3), + (108,"jacket","water resistent black wind breaker",0.1), + (109,"spare tire","24 inch spare tire",22.2); + + +drop table if exists products_on_hand; +CREATE TABLE products_on_hand ( + product_id INTEGER NOT NULL PRIMARY KEY, + quantity INTEGER NOT NULL +); + + +INSERT INTO products_on_hand VALUES (101,3); +INSERT INTO products_on_hand VALUES (102,8); +INSERT INTO products_on_hand VALUES (103,18); +INSERT INTO products_on_hand VALUES (104,4); +INSERT INTO products_on_hand VALUES (105,5); +INSERT INTO products_on_hand VALUES (106,0); +INSERT INTO products_on_hand VALUES (107,44); +INSERT INTO products_on_hand VALUES (108,2); +INSERT INTO products_on_hand VALUES (109,5); diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/server-gtids/my.cnf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/server-gtids/my.cnf new file mode 100644 index 00000000000..a390897885d --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/server-gtids/my.cnf @@ -0,0 +1,65 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# For advice on how to change settings please see +# http://dev.mysql.com/doc/refman/5.7/en/server-configuration-defaults.html + +[mysqld] +# +# Remove leading # and set to the amount of RAM for the most important data +# cache in MySQL. Start at 70% of total RAM for dedicated server, else 10%. +# innodb_buffer_pool_size = 128M +# +# Remove leading # to turn on a very important data integrity option: logging +# changes to the binary log between backups. +# log_bin +# +# Remove leading # to set options mainly useful for reporting servers. +# The server defaults are faster for transactions and fast SELECTs. +# Adjust sizes as needed, experiment to find the optimal values. +# join_buffer_size = 128M +# sort_buffer_size = 2M +# read_rnd_buffer_size = 2M +skip-host-cache +skip-name-resolve +#datadir=/var/lib/mysql +#socket=/var/lib/mysql/mysql.sock +secure-file-priv=/var/lib/mysql +user=mysql + +# Disabling symbolic-links is recommended to prevent assorted security risks +symbolic-links=0 + +#log-error=/var/log/mysqld.log +#pid-file=/var/run/mysqld/mysqld.pid + +# ---------------------------------------------- +# Enable the binlog for replication & CDC +# ---------------------------------------------- + +# Enable binary replication log and set the prefix, expiration, and log format. +# The prefix is arbitrary, expiration can be short for integration tests but would +# be longer on a production system. Row-level info is required for ingest to work. +# Server ID is required, but this will vary on production systems +server-id = 223344 +log_bin = mysql-bin +expire_logs_days = 1 +binlog_format = row + +# enable gtid mode +gtid_mode = on +enforce_gtid_consistency = on \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/setup.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/setup.sql new file mode 100644 index 00000000000..079b8f1d95f --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/docker/setup.sql @@ -0,0 +1,35 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- In production you would almost certainly limit the replication user to be on the follower (slave) machine, +-- to prevent other clients accessing the log from other machines. For example, 'replicator'@'follower.acme.com'.
+-- However, in this database we'll grant 3 users different privileges: +-- +-- 1) 'mysqluser' - all privileges +-- 2) 'st_user_source' - all privileges required by the snapshot reader AND binlog reader (used for testing) +-- 3) 'st_user_sink' - all privileges required to write data (used for testing) +-- +GRANT ALL PRIVILEGES ON *.* TO 'mysqluser'@'%'; + +CREATE USER 'st_user_source' IDENTIFIED BY 'mysqlpw'; +GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT, DROP, LOCK TABLES ON *.* TO 'st_user_source'@'%'; +CREATE USER 'st_user_sink' IDENTIFIED BY 'mysqlpw'; +GRANT SELECT, INSERT, UPDATE, DELETE, CREATE, DROP, INDEX, ALTER ON *.* TO 'st_user_sink'@'%'; +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: emptydb +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE emptydb; diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/mysql_cdc_to_paimon_with_schema_change.conf b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/mysql_cdc_to_paimon_with_schema_change.conf new file mode 100644 index 00000000000..714c4be81c0 --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-paimon-e2e/src/test/resources/mysql_cdc_to_paimon_with_schema_change.conf @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +###### +###### This config file is a demonstration of streaming processing in seatunnel config +###### + +env { + # You can set engine configuration here + parallelism = 5 + job.mode = "STREAMING" + checkpoint.interval = 5000 + read_limit.bytes_per_second=7000000 + read_limit.rows_per_second=400 +} + +source { + MySQL-CDC { + server-id = 5652-5657 + username = "st_user_source" + password = "mysqlpw" + table-names = ["shop.products"] + base-url = "jdbc:mysql://mysql_cdc_e2e:3306/shop" + debezium = { + include.schema.changes = true + } + } +} + +sink { + Paimon { + warehouse = "file:///tmp/paimon" + database = "mysql_to_paimon" + table = "products" + } +} diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksSchemaChangeIT.java b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksSchemaChangeIT.java index 66d98ceb801..ea7fe35fe0b 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksSchemaChangeIT.java +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/java/org/apache/seatunnel/e2e/connector/starrocks/StarRocksSchemaChangeIT.java @@ -191,6 +191,9 @@ public void testStarRocksSinkWithSchemaEvolutionCase(TestContainer container) assertSchemaEvolutionForAddColumns( DATABASE, SOURCE_TABLE, SINK_TABLE, mysqlConnection, starRocksConnection); + assertSchemaEvolutionForDropColumns( + DATABASE, SOURCE_TABLE, SINK_TABLE, mysqlConnection, starRocksConnection); + // savepoint 1 Assertions.assertEquals(0, container.savepointJob(jobId).getExitCode()); @@ -303,6 +306,27 @@ private void assertSchemaEvolutionForAddColumns( }); } + private void assertSchemaEvolutionForDropColumns( + String database, + String sourceTable, + String sinkTable, + Connection sourceConnection, + Connection sinkConnection) { + + // case1 add columns with cdc data at same time + shopDatabase.setTemplateName("drop_columns_validate_schema").createAndInitialize(); + await().atMost(60000, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> + Assertions.assertIterableEquals( + query( + String.format(QUERY_COLUMNS, database, sourceTable), + sourceConnection), + query( + String.format(QUERY_COLUMNS, database, sinkTable), + sinkConnection))); + } + private void assertTableStructureAndData( String database, String sourceTable, diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns.sql index 5c3b7d1f549..9464e02e1d3 100644 --- a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns.sql +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns.sql @@ -21,20 +21,6 @@ CREATE DATABASE IF NOT EXISTS `shop`; use shop; -alter table products drop column add_column4,drop column add_column6; -insert into products -values (137,"scooter","Small 2-wheel scooter",3.14,'xx',1,1.1), - (138,"car battery","12V car battery",8.1,'xx',2,1.2), - (139,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3,1.3), - (140,"hammer","12oz carpenter's hammer",0.75,'xx',4,1.4), - (141,"hammer","14oz carpenter's hammer",0.875,'xx',5,1.5), - (142,"hammer","16oz carpenter's 
hammer",1.0,'xx',6,1.6), - (143,"rocks","box of assorted rocks",5.3,'xx',7,1.7), - (144,"jacket","water resistent black wind breaker",0.1,'xx',8,1.8), - (145,"spare tire","24 inch spare tire",22.2,'xx',9,1.9); -update products set name = 'dailai' where id in (140,141,142); -delete from products where id < 137; - alter table products drop column add_column1,drop column add_column3; insert into products diff --git a/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns_validate_schema.sql b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns_validate_schema.sql new file mode 100644 index 00000000000..262006bd81e --- /dev/null +++ b/seatunnel-e2e/seatunnel-connector-v2-e2e/connector-starrocks-e2e/src/test/resources/ddl/drop_columns_validate_schema.sql @@ -0,0 +1,36 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +-- ---------------------------------------------------------------------------------------------------------------- +-- DATABASE: shop +-- ---------------------------------------------------------------------------------------------------------------- +CREATE DATABASE IF NOT EXISTS `shop`; +use shop; + +alter table products drop column add_column4,drop column add_column6; +insert into products +values (137,"scooter","Small 2-wheel scooter",3.14,'xx',1,1.1), + (138,"car battery","12V car battery",8.1,'xx',2,1.2), + (139,"12-pack drill bits","12-pack of drill bits with sizes ranging from #40 to #3",0.8,'xx',3,1.3), + (140,"hammer","12oz carpenter's hammer",0.75,'xx',4,1.4), + (141,"hammer","14oz carpenter's hammer",0.875,'xx',5,1.5), + (142,"hammer","16oz carpenter's hammer",1.0,'xx',6,1.6), + (143,"rocks","box of assorted rocks",5.3,'xx',7,1.7), + (144,"jacket","water resistent black wind breaker",0.1,'xx',8,1.8), + (145,"spare tire","24 inch spare tire",22.2,'xx',9,1.9); +update products set name = 'dailai' where id in (140,141,142); +delete from products where id < 137; \ No newline at end of file diff --git a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java index ad586153f46..1b42994154d 100644 --- a/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java +++ b/seatunnel-e2e/seatunnel-e2e-common/src/test/java/org/apache/seatunnel/e2e/common/container/seatunnel/SeaTunnelContainer.java @@ -512,7 +512,11 @@ public Container.ExecResult cancelJob(String jobId) throws IOException, Interrup @Override public String getJobStatus(String jobId) { - HttpGet get = new 
HttpGet("http://" + server.getHost() + ":8080/job-info/" + jobId); + HttpGet get = + new HttpGet( + String.format( + "http://%s:%d/job-info/%s", + server.getHost(), server.getMappedPort(8080), jobId)); try (CloseableHttpClient client = HttpClients.createDefault()) { CloseableHttpResponse response = client.execute(get); if (response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) { diff --git a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelEngineClusterRoleTest.java b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelEngineClusterRoleTest.java index 89134ce4671..48dfb47476c 100644 --- a/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelEngineClusterRoleTest.java +++ b/seatunnel-engine/seatunnel-engine-client/src/test/java/org/apache/seatunnel/engine/client/SeaTunnelEngineClusterRoleTest.java @@ -38,6 +38,7 @@ import org.junit.jupiter.api.condition.OS; import com.hazelcast.client.config.ClientConfig; +import com.hazelcast.config.Config; import com.hazelcast.instance.impl.HazelcastInstanceImpl; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; @@ -306,6 +307,62 @@ public void pendingJobCancel() { } } + @Test + public void testStartMasterNodeWithTcpIp() { + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + HazelcastInstanceImpl instance = + SeaTunnelServerStarter.createMasterHazelcastInstance(seaTunnelConfig); + Assertions.assertNotNull(instance); + Assertions.assertEquals(1, instance.getCluster().getMembers().size()); + instance.shutdown(); + } + + @Test + public void testStartMasterNodeWithMulticastJoin() { + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig.setHazelcastConfig(Config.loadFromString(getMulticastConfig())); + HazelcastInstanceImpl instance = + SeaTunnelServerStarter.createMasterHazelcastInstance(seaTunnelConfig); + Assertions.assertNotNull(instance); + Assertions.assertEquals(1, instance.getCluster().getMembers().size()); + instance.shutdown(); + } + + @Test + public void testCannotOnlyStartWorkerNodeWithTcpIp() { + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + Assertions.assertThrows( + IllegalStateException.class, + () -> { + SeaTunnelServerStarter.createWorkerHazelcastInstance(seaTunnelConfig); + }); + } + + @Test + public void testCannotOnlyStartWorkerNodeWithMulticastJoin() { + SeaTunnelConfig seaTunnelConfig = ConfigProvider.locateAndGetSeaTunnelConfig(); + seaTunnelConfig.setHazelcastConfig(Config.loadFromString(getMulticastConfig())); + Assertions.assertThrows( + IllegalStateException.class, + () -> { + SeaTunnelServerStarter.createWorkerHazelcastInstance(seaTunnelConfig); + }); + } + + private String getMulticastConfig() { + return "hazelcast:\n" + + " network:\n" + + " join:\n" + + " multicast:\n" + + " enabled: true\n" + + " multicast-group: 224.2.2.3\n" + + " multicast-port: 54327\n" + + " multicast-time-to-live: 32\n" + + " multicast-timeout-seconds: 2\n" + + " trusted-interfaces:\n" + + " - 192.168.1.1\n"; + } + private SeaTunnelClient createSeaTunnelClient(String clusterName) { ClientConfig clientConfig = ConfigProvider.locateAndGetClientConfig(); clientConfig.setClusterName(TestUtils.getClusterName(clusterName)); diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelNodeContext.java 
b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelNodeContext.java index 60174b88645..3bfd8a41f7b 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelNodeContext.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/SeaTunnelNodeContext.java @@ -18,16 +18,21 @@ package org.apache.seatunnel.engine.server; import org.apache.seatunnel.engine.common.config.SeaTunnelConfig; +import org.apache.seatunnel.engine.server.joiner.LiteNodeDropOutDiscoveryJoiner; +import org.apache.seatunnel.engine.server.joiner.LiteNodeDropOutMulticastJoiner; +import org.apache.seatunnel.engine.server.joiner.LiteNodeDropOutTcpIpJoiner; import com.hazelcast.config.JoinConfig; import com.hazelcast.instance.impl.DefaultNodeContext; import com.hazelcast.instance.impl.Node; import com.hazelcast.instance.impl.NodeExtension; import com.hazelcast.internal.cluster.Joiner; +import com.hazelcast.internal.config.AliasedDiscoveryConfigUtils; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import static com.hazelcast.config.ConfigAccessor.getActiveMemberNetworkConfig; +import static com.hazelcast.spi.properties.ClusterProperty.DISCOVERY_SPI_ENABLED; @Slf4j public class SeaTunnelNodeContext extends DefaultNodeContext { @@ -45,15 +50,28 @@ public NodeExtension createNodeExtension(@NonNull Node node) { @Override public Joiner createJoiner(Node node) { + JoinConfig join = getActiveMemberNetworkConfig(seaTunnelConfig.getHazelcastConfig()).getJoin(); join.verify(); - if (join.getTcpIpConfig().isEnabled()) { + // update for seatunnel, lite member can not become master node + if (join.getMulticastConfig().isEnabled() && node.multicastService != null) { + log.info("Using LiteNodeDropOutMulticast Multicast discovery"); + return new LiteNodeDropOutMulticastJoiner(node); + } else if (join.getTcpIpConfig().isEnabled()) { log.info("Using LiteNodeDropOutTcpIpJoiner TCP/IP discovery"); return new LiteNodeDropOutTcpIpJoiner(node); + } else if (node.getProperties().getBoolean(DISCOVERY_SPI_ENABLED) + || isAnyAliasedConfigEnabled(join) + || join.isAutoDetectionEnabled()) { + log.info("Using LiteNodeDropOutDiscoveryJoiner Discovery SPI"); + return new LiteNodeDropOutDiscoveryJoiner(node); } + return null; + } - return super.createJoiner(node); + private boolean isAnyAliasedConfigEnabled(JoinConfig join) { + return !AliasedDiscoveryConfigUtils.createDiscoveryStrategyConfigs(join).isEmpty(); } } diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutDiscoveryJoiner.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutDiscoveryJoiner.java new file mode 100644 index 00000000000..a3ae66f372e --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutDiscoveryJoiner.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.seatunnel.engine.server.joiner; + +import com.hazelcast.cluster.Address; +import com.hazelcast.cluster.impl.MemberImpl; +import com.hazelcast.config.JoinConfig; +import com.hazelcast.instance.EndpointQualifier; +import com.hazelcast.instance.ProtocolType; +import com.hazelcast.instance.impl.Node; +import com.hazelcast.internal.config.AliasedDiscoveryConfigUtils; +import com.hazelcast.internal.util.Preconditions; +import com.hazelcast.internal.util.concurrent.BackoffIdleStrategy; +import com.hazelcast.internal.util.concurrent.IdleStrategy; +import com.hazelcast.spi.discovery.DiscoveryNode; +import com.hazelcast.spi.discovery.integration.DiscoveryService; +import com.hazelcast.spi.properties.ClusterProperty; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.Iterator; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static com.hazelcast.internal.config.AliasedDiscoveryConfigUtils.allUsePublicAddress; +import static com.hazelcast.spi.properties.ClusterProperty.DISCOVERY_SPI_PUBLIC_IP_ENABLED; + +public class LiteNodeDropOutDiscoveryJoiner extends LiteNodeDropOutTcpIpJoiner { + + private final DiscoveryService discoveryService; + private final boolean usePublicAddress; + private final IdleStrategy idleStrategy; + private final int maximumWaitingTimeBeforeJoinSeconds; + + public LiteNodeDropOutDiscoveryJoiner(Node node) { + super(node); + this.idleStrategy = + new BackoffIdleStrategy( + 0L, + 0L, + TimeUnit.MILLISECONDS.toNanos(10L), + TimeUnit.MILLISECONDS.toNanos(500L)); + this.maximumWaitingTimeBeforeJoinSeconds = + node.getProperties().getInteger(ClusterProperty.WAIT_SECONDS_BEFORE_JOIN); + this.discoveryService = node.discoveryService; + this.usePublicAddress = usePublicAddress(node.getConfig().getNetworkConfig().getJoin()); + } + + private boolean usePublicAddress(JoinConfig join) { + return node.getProperties().getBoolean(DISCOVERY_SPI_PUBLIC_IP_ENABLED) + || allUsePublicAddress( + AliasedDiscoveryConfigUtils.aliasedDiscoveryConfigsFrom(join)); + } + + protected Collection
getPossibleAddressesForInitialJoin() { + long deadLine = + System.nanoTime() + + TimeUnit.SECONDS.toNanos((long) this.maximumWaitingTimeBeforeJoinSeconds); + + for (int i = 0; System.nanoTime() < deadLine; ++i) { + Collection
possibleAddresses = this.getPossibleAddresses(); + if (!possibleAddresses.isEmpty()) { + return possibleAddresses; + } + + this.idleStrategy.idle((long) i); + } + + return Collections.emptyList(); + } + + protected Collection<Address>
getPossibleAddresses() { + Iterable<DiscoveryNode> discoveredNodes = + (Iterable<DiscoveryNode>) + Preconditions.checkNotNull( + this.discoveryService.discoverNodes(), + "Discovered nodes cannot be null!"); + MemberImpl localMember = this.node.nodeEngine.getLocalMember(); + Set<Address>
localAddresses = this.node.getLocalAddressRegistry().getLocalAddresses(); + Collection<Address>
possibleMembers = new ArrayList(); + Iterator var5 = discoveredNodes.iterator(); + + while (var5.hasNext()) { + DiscoveryNode discoveryNode = (DiscoveryNode) var5.next(); + Address discoveredAddress = + this.usePublicAddress + ? discoveryNode.getPublicAddress() + : discoveryNode.getPrivateAddress(); + if (localAddresses.contains(discoveredAddress)) { + if (!this.usePublicAddress && discoveryNode.getPublicAddress() != null) { + localMember + .getAddressMap() + .put( + EndpointQualifier.resolve(ProtocolType.CLIENT, "public"), + this.publicAddress(localMember, discoveryNode)); + } + } else { + possibleMembers.add(discoveredAddress); + } + } + + return possibleMembers; + } + + private Address publicAddress(MemberImpl localMember, DiscoveryNode discoveryNode) { + if (localMember.getAddressMap().containsKey(EndpointQualifier.CLIENT)) { + try { + String publicHost = discoveryNode.getPublicAddress().getHost(); + int clientPort = + ((Address) localMember.getAddressMap().get(EndpointQualifier.CLIENT)) + .getPort(); + return new Address(publicHost, clientPort); + } catch (Exception var5) { + Exception e = var5; + this.logger.fine(e); + } + } + + return discoveryNode.getPublicAddress(); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutMulticastJoiner.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutMulticastJoiner.java new file mode 100644 index 00000000000..47ec2818db4 --- /dev/null +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutMulticastJoiner.java @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.seatunnel.engine.server.joiner; + +import com.hazelcast.cluster.Address; +import com.hazelcast.config.ConfigAccessor; +import com.hazelcast.config.NetworkConfig; +import com.hazelcast.instance.impl.Node; +import com.hazelcast.internal.cluster.impl.JoinRequest; +import com.hazelcast.internal.cluster.impl.MulticastJoiner; +import com.hazelcast.internal.util.Clock; +import com.hazelcast.internal.util.RandomPicker; +import lombok.extern.slf4j.Slf4j; + +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +@Slf4j +public class LiteNodeDropOutMulticastJoiner extends MulticastJoiner { + + private static final long JOIN_RETRY_INTERVAL = 1000L; + private final AtomicInteger currentTryCount = new AtomicInteger(0); + private final AtomicInteger maxTryCount = new AtomicInteger(calculateTryCount()); + + public LiteNodeDropOutMulticastJoiner(Node node) { + super(node); + } + + @Override + public void doJoin() { + long joinStartTime = Clock.currentTimeMillis(); + long maxJoinMillis = getMaxJoinMillis(); + Address thisAddress = node.getThisAddress(); + + while (shouldRetry() && (Clock.currentTimeMillis() - joinStartTime < maxJoinMillis)) { + + // clear master node + clusterService.setMasterAddressToJoin(null); + + Address masterAddress = getTargetAddress(); + if (masterAddress == null) { + masterAddress = findMasterWithMulticast(); + } + clusterService.setMasterAddressToJoin(masterAddress); + + if (masterAddress == null || thisAddress.equals(masterAddress)) { + if (node.isLiteMember()) { + log.info("This node is lite member. No need to join to a master node."); + continue; + } else { + clusterJoinManager.setThisMemberAsMaster(); + return; + } + } + + logger.info("Trying to join to discovered node: " + masterAddress); + joinMaster(); + } + } + + private void joinMaster() { + long maxMasterJoinTime = getMaxJoinTimeToMasterNode(); + long start = Clock.currentTimeMillis(); + + while (shouldRetry() && Clock.currentTimeMillis() - start < maxMasterJoinTime) { + + Address master = clusterService.getMasterAddress(); + if (master != null) { + if (logger.isFineEnabled()) { + logger.fine("Joining to master " + master); + } + clusterJoinManager.sendJoinRequest(master); + } else { + break; + } + + try { + clusterService.blockOnJoin(JOIN_RETRY_INTERVAL); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + + if (isBlacklisted(master)) { + clusterService.setMasterAddressToJoin(null); + return; + } + } + } + + private Address findMasterWithMulticast() { + try { + if (this.logger.isFineEnabled()) { + this.logger.fine("Searching for master node. 
Max tries: " + maxTryCount.get()); + } + + JoinRequest joinRequest = this.node.createJoinRequest((Address) null); + + while (this.node.isRunning() + && currentTryCount.incrementAndGet() <= maxTryCount.get()) { + joinRequest.setTryCount(currentTryCount.get()); + this.node.multicastService.send(joinRequest); + Address masterAddress = this.clusterService.getMasterAddress(); + if (masterAddress != null) { + Address var3 = masterAddress; + return var3; + } + + Thread.sleep((long) this.getPublishInterval()); + } + + return null; + } catch (Exception var7) { + Exception e = var7; + if (this.logger != null) { + this.logger.warning(e); + } + + return null; + } finally { + currentTryCount.set(0); + } + } + + private int calculateTryCount() { + NetworkConfig networkConfig = ConfigAccessor.getActiveMemberNetworkConfig(this.config); + long timeoutMillis = + TimeUnit.SECONDS.toMillis( + (long) + networkConfig + .getJoin() + .getMulticastConfig() + .getMulticastTimeoutSeconds()); + int avgPublishInterval = 125; + int tryCount = (int) timeoutMillis / avgPublishInterval; + String host = this.node.getThisAddress().getHost(); + + int lastDigits; + try { + lastDigits = Integer.parseInt(host.substring(host.lastIndexOf(46) + 1)); + } catch (NumberFormatException var9) { + lastDigits = RandomPicker.getInt(512); + } + + int portDiff = this.node.getThisAddress().getPort() - networkConfig.getPort(); + tryCount += (lastDigits + portDiff) % 10; + return tryCount; + } + + private int getPublishInterval() { + return RandomPicker.getInt(50, 200); + } +} diff --git a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/LiteNodeDropOutTcpIpJoiner.java b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutTcpIpJoiner.java similarity index 99% rename from seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/LiteNodeDropOutTcpIpJoiner.java rename to seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutTcpIpJoiner.java index 67aac64aca2..afb3eab795e 100644 --- a/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/LiteNodeDropOutTcpIpJoiner.java +++ b/seatunnel-engine/seatunnel-engine-server/src/main/java/org/apache/seatunnel/engine/server/joiner/LiteNodeDropOutTcpIpJoiner.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.seatunnel.engine.server; +package org.apache.seatunnel.engine.server.joiner; import com.hazelcast.cluster.Address; import com.hazelcast.config.Config; diff --git a/seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/cluster/impl/MemberImpl.java b/seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/cluster/impl/MemberImpl.java index c5949ee9257..ff7ca8e14ed 100644 --- a/seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/cluster/impl/MemberImpl.java +++ b/seatunnel-shade/seatunnel-hazelcast/seatunnel-hazelcast-shade/src/main/java/com/hazelcast/cluster/impl/MemberImpl.java @@ -286,21 +286,20 @@ public String toString() { sb.append(":"); sb.append(address.getPort()); sb.append(" - ").append(uuid); - if (localMember()) { - sb.append(" this"); - } - // update for seatunnel, add worker and master info if (isLiteMember()) { - sb.append(" worker"); + sb.append(" [worker node]"); + } else { + sb.append(" [master node]"); } - if (instance != null && instance.node.getClusterService().getMasterAddress() != null && instance.node.getClusterService().getMasterAddress().equals(address)) { - sb.append(" master"); + sb.append(" [active master]"); + } + if (localMember()) { + sb.append(" this"); } - // update for seatunnel, add worker and master info end return sb.toString(); } diff --git a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java index 2848cc9094e..9318ff0b05e 100644 --- a/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java +++ b/seatunnel-transforms-v2/src/main/java/org/apache/seatunnel/transform/sql/zeta/ZetaSQLEngine.java @@ -183,7 +183,7 @@ public SeaTunnelRowType typeMapping(List inputColumnsMapping) { for (SelectItem selectItem : selectItems) { if (selectItem.getExpression() instanceof AllColumns) { for (int i = 0; i < inputRowType.getFieldNames().length; i++) { - fieldNames[idx] = inputRowType.getFieldName(i); + fieldNames[idx] = cleanEscape(inputRowType.getFieldName(i)); seaTunnelDataTypes[idx] = inputRowType.getFieldType(i); if (inputColumnsMapping != null) { inputColumnsMapping.set(idx, inputRowType.getFieldName(i)); @@ -194,16 +194,12 @@ public SeaTunnelRowType typeMapping(List inputColumnsMapping) { Expression expression = selectItem.getExpression(); if (selectItem.getAlias() != null) { String aliasName = selectItem.getAlias().getName(); - if (aliasName.startsWith(ESCAPE_IDENTIFIER) - && aliasName.endsWith(ESCAPE_IDENTIFIER)) { - aliasName = aliasName.substring(1, aliasName.length() - 1); - } - fieldNames[idx] = aliasName; + fieldNames[idx] = cleanEscape(aliasName); } else { if (expression instanceof Column) { - fieldNames[idx] = ((Column) expression).getColumnName(); + fieldNames[idx] = cleanEscape(((Column) expression).getColumnName()); } else { - fieldNames[idx] = expression.toString(); + fieldNames[idx] = cleanEscape(expression.toString()); } } @@ -225,6 +221,13 @@ public SeaTunnelRowType typeMapping(List inputColumnsMapping) { fieldNames, seaTunnelDataTypes, lateralViews, inputColumnsMapping); } + private static String cleanEscape(String columnName) { + if (columnName.startsWith(ESCAPE_IDENTIFIER) && columnName.endsWith(ESCAPE_IDENTIFIER)) { + columnName = columnName.substring(1, columnName.length() - 1); + } + return columnName; + } + @Override public List 
transformBySQL(SeaTunnelRow inputRow, SeaTunnelRowType outRowType) { // ------Physical Query Plan Execution------ diff --git a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java index fcf14cc7b9d..999b7fecd44 100644 --- a/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java +++ b/seatunnel-transforms-v2/src/test/java/org/apache/seatunnel/transform/sql/SQLTransformTest.java @@ -166,12 +166,13 @@ public void testEscapeIdentifier() { ReadonlyConfig.fromMap( Collections.singletonMap( "query", - "select id, trim(`apply`) as `apply` from test where `apply` = 'a'")); + "select `id`, trim(`apply`) as `apply` from test where `apply` = 'a'")); SQLTransform sqlTransform = new SQLTransform(config, table); TableSchema tableSchema = sqlTransform.transformTableSchema(); List result = sqlTransform.transformRow( new SeaTunnelRow(new Object[] {Integer.valueOf(1), String.valueOf("a")})); + Assertions.assertEquals("id", tableSchema.getFieldNames()[0]); Assertions.assertEquals("apply", tableSchema.getFieldNames()[1]); Assertions.assertEquals("a", result.get(0).getField(1)); result = diff --git a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/test/java/org/apache/seatunnel/translation/spark/sink/SparkSinkTest.java b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/test/java/org/apache/seatunnel/translation/spark/sink/SparkSinkTest.java index 2e0d0f3f0d3..44b5fe6e83b 100644 --- a/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/test/java/org/apache/seatunnel/translation/spark/sink/SparkSinkTest.java +++ b/seatunnel-translation/seatunnel-translation-spark/seatunnel-translation-spark-3.3/src/test/java/org/apache/seatunnel/translation/spark/sink/SparkSinkTest.java @@ -36,8 +36,6 @@ import org.apache.spark.sql.types.StructType; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.condition.DisabledOnJre; -import org.junit.jupiter.api.condition.JRE; import java.math.BigDecimal; import java.sql.Timestamp; @@ -60,11 +58,6 @@ public class SparkSinkTest { @Test - @DisabledOnJre( - value = JRE.JAVA_11, - disabledReason = - "We should update apache common lang3 version to 3.8 to avoid NPE, " - + "see https://github.com/apache/commons-lang/commit/50ce8c44e1601acffa39f5568f0fc140aade0564") public void testSparkSinkWriteDataWithCopy() { // We should make sure that the data is written to the sink with copy. 
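// A SparkSession is created below to drive the sink write path and check that rows are written with a copy (per the test name and the comment above).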
SparkSession spark = diff --git a/tools/update_modules_check/update_modules_check.py b/tools/update_modules_check/update_modules_check.py index 0bc4ec7b764..cef49ad4c35 100644 --- a/tools/update_modules_check/update_modules_check.py +++ b/tools/update_modules_check/update_modules_check.py @@ -142,7 +142,7 @@ def get_deleted_modules(files): def get_sub_it_modules(modules, total_num, current_num): - modules_arr = modules.split(",") + modules_arr = list(dict.fromkeys(modules.split(","))) modules_arr.remove("connector-jdbc-e2e") modules_arr.remove("connector-kafka-e2e") modules_arr.remove("connector-rocketmq-e2e") @@ -168,7 +168,7 @@ def get_sub_update_it_modules(modules, total_num, current_num): # :connector-jdbc-e2e-common,:connector-jdbc-e2e-part-1 --> connector-jdbc-e2e-common,:connector-jdbc-e2e-part-1 modules = modules[1:] # connector-jdbc-e2e-common,:connector-jdbc-e2e-part-1 --> [connector-jdbc-e2e-common, connector-jdbc-e2e-part-1] - module_list = modules.split(",:") + module_list = list(dict.fromkeys(modules.split(",:"))) if "connector-kudu-e2e" in module_list: module_list.remove("connector-kudu-e2e") if "connector-amazonsqs-e2e" in module_list:
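# Note: list(dict.fromkeys(...)) above removes duplicate module names while keeping their first-seen order (dict insertion ordering is guaranteed in Python 3.7+).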