Skip to content

Commit

Permalink
[Managed Iceberg] refactor integration tests; make it easy to add a new catalog to test suite (#33444)
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmedabu98 authored Dec 28, 2024
1 parent def0678 commit 0dc2c24
Show file tree
Hide file tree
Showing 12 changed files with 323 additions and 503 deletions.
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 7
"modification": 1
}
2 changes: 1 addition & 1 deletion .github/workflows/IO_Iceberg_Integration_Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ jobs:
- name: Run IcebergIO Integration Test
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:iceberg:catalogTests --info
gradle-command: :sdks:java:io:iceberg:integrationTest --info
22 changes: 15 additions & 7 deletions sdks/java/io/iceberg/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ hadoopVersions.each {kv -> configurations.create("hadoopVersion$kv.key")}
def iceberg_version = "1.6.1"
def parquet_version = "1.12.0"
def orc_version = "1.9.2"
def hive_version = "3.1.3"

dependencies {
implementation library.java.vendored_guava_32_1_2_jre
Expand All @@ -66,6 +67,18 @@ dependencies {
testImplementation project(path: ":sdks:java:core", configuration: "shadowTest")
testImplementation project(":sdks:java:extensions:google-cloud-platform-core")
testImplementation library.java.junit

// Hive catalog test dependencies
testImplementation project(path: ":sdks:java:io:iceberg:hive")
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation ("org.apache.hive:hive-metastore:$hive_version")
testImplementation "org.assertj:assertj-core:3.11.1"
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

testRuntimeOnly library.java.slf4j_jdk14
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly project(path: ":runners:google-cloud-dataflow-java")
Expand Down Expand Up @@ -106,10 +119,10 @@ hadoopVersions.each { kv ->
}
}

task integrationTest(type: Test) {
task catalogTests(type: Test) {
group = "Verification"
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://managed-iceberg-integration-tests'
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--project=${gcpProject}",
"--tempLocation=${gcpTempLocation}",
Expand All @@ -125,11 +138,6 @@ task integrationTest(type: Test) {
testClassesDirs = sourceSets.test.output.classesDirs
}

tasks.register('catalogTests') {
dependsOn integrationTest
dependsOn ":sdks:java:io:iceberg:hive:integrationTest"
}

task loadTest(type: Test) {
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/temp-lt'
Expand Down
48 changes: 2 additions & 46 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,7 @@ def avatica_version = "1.25.0"
dependencies {
// dependencies needed to run with iceberg's hive catalog
// these dependencies are going to be included in io-expansion-service
implementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
permitUnusedDeclared ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
// analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely the plugin does not recognize "core" classifier
// use "core" classifier to depend on un-shaded jar
runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") {
Expand All @@ -51,53 +50,10 @@ dependencies {
runtimeOnly ("org.apache.hadoop:hadoop-yarn-server-resourcemanager:$hadoop_version")
runtimeOnly ("org.apache.hbase:hbase-client:$hbase_version")
runtimeOnly ("org.apache.calcite.avatica:avatica-core:$avatica_version")
implementation ("org.apache.hive:hive-metastore:$hive_version")
runtimeOnly ("org.apache.iceberg:iceberg-parquet:$iceberg_version")
permitUnusedDeclared ("org.apache.hive:hive-metastore:$hive_version")

// ----- below dependencies are for testing and will not appear in the shaded jar -----
// Beam IcebergIO dependencies
testImplementation project(path: ":sdks:java:core", configuration: "shadow")
testImplementation project(":sdks:java:managed")
testImplementation project(":sdks:java:io:iceberg")
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly library.java.snake_yaml
testRuntimeOnly library.java.bigdataoss_gcs_connector
testRuntimeOnly library.java.hadoop_client

// needed to set up the test environment
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation "org.apache.iceberg:iceberg-core:$iceberg_version"
testImplementation "org.assertj:assertj-core:3.11.1"
testImplementation library.java.junit

// needed to set up test Hive Metastore and run tests
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}
testImplementation "org.apache.iceberg:iceberg-parquet:$iceberg_version"
testImplementation "org.apache.parquet:parquet-column:1.12.0"
runtimeOnly ("org.apache.hive:hive-metastore:$hive_version")
}

configurations.all {
// the fatjar "parquet-hadoop-bundle" conflicts with "parquet-hadoop" used by org.apache.iceberg:iceberg-parquet
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

task integrationTest(type: Test) {
group = "Verification"
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it'
systemProperty "beamTestPipelineOptions", JsonOutput.toJson([
"--tempLocation=${gcpTempLocation}",
])

// Disable Gradle cache: these ITs interact with live service that should always be considered "out of date"
outputs.upToDateWhen { false }

include '**/*IT.class'

maxParallelForks 4
classpath = sourceSets.test.runtimeClasspath
testClassesDirs = sourceSets.test.output.classesDirs
}
Loading

0 comments on commit 0dc2c24

Please sign in to comment.