Skip to content

Commit

Permalink
Managed Iceberg hive support and integration tests (#32052)
Browse files Browse the repository at this point in the history
* iceberg hive support and integration tests

* split read and write tests; cleanup

* add test documentation

* extend new config_properties arg to translation tests

* revert beam schema override

* actually run hive ITs

* trigger integration tests

* cut down hive database source lines
  • Loading branch information
ahmedabu98 authored Aug 9, 2024
1 parent 17298b5 commit b21a84a
Show file tree
Hide file tree
Showing 21 changed files with 1,423 additions and 143 deletions.
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 3
"modification": 4
}
2 changes: 1 addition & 1 deletion .github/workflows/IO_Iceberg_Integration_Tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,4 +75,4 @@ jobs:
- name: Run IcebergIO Integration Test
uses: ./.github/actions/gradle-command-self-hosted-action
with:
gradle-command: :sdks:java:io:iceberg:integrationTest
gradle-command: :sdks:java:io:iceberg:catalogTests
5 changes: 5 additions & 0 deletions sdks/java/io/iceberg/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,11 @@ task integrationTest(type: Test) {
testClassesDirs = sourceSets.test.output.classesDirs
}

// Umbrella task for the catalog-backed integration suites: it has no action of
// its own and only pulls in this project's integrationTest task together with
// the integrationTest task of the :sdks:java:io:iceberg:hive project.
tasks.register('catalogTests') {
    dependsOn(integrationTest, ":sdks:java:io:iceberg:hive:integrationTest")
}

task loadTest(type: Test) {
def gcpProject = project.findProperty('gcpProject') ?: 'apache-beam-testing'
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/temp-lt'
Expand Down
80 changes: 80 additions & 0 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import groovy.json.JsonOutput

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Apply the Beam module plugin, which supplies applyJavaNature used below.
plugins { id 'org.apache.beam.module' }
// Configure this project as a Java module: fixed automatic module name, no
// Javadoc export. NOTE(review): the empty shadowClosure presumably requests a
// shaded jar with default shading rules -- confirm against the Beam module plugin.
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.iceberg.hive',
exportJavadoc: false,
shadowClosure: {},
)

// Human-readable project name and summary.
description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive"
ext.summary = "Runtime dependencies needed for Hive catalog integration."

// Versions interpolated into the dependency coordinates declared in this script.
def hive_version = "3.1.3"
def iceberg_version = "1.4.2"

// Dependencies for the Hive catalog module. The runtimeOnly entries are what
// this module provides at runtime; every test* entry exists only to build and
// run the tests in this project.
dependencies {
// dependencies needed to run with iceberg's hive catalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
// hive-exec is taken from the sibling :hive:exec project's "shadow" configuration
runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
runtimeOnly library.java.bigdataoss_gcs_connector
runtimeOnly library.java.hadoop_client

// ----- below dependencies are for testing and will not appear in the shaded jar -----
// Beam IcebergIO dependencies
testImplementation project(path: ":sdks:java:core", configuration: "shadow")
testImplementation project(":sdks:java:managed")
testImplementation project(":sdks:java:io:iceberg")
testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow")
testRuntimeOnly library.java.snake_yaml

// needed to set up the test environment
testImplementation "org.apache.iceberg:iceberg-common:$iceberg_version"
testImplementation "org.apache.iceberg:iceberg-core:$iceberg_version"
testImplementation "org.assertj:assertj-core:3.11.1"
testImplementation library.java.junit

// needed to set up test Hive Metastore and run tests
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
// NOTE(review): hive-exec and parquet-hadoop-bundle are excluded here, presumably
// so that only the copy from :hive:exec (declared above) ends up on the test
// classpath -- confirm.
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}
testImplementation "org.apache.iceberg:iceberg-parquet:$iceberg_version"
testImplementation "org.apache.parquet:parquet-column:1.12.0"
}

// Test task that runs every compiled test class whose name ends in "IT".
// The pipeline temp location defaults to a fixed GCS path and can be
// overridden with the 'gcpTempLocation' project property.
task integrationTest(type: Test) {
    group = "Verification"

    // Resolve the temp location, then pass it to the tests as a JSON-encoded
    // list of pipeline options via the beamTestPipelineOptions system property.
    def tempLocationOverride = project.findProperty('gcpTempLocation')
    def tempLocation = tempLocationOverride ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it'
    def pipelineOptions = ["--tempLocation=${tempLocation}"]
    systemProperty "beamTestPipelineOptions", JsonOutput.toJson(pipelineOptions)

    // Run against the regular test source set, restricted to *IT classes.
    testClassesDirs = sourceSets.test.output.classesDirs
    classpath = sourceSets.test.runtimeClasspath
    include '**/*IT.class'
    maxParallelForks = 4

    // These ITs interact with a live service, so the task must never be
    // treated as up to date.
    outputs.upToDateWhen { false }
}
58 changes: 58 additions & 0 deletions sdks/java/io/iceberg/hive/exec/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Plugins for the hive-exec repackaging project: the Beam module plugin, the
// core Java plugin, and the Shadow plugin that provides the shadowJar task
// configured later in this script.
plugins {
id 'org.apache.beam.module'
id 'java'
id 'com.github.johnrengelman.shadow'
}

// Single source of truth for the hive-exec version. It is used for both
// dependency coordinates below and for the version stamped on the shaded jar,
// so the three can no longer drift apart.
def hive_version = "3.1.3"

dependencies {
    implementation("org.apache.hive:hive-exec:$hive_version")
    // hive-exec is only repackaged here and never referenced from source.
    // NOTE(review): permitUnusedDeclared presumably silences the
    // unused-dependency check for it -- confirm.
    permitUnusedDeclared("org.apache.hive:hive-exec:$hive_version")
}

// Configuration through which other projects consume the shaded jar.
configurations {
    shadow
}

// Publish the shadowJar output on the "shadow" configuration; builtBy makes
// consumers of the artifact trigger the shadowJar task.
artifacts {
    shadow(archives(shadowJar) {
        builtBy shadowJar
    })
}

shadowJar {
    zip64 true

    // Relocate the protobuf and parquet packages so they cannot clash with the
    // versions used elsewhere.
    // NOTE(review): the previous comment said Guava ("com.google.guava") needed
    // shading, but no Guava relocation (e.g. 'com.google.common') is configured
    // here. Confirm whether one is required.
    relocate 'com.google.protobuf', getJavaRelocatedPath('com.google.protobuf')
    relocate 'shaded.parquet', getJavaRelocatedPath('shaded.parquet')
    relocate 'org.apache.parquet', getJavaRelocatedPath('org.apache.parquet')

    // Keep the shaded jar's version aligned with the hive-exec version.
    version hive_version
    mergeServiceFiles()

    exclude 'LICENSE'
    exclude(
        'org/xml/**',
        'javax/**',
        'com/sun/**'
    )
}
// Human-readable project name and summary for the repackaged hive-exec artifact.
description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive :: Exec"
ext.summary = "A copy of the hive-exec dependency with some popular libraries relocated."
Loading

0 comments on commit b21a84a

Please sign in to comment.