Skip to content

Commit

Permalink
Switch to use unshaded hive-exec for io expansion service
Browse files Browse the repository at this point in the history
* This enables the shadow jar to pick up newer versions of its dependencies
  • Loading branch information
Abacn committed Dec 11, 2024
1 parent b4fbf89 commit d1fd25f
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 3
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 1
}
2 changes: 1 addition & 1 deletion sdks/java/io/expansion-service/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ dependencies {
runtimeOnly library.java.bigdataoss_gcs_connector
// Needed for HiveCatalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:1.4.2")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive")

runtimeOnly library.java.kafka_clients
runtimeOnly library.java.slf4j_jdk14
Expand Down
32 changes: 27 additions & 5 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,35 @@ plugins { id 'org.apache.beam.module' }
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.iceberg.hive',
exportJavadoc: false,
shadowClosure: {},
validateShadowJar: false, // fails with "Could not receive a message from the daemon.", likely a shadow plugin bug
publish: false, // it's an intermediate jar for io-expansion-service
)

description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive"
ext.summary = "Runtime dependencies needed for Hive catalog integration."

def hive_version = "3.1.3"
def hadoop_version = "3.4.1"
def iceberg_version = "1.4.2"

dependencies {
// dependencies needed to run with iceberg's hive catalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
// these dependencies are going to be included in io-expansion-service
implementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
permitUnusedDeclared ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
// analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely the plugin does not recognize "core" classifier
// use "core" classifier to depend on un-shaded jar
runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") {
// old hadoop-yarn-server-resourcemanager contains a critical log4j vulnerability
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-resourcemanager"
// old hbase-client contains critical log4j and hadoop vulnerabilities
exclude group: "org.apache.hbase", module: "hbase-client"
}
runtimeOnly ("org.apache.hadoop:hadoop-yarn-server-resourcemanager:$hadoop_version")
runtimeOnly ("org.apache.hbase:hbase-client:2.6.1-hadoop3")
implementation ("org.apache.hive:hive-metastore:$hive_version")
runtimeOnly ("org.apache.iceberg:iceberg-parquet:$iceberg_version")
permitUnusedDeclared ("org.apache.hive:hive-metastore:$hive_version")

// ----- below dependencies are for testing and will not appear in the shaded jar -----
// Beam IcebergIO dependencies
Expand All @@ -52,8 +68,9 @@ dependencies {
testImplementation library.java.junit

// needed to set up test Hive Metastore and run tests
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
// testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation ("org.apache.hive:hive-exec:$hive_version:core")
// testImplementation ("org.apache.hive:hive-metastore:$hive_version")
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
Expand All @@ -62,6 +79,11 @@ dependencies {
testImplementation "org.apache.parquet:parquet-column:1.12.0"
}

configurations.all {
// Exclude the fat jar "parquet-hadoop-bundle" from every configuration of this project:
// it conflicts with the "parquet-hadoop" artifact used by org.apache.iceberg:iceberg-parquet.
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

task integrationTest(type: Test) {
group = "Verification"
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it'
Expand Down
65 changes: 0 additions & 65 deletions sdks/java/io/iceberg/hive/exec/build.gradle

This file was deleted.

2 changes: 0 additions & 2 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -357,5 +357,3 @@ include("sdks:java:extensions:combiners")
findProject(":sdks:java:extensions:combiners")?.name = "combiners"
include("sdks:java:io:iceberg:hive")
findProject(":sdks:java:io:iceberg:hive")?.name = "hive"
include("sdks:java:io:iceberg:hive:exec")
findProject(":sdks:java:io:iceberg:hive:exec")?.name = "exec"

0 comments on commit d1fd25f

Please sign in to comment.