Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to use unshaded hive-exec for io expansion service #33349

Merged
merged 2 commits into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/trigger_files/IO_Iceberg_Integration_Tests.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 3
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"comment": "Modify this file in a trivial way to cause this test suite to run",
"modification": 2
"modification": 1
}
3 changes: 2 additions & 1 deletion sdks/java/io/expansion-service/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,11 @@ dependencies {
runtimeOnly library.java.bigdataoss_gcs_connector
// Needed for HiveCatalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:1.4.2")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive")

runtimeOnly library.java.kafka_clients
runtimeOnly library.java.slf4j_jdk14
testImplementation(library.java.junit)
}

task runExpansionService (type: JavaExec) {
Expand Down
34 changes: 29 additions & 5 deletions sdks/java/io/iceberg/hive/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,40 @@ plugins { id 'org.apache.beam.module' }
applyJavaNature(
automaticModuleName: 'org.apache.beam.sdk.io.iceberg.hive',
exportJavadoc: false,
shadowClosure: {},
validateShadowJar: false, // fails with "Could not receive a message from the daemon.", likely a shadow plugin bug
Abacn marked this conversation as resolved.
Show resolved Hide resolved
publish: false, // it's an intermediate jar for io-expansion-service
)

description = "Apache Beam :: SDKs :: Java :: IO :: Iceberg :: Hive"
ext.summary = "Runtime dependencies needed for Hive catalog integration."

def hive_version = "3.1.3"
def hbase_version = "2.6.1-hadoop3"
def hadoop_version = "3.4.1"
def iceberg_version = "1.4.2"
def avatica_version = "1.25.0"

dependencies {
// dependencies needed to run with iceberg's hive catalog
runtimeOnly ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
runtimeOnly project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
// these dependencies are going to be included in io-expansion-service
implementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
permitUnusedDeclared ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
// analyzeClassesDependencies fails with "Cannot accept visitor on URL", likely the plugin does not recognize "core" classifier
// use "core" classifier to depend on un-shaded jar
runtimeOnly ("org.apache.hive:hive-exec:$hive_version:core") {
// old hadoop-yarn-server-resourcemanager contains a critical log4j vulnerability
exclude group: "org.apache.hadoop", module: "hadoop-yarn-server-resourcemanager"
// old hbase-client contains critical log4j and hadoop vulnerabilities
exclude group: "org.apache.hbase", module: "hbase-client"
// old calcite leaks old protobuf-java
exclude group: "org.apache.calcite.avatica", module: "avatica"
}
runtimeOnly ("org.apache.hadoop:hadoop-yarn-server-resourcemanager:$hadoop_version")
runtimeOnly ("org.apache.hbase:hbase-client:$hbase_version")
runtimeOnly ("org.apache.calcite.avatica:avatica-core:$avatica_version")
implementation ("org.apache.hive:hive-metastore:$hive_version")
runtimeOnly ("org.apache.iceberg:iceberg-parquet:$iceberg_version")
permitUnusedDeclared ("org.apache.hive:hive-metastore:$hive_version")

// ----- below dependencies are for testing and will not appear in the shaded jar -----
// Beam IcebergIO dependencies
Expand All @@ -52,8 +73,6 @@ dependencies {
testImplementation library.java.junit

// needed to set up test Hive Metastore and run tests
testImplementation ("org.apache.iceberg:iceberg-hive-metastore:$iceberg_version")
testImplementation project(path: ":sdks:java:io:iceberg:hive:exec", configuration: "shadow")
testRuntimeOnly ("org.apache.hive.hcatalog:hive-hcatalog-core:$hive_version") {
exclude group: "org.apache.hive", module: "hive-exec"
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
Expand All @@ -62,6 +81,11 @@ dependencies {
testImplementation "org.apache.parquet:parquet-column:1.12.0"
}

configurations.all {
// the fatjar "parquet-hadoop-bundle" conflicts with "parquet-hadoop" used by org.apache.iceberg:iceberg-parquet
exclude group: "org.apache.parquet", module: "parquet-hadoop-bundle"
}

task integrationTest(type: Test) {
group = "Verification"
def gcpTempLocation = project.findProperty('gcpTempLocation') ?: 'gs://temp-storage-for-end-to-end-tests/iceberg-hive-it'
Expand Down
65 changes: 0 additions & 65 deletions sdks/java/io/iceberg/hive/exec/build.gradle

This file was deleted.

2 changes: 0 additions & 2 deletions settings.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -357,5 +357,3 @@ include("sdks:java:extensions:combiners")
findProject(":sdks:java:extensions:combiners")?.name = "combiners"
include("sdks:java:io:iceberg:hive")
findProject(":sdks:java:io:iceberg:hive")?.name = "hive"
include("sdks:java:io:iceberg:hive:exec")
findProject(":sdks:java:io:iceberg:hive:exec")?.name = "exec"
Loading