From 8a002557d0e1061159d405b10328729bb56a69e2 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 12:10:12 +0000 Subject: [PATCH 01/65] Updated to BK-core --- beekeeper-core/pom.xml | 53 +++++++++++++++++++ .../core/checker/IcebergTableChecker.java | 44 +++++++++++++++ .../beekeeper/core/config/CoreBeans.java | 43 +++++++++++++++ 3 files changed, 140 insertions(+) create mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java create mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java diff --git a/beekeeper-core/pom.xml b/beekeeper-core/pom.xml index 58e3760d..73519fcc 100644 --- a/beekeeper-core/pom.xml +++ b/beekeeper-core/pom.xml @@ -10,6 +10,11 @@ beekeeper-core + + 2.3.7 + 1.4.2 + + ch.qos.logback @@ -82,6 +87,54 @@ spring-test test + + + + org.apache.hive + hive-metastore + ${hive.version} + + + org.apache.hbase + hbase-client + + + org.slf4j + slf4j-log4j12 + + + org.apache.logging.log4j + log4j-slf4j-impl + + + junit + junit + + + org.eclipse.jetty.aggregate + jetty-all + + + org.eclipse.jetty.orbit + javax.servlet + + + javax.servlet + servlet-api + + + + + com.hotels + hcommon-hive-metastore + ${hcommon-hive-metastore.version} + + + net.java.dev.jna + jna + + + diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java new file mode 100644 index 00000000..28edb6e0 --- /dev/null +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java @@ -0,0 +1,44 @@ +package com.expediagroup.beekeeper.core.checker; + +import java.util.Map; +import java.util.function.Supplier; + +import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; +import org.apache.hadoop.hive.metastore.api.Table; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.expediagroup.beekeeper.core.error.BeekeeperException; +import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; + +public class IcebergTableChecker { + + private static final Logger log = LoggerFactory.getLogger(IcebergTableChecker.class); + + private final Supplier metaStoreClientSupplier; + + public IcebergTableChecker(Supplier metaStoreClientSupplier) { + this.metaStoreClientSupplier = metaStoreClientSupplier; + } + + public boolean isIcebergTable(String databaseName, String tableName) { + try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) { + Table table = client.getTable(databaseName, tableName); + + // Extract table parameters and storage descriptor properties + Map parameters = table.getParameters(); + String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); + String format = parameters.getOrDefault("format", "").toLowerCase(); + String outputFormat = table.getSd().getOutputFormat().toLowerCase(); + + // Check if any of the fields indicate Iceberg + return tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg"); + + } catch (NoSuchObjectException e) { + log.warn("Table {}.{} does not exist.", databaseName, tableName); + return false; + } catch (Exception e) { + throw new BeekeeperException("Error checking if table is Iceberg", e); + } + } +} diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java new file mode 100644 
index 00000000..056fe80f --- /dev/null +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java @@ -0,0 +1,43 @@ +package com.expediagroup.beekeeper.core.config; + +import org.apache.hadoop.hive.conf.HiveConf; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; + +import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; +import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; +import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; + +import java.util.function.Supplier; + +@Configuration +public class CoreBeans { + + @Bean + public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { + HiveConf hiveConf = new HiveConf(); + hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); + return hiveConf; + } + + @Bean + public CloseableMetaStoreClientFactory metaStoreClientFactory() { + return new CloseableMetaStoreClientFactory(); + } + + @Bean + public Supplier metaStoreClientSupplier( + CloseableMetaStoreClientFactory metaStoreClientFactory, + HiveConf hiveConf) { + String name = "beekeeper-core"; + return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); + } + + @Bean + public IcebergTableChecker icebergTableChecker(Supplier metaStoreClientSupplier) { + return new IcebergTableChecker(metaStoreClientSupplier); + } +} From 6baeee4776315b7bfe2617ebbecbd5602158480d Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 12:10:38 +0000 Subject: [PATCH 02/65] Updated to path-cleanup --- beekeeper-path-cleanup/pom.xml | 5 + .../path/cleanup/context/CommonBeans.java | 4 + .../cleanup/handler/GenericPathHandler.java | 8 +- .../handler/UnreferencedPathHandler.java | 31 ++++- .../handler/GenericPathHandlerTest.java | 5 +- .../handler/UnreferencedPathHandlerTest.java | 126 +++++++++++++++++- 6 files changed, 172 insertions(+), 7 deletions(-) diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index ae9c43f3..e74ba2db 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -28,6 +28,11 @@ + + com.expediagroup + beekeeper-core + ${project.version} + ch.qos.logback diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index 0b118243..45a6c5da 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -22,10 +22,13 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; +import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; import org.springframework.scheduling.annotation.EnableScheduling; +import com.expediagroup.beekeeper.core.config.CoreBeans; + import io.micrometer.core.instrument.MeterRegistry; import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; @@ -50,6 +53,7 @@ @ComponentScan({ 
"com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.cleanup" }) @EntityScan(basePackages = { "com.expediagroup.beekeeper.core.model" }) @EnableJpaRepositories(basePackages = { "com.expediagroup.beekeeper.core.repository" }) +@Import(CoreBeans.class) public class CommonBeans { @Bean diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index 30442c22..173c20f7 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -31,7 +31,7 @@ public abstract class GenericPathHandler { - private final Logger log = LoggerFactory.getLogger(GenericPathHandler.class); + protected final Logger log = LoggerFactory.getLogger(GenericPathHandler.class); private final HousekeepingPathRepository housekeepingPathRepository; private final PathCleaner pathCleaner; @@ -75,7 +75,7 @@ private boolean cleanUpPath(HousekeepingPath housekeepingPath) { return false; } - private void cleanupContent(HousekeepingPath housekeepingPath) { + protected void cleanupContent(HousekeepingPath housekeepingPath) { try { log.info("Cleaning up path \"{}\"", housekeepingPath.getPath()); if (cleanUpPath(housekeepingPath)) { @@ -89,13 +89,13 @@ private void cleanupContent(HousekeepingPath housekeepingPath) { } } - private void updateAttemptsAndStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { + protected void updateAttemptsAndStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { housekeepingPath.setCleanupAttempts(housekeepingPath.getCleanupAttempts() + 1); housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } - private void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { + protected void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java index 82d68b69..910fd38b 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java @@ -24,24 +24,53 @@ import org.springframework.stereotype.Component; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; +import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; +import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @Component public class UnreferencedPathHandler extends GenericPathHandler { private final HousekeepingPathRepository housekeepingPathRepository; + private final IcebergTableChecker icebergTableChecker; @Autowired public UnreferencedPathHandler( HousekeepingPathRepository housekeepingPathRepository, - @Qualifier("s3PathCleaner") PathCleaner pathCleaner) { + @Qualifier("s3PathCleaner") PathCleaner 
pathCleaner, + IcebergTableChecker icebergTableChecker) { super(housekeepingPathRepository, pathCleaner); this.housekeepingPathRepository = housekeepingPathRepository; + this.icebergTableChecker = icebergTableChecker; } @Override public Slice findRecordsToClean(LocalDateTime instant, Pageable pageable) { return housekeepingPathRepository.findRecordsForCleanup(instant, pageable); } + + @Override + protected void cleanupContent(HousekeepingPath housekeepingPath) { // extends method from generic handler + String databaseName = housekeepingPath.getDatabaseName(); + String tableName = housekeepingPath.getTableName(); + + if (databaseName == null || tableName == null) { + super.cleanupContent(housekeepingPath); // if no table info delegate process to parent class + return; + } + + try { + if (icebergTableChecker.isIcebergTable(databaseName, tableName)) { + updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); + log.info("Skipped cleanup for Iceberg table: {}.{}", databaseName, tableName); + return; + } + + super.cleanupContent(housekeepingPath); // If not an Iceberg table, proceed with the default cleanup logic. is this ok?? + } catch (Exception e) { + updateAttemptsAndStatus(housekeepingPath, HousekeepingStatus.FAILED); // Mark the path as FAILED + log.warn("Failed to check if table {}.{} is Iceberg", databaseName, tableName, e); + } + } } diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java index aa66d192..27ebda6a 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java @@ -36,6 +36,7 @@ import org.springframework.data.domain.Pageable; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; +import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @@ -47,6 +48,8 @@ public class GenericPathHandlerTest { @Mock private S3PathCleaner pathCleaner; @Mock + private IcebergTableChecker icebergTableChecker; + @Mock private HousekeepingPath mockPath; @Mock private Pageable mockPageable; @@ -60,7 +63,7 @@ public class GenericPathHandlerTest { @BeforeEach public void initTest() { - handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); when(mockPath.getPath()).thenReturn(VALID_TABLE_PATH); } diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java index c00e9ca6..6703896e 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java @@ -16,10 +16,14 @@ package com.expediagroup.beekeeper.path.cleanup.handler; import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; +import static 
org.mockito.Mockito.when; +import static org.mockito.Mockito.never; import static com.expediagroup.beekeeper.core.model.LifecycleEventType.UNREFERENCED; +import java.time.Duration; import java.time.LocalDateTime; import org.junit.jupiter.api.BeforeEach; @@ -31,7 +35,11 @@ import org.springframework.data.domain.Pageable; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; +import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; +import com.expediagroup.beekeeper.core.model.HousekeepingPath; +import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.core.model.PeriodDuration; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @ExtendWith(MockitoExtension.class) @@ -41,13 +49,15 @@ public class UnreferencedPathHandlerTest { private HousekeepingPathRepository housekeepingPathRepository; @Mock private S3PathCleaner s3PathCleaner; + @Mock + private IcebergTableChecker icebergTableChecker; private LifecycleEventType lifecycleEventType = UNREFERENCED; private UnreferencedPathHandler handler; @BeforeEach public void initTest() { - handler = new UnreferencedPathHandler(housekeepingPathRepository, s3PathCleaner); + handler = new UnreferencedPathHandler(housekeepingPathRepository, s3PathCleaner, icebergTableChecker); } @Test @@ -67,4 +77,118 @@ public void verifyHousekeepingPathFetch() { handler.findRecordsToClean(now, emptyPageable); verify(housekeepingPathRepository).findRecordsForCleanup(now, emptyPageable); } + + @Test + public void cleanupContent_WithNullDatabaseNameAndTableName_ShouldDelegateToGenericPathHandler() { + HousekeepingPath pathWithNulls = createHousekeepingPath("s3://bucket/null_table", null, null); + + handler.cleanupContent(pathWithNulls); + + verify(s3PathCleaner).cleanupPath(pathWithNulls); + verify(housekeepingPathRepository).save(pathWithNulls); + // Assert that the status is set to DELETED by the superclass + assertThat(pathWithNulls.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); + } + + @Test + public void cleanupContent_IcebergTable_ShouldSkipCleanup() { + HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); + + when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); + + handler.cleanupContent(icebergPath); + // verify that pathCleaner is not called and cleanup is skipped for Iceberg tables + verify(s3PathCleaner, never()).cleanupPath(any(HousekeepingPath.class)); + verify(housekeepingPathRepository).save(icebergPath); + assertThat(icebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.SKIPPED); + } + + @Test + public void cleanupContent_NonIcebergTable_ShouldProceedWithCleanup() { + HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); + + // Mock icebergTbleChcker to return false + when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); + + handler.cleanupContent(nonIcebergPath); + + verify(s3PathCleaner).cleanupPath(nonIcebergPath); + verify(housekeepingPathRepository).save(nonIcebergPath); + assertThat(nonIcebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); + } + + @Test + public void cleanupContent_IcebergTableCheckThrowsException_ShouldSetStatusToFailed() { + HousekeepingPath errorPath = createHousekeepingPath("s3://bucket/error_table", "database", "error_table"); + + // 
Mock the IcebergTableChecker to throw an exception + when(icebergTableChecker.isIcebergTable("database", "error_table")) + .thenThrow(new RuntimeException("Iceberg check failed")); + + handler.cleanupContent(errorPath); + + verify(s3PathCleaner, never()).cleanupPath(any(HousekeepingPath.class)); + verify(housekeepingPathRepository).save(errorPath); + assertThat(errorPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.FAILED); + assertThat(errorPath.getCleanupAttempts()).isEqualTo(1); + } + + @Test + public void cleanupContent_IcebergTable_ShouldNotIncrementCleanupAttempts() { + HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); + + when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); + + handler.cleanupContent(icebergPath); + + assertThat(icebergPath.getCleanupAttempts()).isEqualTo(0); + } + + @Test + public void cleanupContent_NonIcebergTable_ShouldIncrementCleanupAttempts() { + HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); + + when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); + + handler.cleanupContent(nonIcebergPath); + + assertThat(nonIcebergPath.getCleanupAttempts()).isEqualTo(1); + } + + @Test + public void cleanupContent_MultiplePaths_ShouldHandleEachAccordingly() { + HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); + HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); + + when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); + when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); + + handler.cleanupContent(icebergPath); + handler.cleanupContent(nonIcebergPath); + + // Iceberg Path: cleanup skipped + verify(s3PathCleaner, never()).cleanupPath(icebergPath); + verify(housekeepingPathRepository).save(icebergPath); + assertThat(icebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.SKIPPED); + assertThat(icebergPath.getCleanupAttempts()).isEqualTo(0); + + // Non-Iceberg Path: cleanup proceeded + verify(s3PathCleaner).cleanupPath(nonIcebergPath); + verify(housekeepingPathRepository).save(nonIcebergPath); + assertThat(nonIcebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); + assertThat(nonIcebergPath.getCleanupAttempts()).isEqualTo(1); + } + + private HousekeepingPath createHousekeepingPath(String path, String databaseName, String tableName) { + return HousekeepingPath.builder() + .path(path) + .databaseName(databaseName) + .tableName(tableName) + .housekeepingStatus(HousekeepingStatus.SCHEDULED) + .creationTimestamp(LocalDateTime.now()) + .cleanupDelay(PeriodDuration.of(Duration.ofDays(3))) // Example: 3 days delay + .cleanupAttempts(0) + .lifecycleType(UNREFERENCED.toString()) + .build(); + } } From 7090dc43a52a773937b54de76e66d4c57e46bd5d Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 12:14:42 +0000 Subject: [PATCH 03/65] Update PagingCleanupServiceTest.java --- .../cleanup/service/PagingCleanupServiceTest.java | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java 
b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java index db654ed4..00279f22 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java @@ -50,6 +50,7 @@ import com.google.common.collect.Lists; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; +import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -74,9 +75,10 @@ public class PagingCleanupServiceTest { private @Autowired HousekeepingPathRepository housekeepingPathRepository; private @MockBean PathCleaner pathCleaner; + private @MockBean IcebergTableChecker icebergTableChecker; @Test public void typicalWithPaging() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List.of("s3://bucket/some_foo", "s3://bucket/some_bar", "s3://bucket/some_foobar"); @@ -97,7 +99,7 @@ public void typicalWithPaging() { @Test public void mixOfScheduledAndFailedPaths() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), @@ -113,7 +115,7 @@ public void mixOfScheduledAndFailedPaths() { @Test public void mixOfAllPaths() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), @@ -130,7 +132,7 @@ public void mixOfAllPaths() { @Test void pathCleanerException() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); doThrow(new RuntimeException("Error")).doNothing().when(pathCleaner).cleanupPath(any(HousekeepingPath.class)); @@ -158,7 +160,7 @@ void pathCleanerException() { @Test @Timeout(value = 10) void doNotInfiniteLoopOnRepeatedFailures() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 1, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", FAILED), @@ -186,7 +188,7 @@ void doNotInfiniteLoopOnRepeatedFailures() 
{ @Test @Timeout(value = 10) void doNotInfiniteLoopOnDryRunCleanup() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 1, true); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), @@ -211,7 +213,6 @@ private HousekeepingPath createEntityHousekeepingPath(String path, HousekeepingS .housekeepingStatus(housekeepingStatus) .creationTimestamp(localNow) .modifiedTimestamp(localNow) - .modifiedTimestamp(localNow) .cleanupDelay(PeriodDuration.of(Duration.parse("P3D"))) .cleanupAttempts(0) .lifecycleType(UNREFERENCED.toString()) From 0f99aa69d6639161f7989a116b24ac15b7de429a Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 20 Nov 2024 13:42:11 +0100 Subject: [PATCH 04/65] cleanup --- .../core/checker/IcebergTableChecker.java | 12 +- .../beekeeper/core/config/CoreBeans.java | 43 ------ beekeeper-path-cleanup/pom.xml | 5 - .../path/cleanup/context/CommonBeans.java | 4 - .../cleanup/handler/GenericPathHandler.java | 8 +- .../handler/UnreferencedPathHandler.java | 31 +---- .../handler/GenericPathHandlerTest.java | 5 +- .../handler/UnreferencedPathHandlerTest.java | 126 +----------------- 8 files changed, 11 insertions(+), 223 deletions(-) delete mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java index 28edb6e0..cf232973 100644 --- a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java @@ -21,22 +21,18 @@ public IcebergTableChecker(Supplier metaStoreClientSup this.metaStoreClientSupplier = metaStoreClientSupplier; } - public boolean isIcebergTable(String databaseName, String tableName) { + public void throwExceptionIfIceberg(String databaseName, String tableName) { try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) { Table table = client.getTable(databaseName, tableName); - // Extract table parameters and storage descriptor properties Map parameters = table.getParameters(); String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); String format = parameters.getOrDefault("format", "").toLowerCase(); String outputFormat = table.getSd().getOutputFormat().toLowerCase(); - // Check if any of the fields indicate Iceberg - return tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg"); - - } catch (NoSuchObjectException e) { - log.warn("Table {}.{} does not exist.", databaseName, tableName); - return false; + if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { + throw new BeekeeperException("Iceberg tables are not currently supported in Beekeeper"); + } } catch (Exception e) { throw new BeekeeperException("Error checking if table is Iceberg", e); } diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java deleted file mode 100644 index 056fe80f..00000000 --- 
a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/config/CoreBeans.java +++ /dev/null @@ -1,43 +0,0 @@ -package com.expediagroup.beekeeper.core.config; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.context.annotation.Bean; -import org.springframework.context.annotation.Configuration; - -import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; - -import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; -import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; -import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; - -import java.util.function.Supplier; - -@Configuration -public class CoreBeans { - - @Bean - public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { - HiveConf hiveConf = new HiveConf(); - hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); - return hiveConf; - } - - @Bean - public CloseableMetaStoreClientFactory metaStoreClientFactory() { - return new CloseableMetaStoreClientFactory(); - } - - @Bean - public Supplier metaStoreClientSupplier( - CloseableMetaStoreClientFactory metaStoreClientFactory, - HiveConf hiveConf) { - String name = "beekeeper-core"; - return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); - } - - @Bean - public IcebergTableChecker icebergTableChecker(Supplier metaStoreClientSupplier) { - return new IcebergTableChecker(metaStoreClientSupplier); - } -} diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index e74ba2db..ae9c43f3 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -28,11 +28,6 @@ - - com.expediagroup - beekeeper-core - ${project.version} - ch.qos.logback diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index 45a6c5da..0b118243 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -22,13 +22,10 @@ import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.ComponentScan; import org.springframework.context.annotation.Configuration; -import org.springframework.context.annotation.Import; import org.springframework.context.annotation.Profile; import org.springframework.data.jpa.repository.config.EnableJpaRepositories; import org.springframework.scheduling.annotation.EnableScheduling; -import com.expediagroup.beekeeper.core.config.CoreBeans; - import io.micrometer.core.instrument.MeterRegistry; import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; @@ -53,7 +50,6 @@ @ComponentScan({ "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.cleanup" }) @EntityScan(basePackages = { "com.expediagroup.beekeeper.core.model" }) @EnableJpaRepositories(basePackages = { "com.expediagroup.beekeeper.core.repository" }) -@Import(CoreBeans.class) public class CommonBeans { @Bean diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index 173c20f7..30442c22 100644 --- 
a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -31,7 +31,7 @@ public abstract class GenericPathHandler { - protected final Logger log = LoggerFactory.getLogger(GenericPathHandler.class); + private final Logger log = LoggerFactory.getLogger(GenericPathHandler.class); private final HousekeepingPathRepository housekeepingPathRepository; private final PathCleaner pathCleaner; @@ -75,7 +75,7 @@ private boolean cleanUpPath(HousekeepingPath housekeepingPath) { return false; } - protected void cleanupContent(HousekeepingPath housekeepingPath) { + private void cleanupContent(HousekeepingPath housekeepingPath) { try { log.info("Cleaning up path \"{}\"", housekeepingPath.getPath()); if (cleanUpPath(housekeepingPath)) { @@ -89,13 +89,13 @@ protected void cleanupContent(HousekeepingPath housekeepingPath) { } } - protected void updateAttemptsAndStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { + private void updateAttemptsAndStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { housekeepingPath.setCleanupAttempts(housekeepingPath.getCleanupAttempts() + 1); housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } - protected void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { + private void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus status) { housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java index 910fd38b..82d68b69 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandler.java @@ -24,53 +24,24 @@ import org.springframework.stereotype.Component; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; -import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; -import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @Component public class UnreferencedPathHandler extends GenericPathHandler { private final HousekeepingPathRepository housekeepingPathRepository; - private final IcebergTableChecker icebergTableChecker; @Autowired public UnreferencedPathHandler( HousekeepingPathRepository housekeepingPathRepository, - @Qualifier("s3PathCleaner") PathCleaner pathCleaner, - IcebergTableChecker icebergTableChecker) { + @Qualifier("s3PathCleaner") PathCleaner pathCleaner) { super(housekeepingPathRepository, pathCleaner); this.housekeepingPathRepository = housekeepingPathRepository; - this.icebergTableChecker = icebergTableChecker; } @Override public Slice findRecordsToClean(LocalDateTime instant, Pageable pageable) { return housekeepingPathRepository.findRecordsForCleanup(instant, pageable); } - - @Override - protected void cleanupContent(HousekeepingPath housekeepingPath) { // extends method from generic handler - String databaseName = housekeepingPath.getDatabaseName(); - 
String tableName = housekeepingPath.getTableName(); - - if (databaseName == null || tableName == null) { - super.cleanupContent(housekeepingPath); // if no table info delegate process to parent class - return; - } - - try { - if (icebergTableChecker.isIcebergTable(databaseName, tableName)) { - updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); - log.info("Skipped cleanup for Iceberg table: {}.{}", databaseName, tableName); - return; - } - - super.cleanupContent(housekeepingPath); // If not an Iceberg table, proceed with the default cleanup logic. is this ok?? - } catch (Exception e) { - updateAttemptsAndStatus(housekeepingPath, HousekeepingStatus.FAILED); // Mark the path as FAILED - log.warn("Failed to check if table {}.{} is Iceberg", databaseName, tableName, e); - } - } } diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java index 27ebda6a..aa66d192 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandlerTest.java @@ -36,7 +36,6 @@ import org.springframework.data.domain.Pageable; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; -import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @@ -48,8 +47,6 @@ public class GenericPathHandlerTest { @Mock private S3PathCleaner pathCleaner; @Mock - private IcebergTableChecker icebergTableChecker; - @Mock private HousekeepingPath mockPath; @Mock private Pageable mockPageable; @@ -63,7 +60,7 @@ public class GenericPathHandlerTest { @BeforeEach public void initTest() { - handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); when(mockPath.getPath()).thenReturn(VALID_TABLE_PATH); } diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java index 6703896e..c00e9ca6 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/handler/UnreferencedPathHandlerTest.java @@ -16,14 +16,10 @@ package com.expediagroup.beekeeper.path.cleanup.handler; import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; -import static org.mockito.Mockito.never; import static com.expediagroup.beekeeper.core.model.LifecycleEventType.UNREFERENCED; -import java.time.Duration; import java.time.LocalDateTime; import org.junit.jupiter.api.BeforeEach; @@ -35,11 +31,7 @@ import org.springframework.data.domain.Pageable; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; -import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; -import com.expediagroup.beekeeper.core.model.HousekeepingPath; -import 
com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.model.LifecycleEventType; -import com.expediagroup.beekeeper.core.model.PeriodDuration; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @ExtendWith(MockitoExtension.class) @@ -49,15 +41,13 @@ public class UnreferencedPathHandlerTest { private HousekeepingPathRepository housekeepingPathRepository; @Mock private S3PathCleaner s3PathCleaner; - @Mock - private IcebergTableChecker icebergTableChecker; private LifecycleEventType lifecycleEventType = UNREFERENCED; private UnreferencedPathHandler handler; @BeforeEach public void initTest() { - handler = new UnreferencedPathHandler(housekeepingPathRepository, s3PathCleaner, icebergTableChecker); + handler = new UnreferencedPathHandler(housekeepingPathRepository, s3PathCleaner); } @Test @@ -77,118 +67,4 @@ public void verifyHousekeepingPathFetch() { handler.findRecordsToClean(now, emptyPageable); verify(housekeepingPathRepository).findRecordsForCleanup(now, emptyPageable); } - - @Test - public void cleanupContent_WithNullDatabaseNameAndTableName_ShouldDelegateToGenericPathHandler() { - HousekeepingPath pathWithNulls = createHousekeepingPath("s3://bucket/null_table", null, null); - - handler.cleanupContent(pathWithNulls); - - verify(s3PathCleaner).cleanupPath(pathWithNulls); - verify(housekeepingPathRepository).save(pathWithNulls); - // Assert that the status is set to DELETED by the superclass - assertThat(pathWithNulls.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); - } - - @Test - public void cleanupContent_IcebergTable_ShouldSkipCleanup() { - HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); - - when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); - - handler.cleanupContent(icebergPath); - // verify that pathCleaner is not called and cleanup is skipped for Iceberg tables - verify(s3PathCleaner, never()).cleanupPath(any(HousekeepingPath.class)); - verify(housekeepingPathRepository).save(icebergPath); - assertThat(icebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.SKIPPED); - } - - @Test - public void cleanupContent_NonIcebergTable_ShouldProceedWithCleanup() { - HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); - - // Mock icebergTbleChcker to return false - when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); - - handler.cleanupContent(nonIcebergPath); - - verify(s3PathCleaner).cleanupPath(nonIcebergPath); - verify(housekeepingPathRepository).save(nonIcebergPath); - assertThat(nonIcebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); - } - - @Test - public void cleanupContent_IcebergTableCheckThrowsException_ShouldSetStatusToFailed() { - HousekeepingPath errorPath = createHousekeepingPath("s3://bucket/error_table", "database", "error_table"); - - // Mock the IcebergTableChecker to throw an exception - when(icebergTableChecker.isIcebergTable("database", "error_table")) - .thenThrow(new RuntimeException("Iceberg check failed")); - - handler.cleanupContent(errorPath); - - verify(s3PathCleaner, never()).cleanupPath(any(HousekeepingPath.class)); - verify(housekeepingPathRepository).save(errorPath); - assertThat(errorPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.FAILED); - assertThat(errorPath.getCleanupAttempts()).isEqualTo(1); - } - - @Test - public void 
cleanupContent_IcebergTable_ShouldNotIncrementCleanupAttempts() { - HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); - - when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); - - handler.cleanupContent(icebergPath); - - assertThat(icebergPath.getCleanupAttempts()).isEqualTo(0); - } - - @Test - public void cleanupContent_NonIcebergTable_ShouldIncrementCleanupAttempts() { - HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); - - when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); - - handler.cleanupContent(nonIcebergPath); - - assertThat(nonIcebergPath.getCleanupAttempts()).isEqualTo(1); - } - - @Test - public void cleanupContent_MultiplePaths_ShouldHandleEachAccordingly() { - HousekeepingPath icebergPath = createHousekeepingPath("s3://bucket/iceberg_table", "database", "iceberg_table"); - HousekeepingPath nonIcebergPath = createHousekeepingPath("s3://bucket/non_iceberg_table", "database", "non_iceberg_table"); - - when(icebergTableChecker.isIcebergTable("database", "iceberg_table")).thenReturn(true); - when(icebergTableChecker.isIcebergTable("database", "non_iceberg_table")).thenReturn(false); - - handler.cleanupContent(icebergPath); - handler.cleanupContent(nonIcebergPath); - - // Iceberg Path: cleanup skipped - verify(s3PathCleaner, never()).cleanupPath(icebergPath); - verify(housekeepingPathRepository).save(icebergPath); - assertThat(icebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.SKIPPED); - assertThat(icebergPath.getCleanupAttempts()).isEqualTo(0); - - // Non-Iceberg Path: cleanup proceeded - verify(s3PathCleaner).cleanupPath(nonIcebergPath); - verify(housekeepingPathRepository).save(nonIcebergPath); - assertThat(nonIcebergPath.getHousekeepingStatus()).isEqualTo(HousekeepingStatus.DELETED); - assertThat(nonIcebergPath.getCleanupAttempts()).isEqualTo(1); - } - - private HousekeepingPath createHousekeepingPath(String path, String databaseName, String tableName) { - return HousekeepingPath.builder() - .path(path) - .databaseName(databaseName) - .tableName(tableName) - .housekeepingStatus(HousekeepingStatus.SCHEDULED) - .creationTimestamp(LocalDateTime.now()) - .cleanupDelay(PeriodDuration.of(Duration.ofDays(3))) // Example: 3 days delay - .cleanupAttempts(0) - .lifecycleType(UNREFERENCED.toString()) - .build(); - } } From 1e339c9e8e9924d2d44fb9a42b52392b4c7071ca Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 20 Nov 2024 13:47:28 +0100 Subject: [PATCH 05/65] cleanup 2 --- .../cleanup/service/PagingCleanupServiceTest.java | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java index 00279f22..c75b0d53 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/service/PagingCleanupServiceTest.java @@ -50,7 +50,6 @@ import com.google.common.collect.Lists; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; -import com.expediagroup.beekeeper.core.checker.IcebergTableChecker; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import 
com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -75,10 +74,9 @@ public class PagingCleanupServiceTest { private @Autowired HousekeepingPathRepository housekeepingPathRepository; private @MockBean PathCleaner pathCleaner; - private @MockBean IcebergTableChecker icebergTableChecker; @Test public void typicalWithPaging() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List.of("s3://bucket/some_foo", "s3://bucket/some_bar", "s3://bucket/some_foobar"); @@ -99,7 +97,7 @@ public void typicalWithPaging() { @Test public void mixOfScheduledAndFailedPaths() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), @@ -115,7 +113,7 @@ public void mixOfScheduledAndFailedPaths() { @Test public void mixOfAllPaths() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), @@ -132,7 +130,7 @@ public void mixOfAllPaths() { @Test void pathCleanerException() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 2, false); doThrow(new RuntimeException("Error")).doNothing().when(pathCleaner).cleanupPath(any(HousekeepingPath.class)); @@ -160,7 +158,7 @@ void pathCleanerException() { @Test @Timeout(value = 10) void doNotInfiniteLoopOnRepeatedFailures() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 1, false); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", FAILED), @@ -188,7 +186,7 @@ void doNotInfiniteLoopOnRepeatedFailures() { @Test @Timeout(value = 10) void doNotInfiniteLoopOnDryRunCleanup() { - UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner, icebergTableChecker); + UnreferencedPathHandler handler = new UnreferencedPathHandler(housekeepingPathRepository, pathCleaner); pagingCleanupService = new PagingPathCleanupService(List.of(handler), 1, true); List paths = List .of(createEntityHousekeepingPath("s3://bucket/some_foo", SCHEDULED), From 62c68d533ff4e31b927ddd79d9b8d4f94feaec69 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 20 Nov 2024 14:20:38 +0100 Subject: [PATCH 06/65] main business logic --- 
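Note (placed below the "---", so not part of the commit message): this commit centralises the Iceberg detection in a new IcebergValidator in beekeeper-cleanup. The validator looks the table up through the existing CleanerClient, inspects the "table_type" and "format" table parameters plus the storage descriptor's output format, and aborts the cleanup when any of them mentions Iceberg. The snippet below is only an illustrative sketch of that detection rule and is not part of this diff; the class name IcebergDetectionSketch, the helper isIceberg and the sample values are assumptions made for the example.

    // Sketch of the detection rule applied by IcebergValidator (illustrative, not in the patch).
    import java.util.Map;

    class IcebergDetectionSketch {

      // Returns true when any of the metastore signals marks the table as Iceberg.
      static boolean isIceberg(Map<String, String> tableParameters, String outputFormat) {
        String tableType = tableParameters.getOrDefault("table_type", "").toLowerCase();
        String format = tableParameters.getOrDefault("format", "").toLowerCase();
        String output = outputFormat == null ? "" : outputFormat.toLowerCase();
        return tableType.contains("iceberg") || format.contains("iceberg") || output.contains("iceberg");
      }

      public static void main(String[] args) {
        // Iceberg tables registered in the Hive Metastore usually carry table_type=ICEBERG,
        // so a single matching signal is enough for the validator to throw and skip the table.
        boolean iceberg = isIceberg(Map.of("table_type", "ICEBERG"), "");
        System.out.println(iceberg); // true
      }
    }
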
.../beekeeper/cleanup/aws/S3PathCleaner.java | 6 ++- .../beekeeper/cleanup/hive/HiveClient.java | 15 ++++++ .../cleanup/hive/HiveMetadataCleaner.java | 9 +++- .../cleanup/metadata/CleanerClient.java | 2 + .../cleanup/validation/IcebergValidator.java | 39 ++++++++++++++ beekeeper-core/pom.xml | 53 ------------------- .../core/checker/IcebergTableChecker.java | 40 -------------- beekeeper-scheduler-apiary/pom.xml | 12 +++-- .../apiary/service/SchedulerApiary.java | 10 ++-- 9 files changed, 85 insertions(+), 101 deletions(-) create mode 100644 beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java delete mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java index 46e88657..b7b96050 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java @@ -29,6 +29,7 @@ import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.cleanup.path.SentinelFilesCleaner; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.FileSystemType; import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; @@ -41,17 +42,20 @@ public class S3PathCleaner implements PathCleaner { private final S3Client s3Client; private final SentinelFilesCleaner sentinelFilesCleaner; private final BytesDeletedReporter bytesDeletedReporter; + private IcebergValidator icebergValidator; public S3PathCleaner(S3Client s3Client, SentinelFilesCleaner sentinelFilesCleaner, - BytesDeletedReporter bytesDeletedReporter) { + BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { this.s3Client = s3Client; this.sentinelFilesCleaner = sentinelFilesCleaner; this.bytesDeletedReporter = bytesDeletedReporter; + this.icebergValidator = icebergValidator; } @Override @TimedTaggable("s3-paths-deleted") public void cleanupPath(HousekeepingEntity housekeepingEntity) { + icebergValidator.throwExceptionIfIceberg(housekeepingEntity.getDatabaseName(), housekeepingEntity.getTableName()); S3SchemeURI s3SchemeURI = new S3SchemeURI(housekeepingEntity.getPath()); String key = s3SchemeURI.getKey(); String bucket = s3SchemeURI.getBucket(); diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java index d1009a60..e7ec222d 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java @@ -126,6 +126,21 @@ public Map getTableProperties(String databaseName, String tableN } } + @Override + public String getOutputFormat(String databaseName, String tableName) { + try { + Table table = client.getTable(databaseName, tableName); + if (table.getSd() != null) { + return table.getSd().getOutputFormat(); + } + throw new BeekeeperException( + "Storage descriptor properties were null for \"" + databaseName + "." 
+ tableName + "."); + } catch (TException e) { + throw new BeekeeperException( + "Unexpected exception when getting output format for \"" + databaseName + "." + tableName + ".", e); + } + } + @Override public void close() { client.close(); diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleaner.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleaner.java index b4abf607..08db511c 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleaner.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleaner.java @@ -18,6 +18,7 @@ import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner; import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.MetadataType; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.monitoring.TimedTaggable; @@ -25,14 +26,18 @@ public class HiveMetadataCleaner implements MetadataCleaner { private DeletedMetadataReporter deletedMetadataReporter; + private IcebergValidator icebergValidator; - public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter) { + public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter, IcebergValidator icebergValidator) { this.deletedMetadataReporter = deletedMetadataReporter; + this.icebergValidator = icebergValidator; } @Override @TimedTaggable("hive-table-deleted") public void dropTable(HousekeepingMetadata housekeepingMetadata, CleanerClient client) { + icebergValidator.throwExceptionIfIceberg(housekeepingMetadata.getDatabaseName(), + housekeepingMetadata.getTableName()); client.dropTable(housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); deletedMetadataReporter.reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE); } @@ -40,6 +45,8 @@ public void dropTable(HousekeepingMetadata housekeepingMetadata, CleanerClient c @Override @TimedTaggable("hive-partition-deleted") public boolean dropPartition(HousekeepingMetadata housekeepingMetadata, CleanerClient client) { + icebergValidator.throwExceptionIfIceberg(housekeepingMetadata.getDatabaseName(), + housekeepingMetadata.getTableName()); boolean partitionDeleted = client .dropPartition(housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), housekeepingMetadata.getPartitionName()); diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java index ee1a3a58..1946302b 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java @@ -27,4 +27,6 @@ public interface CleanerClient extends Closeable { boolean tableExists(String databaseName, String tableName); Map getTableProperties(String databaseName, String tableName); + + String getOutputFormat(String databaseName, String tableName); } diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java 
b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java new file mode 100644 index 00000000..86c12dac --- /dev/null +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -0,0 +1,39 @@ +package com.expediagroup.beekeeper.cleanup.validation; + +import java.util.Map; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; +import com.expediagroup.beekeeper.core.error.BeekeeperException; + +public class IcebergValidator { + + private static final Logger log = LoggerFactory.getLogger(IcebergValidator.class); + + private final CleanerClient client; + + public IcebergValidator(CleanerClient client) { + this.client = client; + } + + /** + * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information, + * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if + * the table is an Iceberg table and throw BeekeeperException to stop the process. + * + * @param databaseName + * @param tableName + */ + public void throwExceptionIfIceberg(String databaseName, String tableName) { + Map parameters = client.getTableProperties(databaseName, tableName); + String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); + String format = parameters.getOrDefault("format", "").toLowerCase(); + String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase(); + + if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { + throw new BeekeeperException("Iceberg tables are not currently supported in Beekeeper"); + } + } +} diff --git a/beekeeper-core/pom.xml b/beekeeper-core/pom.xml index 73519fcc..58e3760d 100644 --- a/beekeeper-core/pom.xml +++ b/beekeeper-core/pom.xml @@ -10,11 +10,6 @@ beekeeper-core - - 2.3.7 - 1.4.2 - - ch.qos.logback @@ -87,54 +82,6 @@ spring-test test - - - - org.apache.hive - hive-metastore - ${hive.version} - - - org.apache.hbase - hbase-client - - - org.slf4j - slf4j-log4j12 - - - org.apache.logging.log4j - log4j-slf4j-impl - - - junit - junit - - - org.eclipse.jetty.aggregate - jetty-all - - - org.eclipse.jetty.orbit - javax.servlet - - - javax.servlet - servlet-api - - - - - com.hotels - hcommon-hive-metastore - ${hcommon-hive-metastore.version} - - - net.java.dev.jna - jna - - - diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java deleted file mode 100644 index cf232973..00000000 --- a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/checker/IcebergTableChecker.java +++ /dev/null @@ -1,40 +0,0 @@ -package com.expediagroup.beekeeper.core.checker; - -import java.util.Map; -import java.util.function.Supplier; - -import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; -import org.apache.hadoop.hive.metastore.api.Table; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; - -public class IcebergTableChecker { - - private static final Logger log = LoggerFactory.getLogger(IcebergTableChecker.class); - - private final Supplier metaStoreClientSupplier; - - public IcebergTableChecker(Supplier metaStoreClientSupplier) { - 
this.metaStoreClientSupplier = metaStoreClientSupplier; - } - - public void throwExceptionIfIceberg(String databaseName, String tableName) { - try (CloseableMetaStoreClient client = metaStoreClientSupplier.get()) { - Table table = client.getTable(databaseName, tableName); - - Map parameters = table.getParameters(); - String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); - String format = parameters.getOrDefault("format", "").toLowerCase(); - String outputFormat = table.getSd().getOutputFormat().toLowerCase(); - - if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { - throw new BeekeeperException("Iceberg tables are not currently supported in Beekeeper"); - } - } catch (Exception e) { - throw new BeekeeperException("Error checking if table is Iceberg", e); - } - } -} diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index bc492abf..e34c241d 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 @@ -21,6 +22,11 @@ beekeeper-scheduler ${project.version} + + com.expediagroup + beekeeper-cleanup + ${project.version} + ch.qos.logback @@ -56,8 +62,8 @@ geronimo-jaspic_1.0_spec - org.eclipse.jetty - jetty-util + org.eclipse.jetty + jetty-util org.eclipse.jetty.aggregate diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 4cc1e165..da6dc2f5 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -26,6 +26,7 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; @@ -38,25 +39,29 @@ public class SchedulerApiary { private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; + private final IcebergValidator icebergValidator; @Autowired public SchedulerApiary( BeekeeperEventReader beekeeperEventReader, - EnumMap schedulerServiceMap + EnumMap schedulerServiceMap, + IcebergValidator icebergValidator ) { this.beekeeperEventReader = beekeeperEventReader; this.schedulerServiceMap = schedulerServiceMap; + this.icebergValidator = icebergValidator; } @Transactional public void scheduleBeekeeperEvent() { Optional housekeepingEntitiesToBeScheduled = beekeeperEventReader.read(); - if (housekeepingEntitiesToBeScheduled.isEmpty()) { return; } + if (housekeepingEntitiesToBeScheduled.isEmpty()) {return;} BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); for (HousekeepingEntity entity : housekeepingEntities) { try { + icebergValidator.throwExceptionIfIceberg(entity.getDatabaseName(), entity.getTableName()); LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); SchedulerService scheduler = schedulerServiceMap.get(eventType); scheduler.scheduleForHousekeeping(entity); @@ -67,7 +72,6 @@ 
public void scheduleBeekeeperEvent() {
             e);
       }
     }
-
     beekeeperEventReader.delete(beekeeperEvent);
   }
 
From b6c718f6ce180a8e4175d87e15f82180a882ea10 Mon Sep 17 00:00:00 2001
From: javsanbel2
Date: Wed, 20 Nov 2024 14:42:59 +0100
Subject: [PATCH 07/65] adding exception

---
 .../cleanup/validation/IcebergValidator.java  |  6 ++--
 .../core/error/BeekeeperIcebergException.java | 33 +++++++++++++++++++
 .../apiary/service/SchedulerApiary.java       |  8 +++++
 3 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java

diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
index 86c12dac..0e9c4a35 100644
--- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
+++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
@@ -6,7 +6,7 @@
 import org.slf4j.LoggerFactory;
 
 import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
-import com.expediagroup.beekeeper.core.error.BeekeeperException;
+import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
 
 public class IcebergValidator {
 
@@ -21,7 +21,7 @@ public IcebergValidator(CleanerClient client) {
   /**
    * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information,
    * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if
-   * the table is an Iceberg table and throw BeekeeperException to stop the process.
+   * the table is an Iceberg table and throw IcebergTableFoundException to stop the process.
    *
    * @param databaseName
    * @param tableName
@@ -33,7 +33,7 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) {
     String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase();
 
     if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) {
-      throw new BeekeeperException("Iceberg tables are not currently supported in Beekeeper");
+      throw new BeekeeperIcebergException("Iceberg tables are not currently supported in Beekeeper");
     }
   }
 }
diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java
new file mode 100644
index 00000000..79b02bf1
--- /dev/null
+++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java
@@ -0,0 +1,33 @@
+/**
+ * Copyright (C) 2019 Expedia, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.expediagroup.beekeeper.core.error;
+
+public class BeekeeperIcebergException extends BeekeeperException {
+
+  private static final long serialVersionUID = 1L;
+
+  public BeekeeperIcebergException(String message, Exception e) {
+    super(message, e);
+  }
+
+  public BeekeeperIcebergException(String message, Throwable e) {
+    super(message, e);
+  }
+
+  public BeekeeperIcebergException(String message) {
+    super(message);
+  }
+}
diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java
index da6dc2f5..f6bbc707 100644
--- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java
+++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java
@@ -22,12 +22,15 @@
 import java.util.List;
 import java.util.Optional;
 
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Component;
 import org.springframework.transaction.annotation.Transactional;
 
 import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
 import com.expediagroup.beekeeper.core.error.BeekeeperException;
+import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
 import com.expediagroup.beekeeper.core.model.HousekeepingEntity;
 import com.expediagroup.beekeeper.core.model.LifecycleEventType;
 import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader;
@@ -37,6 +40,8 @@
 @Component
 public class SchedulerApiary {
 
+  private static final Logger log = LoggerFactory.getLogger(SchedulerApiary.class);
+
   private final BeekeeperEventReader beekeeperEventReader;
   private final EnumMap schedulerServiceMap;
   private final IcebergValidator icebergValidator;
@@ -65,6 +70,9 @@ public void scheduleBeekeeperEvent() {
         LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType());
         SchedulerService scheduler = schedulerServiceMap.get(eventType);
         scheduler.scheduleForHousekeeping(entity);
+      } catch (BeekeeperIcebergException e) {
+        log.warn("Iceberg table are not supported in Beekeeper. Deleting message from queue", e);
+        beekeeperEventReader.delete(beekeeperEvent);
       } catch (Exception e) {
         throw new BeekeeperException(format(
             "Unable to schedule %s deletion for entity, this message will go back on the queue",

From c5ad34353071cdc658bc7ced71099e52f1d959dd Mon Sep 17 00:00:00 2001
From: Hamza Jugon
Date: Wed, 20 Nov 2024 14:50:09 +0000
Subject: [PATCH 08/65] Add DB & table name to exception message

---
 .../cleanup/validation/IcebergValidator.java | 20 +++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
index 0e9c4a35..e4c3306a 100644
--- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
+++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
@@ -1,3 +1,18 @@
+/**
+ * Copyright (C) 2019-2024 Expedia, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package com.expediagroup.beekeeper.cleanup.validation;
 
 import java.util.Map;
@@ -21,7 +36,7 @@ public IcebergValidator(CleanerClient client) {
   /**
    * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information,
    * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if
-   * the table is an Iceberg table and throw IcebergTableFoundException to stop the process.
+   * the table is an Iceberg table and throws BeekeeperIcebergException to stop the process.
    *
    * @param databaseName
    * @param tableName
@@ -33,7 +48,8 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) {
     String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase();
 
     if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) {
-      throw new BeekeeperIcebergException("Iceberg tables are not currently supported in Beekeeper");
+      String errorMessage = String.format("Iceberg tables are not currently supported in Beekeeper. Detected in Database: '%s', Table: '%s'.", databaseName, tableName);
+      throw new BeekeeperIcebergException(errorMessage);
     }
   }
 }

From fd454a8b44efc84cf6dcc06eda9b02986912da7a Mon Sep 17 00:00:00 2001
From: Hamza Jugon
Date: Wed, 20 Nov 2024 14:51:55 +0000
Subject: [PATCH 09/65] Update IcebergValidator.java

---
 .../beekeeper/cleanup/validation/IcebergValidator.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
index e4c3306a..e9e97911 100644
--- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
+++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java
@@ -36,7 +36,7 @@ public IcebergValidator(CleanerClient client) {
   /**
    * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information,
    * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if
-   * the table is an Iceberg table and throws BeekeeperIcebergException to stop the process.
+   * the table is an Iceberg table and throws IcebergTableFoundException to stop the process.
* * @param databaseName * @param tableName From e41ac6ce7dfd403d7885bcfe2364eec5a5210a88 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 15:06:02 +0000 Subject: [PATCH 10/65] Create IcebergValidatorTest.java --- .../validation/IcebergValidatorTest.java | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java new file mode 100644 index 00000000..bb588d64 --- /dev/null +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java @@ -0,0 +1,85 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.expediagroup.beekeeper.cleanup.validation; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; + +public class IcebergValidatorTest { + + private CleanerClient cleanerClient; + private IcebergValidator icebergValidator; + + @Before + public void setUp() { + cleanerClient = mock(CleanerClient.class); + icebergValidator = new IcebergValidator(cleanerClient); + } + + @Test(expected = BeekeeperIcebergException.class) + public void shouldThrowExceptionWhenTableTypeIsIceberg() { + Map properties = new HashMap<>(); + properties.put("table_type", "ICEBERG"); + + when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } + + @Test(expected = BeekeeperIcebergException.class) + public void shouldThrowExceptionWhenFormatIsIceberg() { + Map properties = new HashMap<>(); + properties.put("format", "iceberg"); + + when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } + + @Test + public void shouldNotThrowExceptionForNonIcebergTable() { + Map properties = new HashMap<>(); + properties.put("table_type", "HIVE_TABLE"); + + when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")) + .thenReturn("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } + + @Test(expected = BeekeeperIcebergException.class) + public void shouldThrowExceptionWhenOutputFormatContainsIceberg() { + Map properties = new HashMap<>(); + + 
when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")) + .thenReturn("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } +} From a2939d52a35c02320de47dd18d1b6d04f958da50 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 15:11:43 +0000 Subject: [PATCH 11/65] Update HiveMetadataCleanerTest.java --- .../cleanup/hive/HiveMetadataCleanerTest.java | 49 ++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java index bf230190..3ed5aa8b 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2021 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,8 @@ */ package com.expediagroup.beekeeper.cleanup.hive; +import static org.junit.Assert.assertThrows; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -26,7 +28,9 @@ import org.mockito.junit.jupiter.MockitoExtension; import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.MetadataType; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; @ExtendWith(MockitoExtension.class) @@ -35,6 +39,7 @@ public class HiveMetadataCleanerTest { private @Mock HousekeepingMetadata housekeepingMetadata; private @Mock DeletedMetadataReporter deletedMetadataReporter; private @Mock HiveClient hiveClient; + private @Mock IcebergValidator icebergValidator; private HiveMetadataCleaner cleaner; private static final String DATABASE = "database"; @@ -43,14 +48,18 @@ public class HiveMetadataCleanerTest { @BeforeEach public void init() { - cleaner = new HiveMetadataCleaner(deletedMetadataReporter); + cleaner = new HiveMetadataCleaner(deletedMetadataReporter, icebergValidator); } @Test public void typicalDropTable() { when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); + cleaner.dropTable(housekeepingMetadata, hiveClient); + // Verify that the IcebergValidator is called + verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME); + verify(hiveClient).dropTable(DATABASE, TABLE_NAME); verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE); } @@ -62,6 +71,9 @@ public void typicalDropPartition() { when(hiveClient.dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME)).thenReturn(true); cleaner.dropPartition(housekeepingMetadata, hiveClient); + + verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME); + verify(hiveClient).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME); verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION); 
} @@ -81,4 +93,37 @@ public void tableExists() { cleaner.tableExists(hiveClient, DATABASE, TABLE_NAME); verify(hiveClient).tableExists(DATABASE, TABLE_NAME); } + + @Test + public void dropTableWhenIcebergTable() { + when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); + when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); + doThrow(new BeekeeperIcebergException("Iceberg table")) + .when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME); + + assertThrows( + BeekeeperIcebergException.class, + () -> cleaner.dropTable(housekeepingMetadata, hiveClient) + ); + + // Verify that dropTable was not called on hiveClient + verify(hiveClient, never()).dropTable(DATABASE, TABLE_NAME); + verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE); + } + + @Test + public void dropPartitionWhenIcebergTable() { + when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); + when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); + doThrow(new BeekeeperIcebergException("Iceberg table")) + .when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME); + + assertThrows( + BeekeeperIcebergException.class, + () -> cleaner.dropPartition(housekeepingMetadata, hiveClient) + ); + + verify(hiveClient, never()).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME); + verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION); + } } From 223e086f2ea6291a9e41dd0f4b7308e24ee08489 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 16:44:31 +0000 Subject: [PATCH 12/65] Updating and adding S3PathCleaner tests Integrated LocalStack with containers. Rule is from Junit 4 --- .../cleanup/aws/S3DryRunPathCleanerTest.java | 16 ++--- .../cleanup/aws/S3PathCleanerTest.java | 71 +++++++++++++++---- 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java index 42adb416..583e0018 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2023 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,13 +23,13 @@ import java.time.LocalDateTime; import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider; -import org.junit.Rule; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; +import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; @@ -38,6 +38,7 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -58,20 +59,19 @@ class S3DryRunPathCleanerTest { private HousekeepingPath housekeepingPath; private AmazonS3 amazonS3; private @Mock BytesDeletedReporter bytesDeletedReporter; + @Mock private IcebergValidator icebergValidator; + private boolean dryRunEnabled = true; private S3PathCleaner s3DryRunPathCleaner; - @Rule + @Container public static LocalStackContainer awsContainer = new LocalStackContainer( DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3); - static { - awsContainer.start(); - } - public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); @BeforeEach void setUp() { + String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); amazonS3 = AmazonS3ClientBuilder .standard() .withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey")) @@ -83,7 +83,7 @@ void setUp() { .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(bucket, object.getKey())); S3Client s3Client = new S3Client(amazonS3, dryRunEnabled); - s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); + s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); housekeepingPath = HousekeepingPath .builder() .path(absolutePath) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 7b00bcb6..7a046c98 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -21,8 +21,10 @@ import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -33,13 +35,13 @@ import java.util.List; import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider; -import org.junit.Rule; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import 
org.testcontainers.containers.localstack.LocalStackContainer; +import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; @@ -55,8 +57,10 @@ import com.amazonaws.services.s3.model.S3ObjectSummary; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.FileSystemType; import com.expediagroup.beekeeper.core.error.BeekeeperException; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -78,19 +82,16 @@ class S3PathCleanerTest { private S3Client s3Client; private S3SentinelFilesCleaner s3SentinelFilesCleaner; private @Mock BytesDeletedReporter bytesDeletedReporter; - + private @Mock IcebergValidator icebergValidator; private S3PathCleaner s3PathCleaner; - @Rule + @Container public static LocalStackContainer awsContainer = new LocalStackContainer( DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3); - static { - awsContainer.start(); - } - public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); @BeforeEach void setUp() { + String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); amazonS3 = AmazonS3ClientBuilder .standard() .withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey")) @@ -104,7 +105,7 @@ void setUp() { boolean dryRunEnabled = false; s3Client = new S3Client(amazonS3, dryRunEnabled); s3SentinelFilesCleaner = new S3SentinelFilesCleaner(s3Client); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); String tableName = "table"; String databaseName = "database"; housekeepingPath = HousekeepingPath @@ -257,7 +258,7 @@ void sentinelFilesCleanerThrowsException() { amazonS3.putObject(bucket, key1, content); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)).doesNotThrowAnyException(); assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); } @@ -322,7 +323,7 @@ void sentinelFilesForParentsAndPathWithTrailingSlash() { @Test void noBytesDeletedMetricWhenFileDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); when(mockS3Client.doesObjectExist(bucket, key1)).thenReturn(true); ObjectMetadata objectMetadata = new ObjectMetadata(); objectMetadata.setContentLength(10); @@ -338,7 +339,7 @@ void noBytesDeletedMetricWhenFileDeletionFails() { @Test void noBytesDeletedMetricWhenDirectoryDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); doThrow(AmazonServiceException.class).when(mockS3Client).listObjects(bucket, 
keyRootAsDirectory); assertThatExceptionOfType(AmazonServiceException.class) @@ -351,7 +352,7 @@ void reportBytesDeletedWhenDirectoryDeletionPartiallyFails() { AmazonS3 mockAmazonS3 = mock(AmazonS3.class); S3Client mockS3Client = new S3Client(mockAmazonS3, false); mockOneOutOfTwoObjectsDeleted(mockAmazonS3); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); assertThatExceptionOfType(BeekeeperException.class) .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) .withMessage(format("Not all files could be deleted at path \"%s/%s\"; deleted 1/2 objects. " @@ -368,6 +369,52 @@ void extractingURIFails() { .withMessage(format("'%s' is not an S3 path.", path)); } + @Test + void shouldThrowBeekeeperIcebergExceptionWhenIcebergTableDetected() { + doThrow(new BeekeeperIcebergException("Iceberg tables are not supported")) + .when(icebergValidator) + .throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); + + assertThatExceptionOfType(BeekeeperIcebergException.class) + .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) + .withMessage("Iceberg tables are not supported"); + + verify(icebergValidator).throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); + verifyNoInteractions(bytesDeletedReporter); + } + + @Test + void shouldNotReportBytesDeletedWhenIcebergValidatorThrows() { + doThrow(new BeekeeperIcebergException("Iceberg tables are not supported")) + .when(icebergValidator) + .throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); + + assertThatExceptionOfType(BeekeeperIcebergException.class) + .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)); + + verify(bytesDeletedReporter, never()).reportTaggable(anyLong(), any(), any()); + } + + @Test + void shouldProceedWithDeletionWhenNotIcebergTable() { + // setting up objects in the bucket + amazonS3.putObject(bucket, key1, content); // Add the files + amazonS3.putObject(bucket, key2, content); + + // housekeepingPath is set + housekeepingPath.setPath("s3://" + bucket + "/" + keyRoot); + + assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)) + .doesNotThrowAnyException(); + + // verify objects are deleted and reporter is called + assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); + assertThat(amazonS3.doesObjectExist(bucket, key2)).isFalse(); + + long expectedBytesDeleted = content.getBytes().length * 2L; // 11 bytes('some content') * 2 = 22 bytes + verify(bytesDeletedReporter).reportTaggable(expectedBytesDeleted, housekeepingPath, FileSystemType.S3); + } + private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { S3ObjectSummary s3ObjectSummary = new S3ObjectSummary(); s3ObjectSummary.setBucketName(bucket); From 1f6e360ac04a32473e957a9d0bd0f1f5155cf072 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 16:52:36 +0000 Subject: [PATCH 13/65] Adding IcebergValidator to constructors --- .../metadata/cleanup/context/CommonBeans.java | 11 +++++++---- .../metadata/cleanup/context/CommonBeansTest.java | 6 ++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index 
353adfd9..6526f248 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -48,6 +48,7 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingMetadataRepository; import com.expediagroup.beekeeper.metadata.cleanup.handler.ExpiredMetadataHandler; import com.expediagroup.beekeeper.metadata.cleanup.handler.MetadataHandler; @@ -102,8 +103,9 @@ public DeletedMetadataReporter deletedMetadataReporter( @Bean(name = "hiveTableCleaner") MetadataCleaner metadataCleaner( - DeletedMetadataReporter deletedMetadataReporter) { - return new HiveMetadataCleaner(deletedMetadataReporter); + DeletedMetadataReporter deletedMetadataReporter, + IcebergValidator icebergValidator) { + return new HiveMetadataCleaner(deletedMetadataReporter, icebergValidator); } @Bean @@ -139,8 +141,9 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); + BytesDeletedReporter bytesDeletedReporter, + IcebergValidator icebergValidator) { + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); } @Bean(name = "expiredMetadataHandler") diff --git a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java index 36085496..83dbc389 100644 --- a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java +++ b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java @@ -49,6 +49,7 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingMetadataRepository; import com.expediagroup.beekeeper.metadata.cleanup.handler.ExpiredMetadataHandler; import com.expediagroup.beekeeper.metadata.cleanup.service.MetadataDisableTablesService; @@ -76,6 +77,7 @@ public class CommonBeansTest { private @Mock PathCleaner pathCleaner; private @Mock MeterRegistry meterRegistry; private @Mock HiveClientFactory hiveClientFactory; + private @Mock IcebergValidator icebergValidator; @BeforeEach public void awsSetUp() { @@ -122,7 +124,7 @@ public void verifyHiveClient() { @Test public void verifyHiveMetadataCleaner() { DeletedMetadataReporter reporter = commonBeans.deletedMetadataReporter(meterRegistry, false); - MetadataCleaner metadataCleaner = commonBeans.metadataCleaner(reporter); + MetadataCleaner metadataCleaner = commonBeans.metadataCleaner(reporter, icebergValidator); 
assertThat(metadataCleaner).isInstanceOf(HiveMetadataCleaner.class); } @@ -152,7 +154,7 @@ public void verifyS3Client() { void verifyS3pathCleaner() { BytesDeletedReporter reporter = commonBeans.bytesDeletedReporter(meterRegistry, false); S3Client s3Client = commonBeans.s3Client(commonBeans.amazonS3(), false); - PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, reporter); + PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, reporter, icebergValidator); assertThat(pathCleaner).isInstanceOf(S3PathCleaner.class); } From 1905b0ae9831831220356b44d0ec69875a61c1c7 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 16:57:34 +0000 Subject: [PATCH 14/65] Updating Junit imports --- .../generator/ExpiredHousekeepingMetadataGeneratorTest.java | 3 ++- .../generator/UnreferencedHousekeepingPathGeneratorTest.java | 2 +- .../scheduler/apiary/handler/MessageEventHandlerTest.java | 2 +- .../scheduler/apiary/service/SchedulerApiaryTest.java | 4 +++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java index 7d753fa5..189ab938 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java @@ -18,7 +18,7 @@ import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -33,6 +33,7 @@ import java.util.List; import java.util.Map; + import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/UnreferencedHousekeepingPathGeneratorTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/UnreferencedHousekeepingPathGeneratorTest.java index f74bc4c7..c8dc6cd3 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/UnreferencedHousekeepingPathGeneratorTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/UnreferencedHousekeepingPathGeneratorTest.java @@ -18,7 +18,7 @@ import static java.lang.String.format; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.fail; +import static org.junit.jupiter.api.Assertions.fail; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandlerTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandlerTest.java index 3f1cdd30..9a7f2a92 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandlerTest.java +++ 
b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandlerTest.java @@ -16,7 +16,6 @@ package com.expediagroup.beekeeper.scheduler.apiary.handler; import static org.assertj.core.api.Assertions.assertThat; -import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -24,6 +23,7 @@ import java.util.List; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 31f48702..a1853225 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -42,6 +42,7 @@ import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; @@ -61,6 +62,7 @@ public class SchedulerApiaryTest { @Mock private BeekeeperEventReader beekeeperEventReader; @Mock private HousekeepingPath path; @Mock private HousekeepingMetadata table; + @Mock private IcebergValidator icebergValidator; private SchedulerApiary scheduler; @@ -69,7 +71,7 @@ public void init() { EnumMap schedulerMap = new EnumMap<>(LifecycleEventType.class); schedulerMap.put(UNREFERENCED, pathSchedulerService); schedulerMap.put(EXPIRED, tableSchedulerService); - scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap); + scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap, icebergValidator); } @Test From efca2c93260bdcfa269ff5c330be482a2792d30c Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 17:35:59 +0000 Subject: [PATCH 15/65] Update SchedulerApiaryTest.java --- .../apiary/service/SchedulerApiaryTest.java | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index a1853225..7551d627 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,7 +20,11 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.lenient; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; @@ -37,13 +41,13 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; -import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.error.BeekeeperException; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingPath; @@ -79,6 +83,7 @@ public void typicalPathSchedule() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); when(beekeeperEventReader.read()).thenReturn(event); scheduler.scheduleBeekeeperEvent(); + verify(icebergValidator).throwExceptionIfIceberg(path.getDatabaseName(), path.getTableName()); verify(pathSchedulerService).scheduleForHousekeeping(path); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -89,6 +94,8 @@ public void typicalTableSchedule() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); scheduler.scheduleBeekeeperEvent(); + + verify(icebergValidator).throwExceptionIfIceberg(table.getDatabaseName(), table.getTableName()); verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -98,6 +105,8 @@ public void typicalTableSchedule() { public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); + + verifyNoInteractions(icebergValidator); verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -113,6 +122,7 @@ public void housekeepingPathRepositoryThrowsException() { scheduler.scheduleBeekeeperEvent(); fail("Should have thrown exception"); } catch (Exception e) { + verify(icebergValidator).throwExceptionIfIceberg(path.getDatabaseName(), path.getTableName()); verify(pathSchedulerService).scheduleForHousekeeping(path); verify(beekeeperEventReader, times(0)).delete(any()); verifyNoInteractions(tableSchedulerService); @@ -133,6 +143,7 @@ public void housekeepingTableRepositoryThrowsException() { scheduler.scheduleBeekeeperEvent(); fail("Should have thrown exception"); } catch (Exception e) { + verify(icebergValidator).throwExceptionIfIceberg(table.getDatabaseName(), table.getTableName()); verify(tableSchedulerService).scheduleForHousekeeping(table); verify(beekeeperEventReader, times(0)).delete(any()); verifyNoInteractions(pathSchedulerService); @@ -143,6 +154,26 @@ public void housekeepingTableRepositoryThrowsException() { } } + @Test + public void icebergValidatorThrowsException() { + 
String databaseName = "database"; + String tableName = "table"; + when(path.getDatabaseName()).thenReturn(databaseName); + when(path.getTableName()).thenReturn(tableName); + Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); + when(beekeeperEventReader.read()).thenReturn(event); + + doThrow(new BeekeeperIcebergException("Iceberg table")) + .when(icebergValidator).throwExceptionIfIceberg(eq(databaseName), eq(tableName)); + + scheduler.scheduleBeekeeperEvent(); + + verify(icebergValidator).throwExceptionIfIceberg(databaseName, tableName); + verifyNoInteractions(pathSchedulerService); + verifyNoInteractions(tableSchedulerService); + verify(beekeeperEventReader, times(2)).delete(event.get()); + } + @Test public void typicalClose() throws Exception { scheduler.close(); @@ -151,7 +182,9 @@ public void typicalClose() throws Exception { private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntity, LifecycleEventType lifecycleEventType) { - when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); - return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); + lenient().when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); + when(housekeepingEntity.getDatabaseName()).thenReturn("database"); + when(housekeepingEntity.getTableName()).thenReturn("table"); + return new BeekeeperEvent(List.of(housekeepingEntity), mock(MessageEvent.class)); } } From d16bc0aa527faaaa845c95203683e6cd87978ec8 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 20 Nov 2024 17:36:16 +0000 Subject: [PATCH 16/65] Update CommonBeans --- .../beekeeper/path/cleanup/context/CommonBeans.java | 6 ++++-- .../beekeeper/path/cleanup/context/CommonBeansTest.java | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index 0b118243..911fe3d8 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -40,6 +40,7 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.handler.GenericPathHandler; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; @@ -85,8 +86,9 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); + BytesDeletedReporter bytesDeletedReporter, + IcebergValidator icebergValidator) { + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); } @Bean diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java 
b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java index a6667b73..4db85925 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2021 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; @@ -59,6 +60,7 @@ class CommonBeansTest { private final CommonBeans commonBeans = new CommonBeans(); private @Mock HousekeepingPathRepository repository; private @Mock BytesDeletedReporter bytesDeletedReporter; + private @Mock IcebergValidator icebergValidator; @BeforeEach void setUp() { @@ -100,7 +102,7 @@ void verifyS3pathCleaner() { S3Client s3Client = commonBeans.s3Client(commonBeans.amazonS3(), dryRunEnabled); MeterRegistry meterRegistry = mock(GraphiteMeterRegistry.class); - PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter); + PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter, icebergValidator); assertThat(pathCleaner).isInstanceOf(S3PathCleaner.class); } From 9bf7248b3bf74dcf380feae150987f2d1aa894ae Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 21 Nov 2024 09:29:49 +0000 Subject: [PATCH 17/65] clean-up add comment --- .../beekeeper/metadata/cleanup/context/CommonBeans.java | 9 +++------ .../metadata/cleanup/context/CommonBeansTest.java | 3 +-- .../ExpiredHousekeepingMetadataGeneratorTest.java | 3 +-- .../scheduler/apiary/service/SchedulerApiaryTest.java | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index 6526f248..36509532 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -81,8 +81,7 @@ public CloseableMetaStoreClientFactory metaStoreClientFactory() { @Bean Supplier metaStoreClientSupplier( - CloseableMetaStoreClientFactory metaStoreClientFactory, - HiveConf hiveConf) { + CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { String name = "beekeeper-metadata-cleanup"; return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); } @@ -103,8 +102,7 @@ public DeletedMetadataReporter deletedMetadataReporter( @Bean(name = "hiveTableCleaner") MetadataCleaner metadataCleaner( - DeletedMetadataReporter deletedMetadataReporter, - IcebergValidator icebergValidator) { + DeletedMetadataReporter deletedMetadataReporter, 
IcebergValidator icebergValidator) { return new HiveMetadataCleaner(deletedMetadataReporter, icebergValidator); } @@ -141,8 +139,7 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter, - IcebergValidator icebergValidator) { + BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); } diff --git a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java index 83dbc389..e5659ea9 100644 --- a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java +++ b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java @@ -161,8 +161,7 @@ void verifyS3pathCleaner() { @Test public void verifyExpiredMetadataHandler() { ExpiredMetadataHandler expiredMetadataHandler = commonBeans.expiredMetadataHandler(hiveClientFactory, - metadataRepository, - metadataCleaner, pathCleaner); + metadataRepository, metadataCleaner, pathCleaner); assertThat(expiredMetadataHandler).isInstanceOf(ExpiredMetadataHandler.class); } diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java index 189ab938..5041c910 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/generator/ExpiredHousekeepingMetadataGeneratorTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2022 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -33,7 +33,6 @@ import java.util.List; import java.util.Map; - import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 7551d627..045d1827 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -171,7 +171,7 @@ public void icebergValidatorThrowsException() { verify(icebergValidator).throwExceptionIfIceberg(databaseName, tableName); verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); - verify(beekeeperEventReader, times(2)).delete(event.get()); + verify(beekeeperEventReader, times(2)).delete(event.get()); // We need to fix this, we are deleting twice, not sure why } @Test From 4c45de2e5b37daef3fd90c1c9ff2fd31524a6fb6 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 21 Nov 2024 10:09:50 +0000 Subject: [PATCH 18/65] Remove extra deletion --- .../beekeeper/scheduler/apiary/service/SchedulerApiary.java | 4 +--- .../scheduler/apiary/service/SchedulerApiaryTest.java | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index f6bbc707..5ddc31ab 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -72,12 +72,10 @@ public void scheduleBeekeeperEvent() { scheduler.scheduleForHousekeeping(entity); } catch (BeekeeperIcebergException e) { log.warn("Iceberg table are not supported in Beekeeper. 
Deleting message from queue", e); - beekeeperEventReader.delete(beekeeperEvent); } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", - entity.getLifecycleType()), - e); + entity.getLifecycleType()), e); } } beekeeperEventReader.delete(beekeeperEvent); diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 045d1827..6e2d1264 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -171,7 +171,7 @@ public void icebergValidatorThrowsException() { verify(icebergValidator).throwExceptionIfIceberg(databaseName, tableName); verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); - verify(beekeeperEventReader, times(2)).delete(event.get()); // We need to fix this, we are deleting twice, not sure why + verify(beekeeperEventReader).delete(event.get()); } @Test From 61c2f88f03f9a2f29dd543a616ea4634a550a6b3 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Thu, 21 Nov 2024 12:04:47 +0100 Subject: [PATCH 19/65] adding beans --- .../cleanup/validation/IcebergValidator.java | 33 +++++++++------ .../metadata/cleanup/context/CommonBeans.java | 5 +++ .../path/cleanup/context/CommonBeans.java | 39 ++++++++++++++++++ .../scheduler/apiary/context/CommonBeans.java | 40 +++++++++++++++++++ 4 files changed, 105 insertions(+), 12 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index e9e97911..45e0145c 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -15,41 +15,50 @@ */ package com.expediagroup.beekeeper.cleanup.validation; +import static java.lang.String.format; + import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; +import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; public class IcebergValidator { private static final Logger log = LoggerFactory.getLogger(IcebergValidator.class); - private final CleanerClient client; + private final CleanerClientFactory cleanerClientFactory; - public IcebergValidator(CleanerClient client) { - this.client = client; + public IcebergValidator(CleanerClientFactory cleanerClientFactory) { + this.cleanerClientFactory = cleanerClientFactory; } /** * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information, * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if - * the table is an Iceberg table and throws IcebergTableFoundException to stop the process. + * the table is an Iceberg table and throws BeekeeperIcebergException to stop the process. 
* * @param databaseName * @param tableName */ public void throwExceptionIfIceberg(String databaseName, String tableName) { - Map parameters = client.getTableProperties(databaseName, tableName); - String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); - String format = parameters.getOrDefault("format", "").toLowerCase(); - String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase(); - - if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { - String errorMessage = String.format("Iceberg tables are not currently supported in Beekeeper. Detected in Database: '%s', Table: '%s'.", databaseName, tableName); - throw new BeekeeperIcebergException(errorMessage); + try (CleanerClient client = cleanerClientFactory.newInstance()) { + Map parameters = client.getTableProperties(databaseName, tableName); + String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); + String format = parameters.getOrDefault("format", "").toLowerCase(); + String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase(); + if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { + throw new BeekeeperIcebergException( + format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, + tableName)); + } + } catch (Exception e) { + throw new BeekeeperIcebergException( + format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, + tableName), e); } } } diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index 36509532..0dd2fc94 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -93,6 +93,11 @@ public CleanerClientFactory clientFactory( return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); } + @Bean + public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { + return new IcebergValidator(clientFactory); + } + @Bean public DeletedMetadataReporter deletedMetadataReporter( MeterRegistry meterRegistry, diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index 911fe3d8..eff5fac8 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -16,7 +16,9 @@ package com.expediagroup.beekeeper.path.cleanup.context; import java.util.List; +import java.util.function.Supplier; +import org.apache.hadoop.hive.conf.HiveConf; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.domain.EntityScan; import org.springframework.context.annotation.Bean; @@ -35,6 +37,8 @@ import com.expediagroup.beekeeper.cleanup.aws.S3Client; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; import com.expediagroup.beekeeper.cleanup.aws.S3SentinelFilesCleaner; +import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory; +import 
com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.cleanup.service.CleanupService; @@ -46,6 +50,10 @@ import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; +import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; +import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; +import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; + @Configuration @EnableScheduling @ComponentScan({ "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.cleanup" }) @@ -110,4 +118,35 @@ RepositoryCleanupService repositoryCleanupService( DisableTablesService disableTablesService() { return () -> {}; } + + @Bean + public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); + return conf; + } + + @Bean + public CloseableMetaStoreClientFactory metaStoreClientFactory() { + return new CloseableMetaStoreClientFactory(); + } + + @Bean + Supplier metaStoreClientSupplier( + CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { + String name = "beekeeper-scheduler-apiary"; + return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); + } + + @Bean(name = "hiveClientFactory") + public CleanerClientFactory clientFactory( + Supplier metaStoreClientSupplier, + @Value("${properties.dry-run-enabled}") boolean dryRunEnabled) { + return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); + } + + @Bean + public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { + return new IcebergValidator(clientFactory); + } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java index 492017c6..fc2ee418 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java @@ -17,7 +17,9 @@ import java.util.EnumMap; import java.util.List; +import java.util.function.Supplier; +import org.apache.hadoop.hive.conf.HiveConf; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.domain.EntityScan; @@ -37,6 +39,9 @@ import com.expedia.apiary.extensions.receiver.common.messaging.MessageReader; import com.expedia.apiary.extensions.receiver.sqs.messaging.SqsMessageReader; +import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory; +import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; +import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.filter.EventTypeListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; @@ -52,6 +57,10 @@ import com.expediagroup.beekeeper.scheduler.apiary.messaging.RetryingMessageReader; import 
com.expediagroup.beekeeper.scheduler.service.SchedulerService; +import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; +import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; +import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; + @Configuration @ComponentScan(basePackages = { "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.scheduler" }) @EntityScan(basePackages = { "com.expediagroup.beekeeper.core" }) @@ -139,4 +148,35 @@ public BeekeeperEventReader eventReader( return new MessageReaderAdapter(messageReader, handlers); } + + @Bean + public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { + HiveConf conf = new HiveConf(); + conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); + return conf; + } + + @Bean + public CloseableMetaStoreClientFactory metaStoreClientFactory() { + return new CloseableMetaStoreClientFactory(); + } + + @Bean + Supplier metaStoreClientSupplier( + CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { + String name = "beekeeper-scheduler-apiary"; + return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); + } + + @Bean(name = "hiveClientFactory") + public CleanerClientFactory clientFactory( + Supplier metaStoreClientSupplier, + @Value("${properties.dry-run-enabled}") boolean dryRunEnabled) { + return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); + } + + @Bean + public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { + return new IcebergValidator(clientFactory); + } } From 4e0b82b6cd59fa0f276323e4ca69b1a23754803f Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 21 Nov 2024 11:59:58 +0000 Subject: [PATCH 20/65] fix tests --- .../validation/IcebergValidatorTest.java | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java index bb588d64..663a40bb 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java @@ -16,6 +16,7 @@ package com.expediagroup.beekeeper.cleanup.validation; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.util.HashMap; @@ -25,21 +26,25 @@ import org.junit.Test; import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; +import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; public class IcebergValidatorTest { + private CleanerClientFactory cleanerClientFactory; private CleanerClient cleanerClient; private IcebergValidator icebergValidator; @Before - public void setUp() { + public void setUp() throws Exception { + cleanerClientFactory = mock(CleanerClientFactory.class); cleanerClient = mock(CleanerClient.class); - icebergValidator = new IcebergValidator(cleanerClient); + when(cleanerClientFactory.newInstance()).thenReturn(cleanerClient); + icebergValidator = new IcebergValidator(cleanerClientFactory); } @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenTableTypeIsIceberg() { + public void 
shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception { Map properties = new HashMap<>(); properties.put("table_type", "ICEBERG"); @@ -47,10 +52,12 @@ public void shouldThrowExceptionWhenTableTypeIsIceberg() { when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); icebergValidator.throwExceptionIfIceberg("db", "table"); + verify(cleanerClientFactory).newInstance(); + verify(cleanerClient).close(); } @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenFormatIsIceberg() { + public void shouldThrowExceptionWhenFormatIsIceberg() throws Exception { Map properties = new HashMap<>(); properties.put("format", "iceberg"); @@ -61,7 +68,7 @@ public void shouldThrowExceptionWhenFormatIsIceberg() { } @Test - public void shouldNotThrowExceptionForNonIcebergTable() { + public void shouldNotThrowExceptionForNonIcebergTable() throws Exception { Map properties = new HashMap<>(); properties.put("table_type", "HIVE_TABLE"); @@ -70,10 +77,12 @@ public void shouldNotThrowExceptionForNonIcebergTable() { .thenReturn("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); icebergValidator.throwExceptionIfIceberg("db", "table"); + verify(cleanerClientFactory).newInstance(); + verify(cleanerClient).close(); } @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenOutputFormatContainsIceberg() { + public void shouldThrowExceptionWhenOutputFormatContainsIceberg() throws Exception { Map properties = new HashMap<>(); when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); From e5488738c0720dc60a16dc915e1a43184743c617 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Thu, 21 Nov 2024 15:00:25 +0100 Subject: [PATCH 21/65] fixing it tests for metadata cleanup --- .../beekeeper/cleanup/hive/HiveClient.java | 8 ++- .../cleanup/validation/IcebergValidator.java | 9 +-- .../BeekeeperPathCleanupIntegrationTest.java | 67 ++++++++++++++++--- .../integration/utils/ContainerTestUtils.java | 2 +- 4 files changed, 69 insertions(+), 17 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java index e7ec222d..007afeb1 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java @@ -128,17 +128,19 @@ public Map getTableProperties(String databaseName, String tableN @Override public String getOutputFormat(String databaseName, String tableName) { + String result = null; try { Table table = client.getTable(databaseName, tableName); if (table.getSd() != null) { - return table.getSd().getOutputFormat(); + result = table.getSd().getOutputFormat(); } - throw new BeekeeperException( - "Storage descriptor properties were null for \"" + databaseName + "." + tableName + "."); + } catch (NoSuchObjectException e) { + log.warn("Table {}.{} does not exist", databaseName, tableName); } catch (TException e) { throw new BeekeeperException( "Unexpected exception when getting output format for \"" + databaseName + "." 
+ tableName + ".", e); } + return result; } @Override diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index 45e0145c..f94a5d64 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -39,7 +39,7 @@ public IcebergValidator(CleanerClientFactory cleanerClientFactory) { /** * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information, * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if - * the table is an Iceberg table and throws BeekeeperIcebergException to stop the process. + * the table is an Iceberg table and throws IcebergTableFoundException to stop the process. * * @param databaseName * @param tableName @@ -49,15 +49,16 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) { Map parameters = client.getTableProperties(databaseName, tableName); String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); String format = parameters.getOrDefault("format", "").toLowerCase(); - String outputFormat = client.getOutputFormat(databaseName, tableName).toLowerCase(); - if (tableType.contains("iceberg") || format.contains("iceberg") || outputFormat.contains("iceberg")) { + String outputFormat = client.getOutputFormat(databaseName, tableName); + if (tableType.contains("iceberg") || format.contains("iceberg") || (outputFormat != null + && outputFormat.toLowerCase().contains("iceberg"))) { throw new BeekeeperIcebergException( format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName)); } } catch (Exception e) { throw new BeekeeperIcebergException( - format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, + format("Unexpected exception when identifying if table %s.%s is Iceberg.", databaseName, tableName), e); } } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index 0b8c29b0..a38f515f 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -15,6 +15,9 @@ */ package com.expediagroup.beekeeper.integration; +import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; @@ -27,20 +30,24 @@ import java.sql.SQLException; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.http.client.methods.HttpGet; import 
org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.thrift.TException; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -51,10 +58,14 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; +import com.google.common.collect.ImmutableMap; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; +import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; +import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; + @Testcontainers public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -63,6 +74,12 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; + private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; + private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; + private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; + + private static final String S3_ACCESS_KEY = "access"; + private static final String S3_SECRET_KEY = "secret"; private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -84,16 +101,35 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); + static { + S3_CONTAINER.start(); + } private static AmazonS3 amazonS3; - + private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); private final ExecutorService executorService = Executors.newFixedThreadPool(1); + private static Map metastoreProperties = ImmutableMap + .builder() + .put(ENDPOINT, S3_ENDPOINT) + .put(ACCESS_KEY, S3_ACCESS_KEY) + .put(SECRET_KEY, S3_SECRET_KEY) + .build(); + + @RegisterExtension + public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( + DATABASE_NAME_VALUE, metastoreProperties); + + private HiveTestUtils hiveTestUtils; + private HiveMetaStoreClient metastoreClient; + @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, S3_ENDPOINT); + System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); + 
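// Together with the matching PUT-side property set on the next line, this relaxes the AWS SDK v1
// client-side MD5 checks; assumed reason: MD5/ETag validation tends to fail against the LocalStack
// S3 endpoint these tests use.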
System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -105,12 +141,18 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); + System.clearProperty(METASTORE_URI_PROPERTY); amazonS3.shutdown(); + S3_CONTAINER.stop(); } @BeforeEach public void setup() { + System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); + metastoreClient = thriftHiveMetaStore.client(); + hiveTestUtils = new HiveTestUtils(metastoreClient); + amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); @@ -126,7 +168,8 @@ public void stop() throws InterruptedException { } @Test - public void cleanupPathsForFile() throws SQLException { + public void cleanupPathsForFile() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -143,7 +186,8 @@ public void cleanupPathsForFile() throws SQLException { } @Test - public void cleanupPathsForDirectory() throws SQLException { + public void cleanupPathsForDirectory() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -162,7 +206,8 @@ public void cleanupPathsForDirectory() throws SQLException { } @Test - public void cleanupPathsForDirectoryWithSpace() throws SQLException { + public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); String objectKeyRoot = DB_AND_TABLE_PREFIX + "/ /id1/partition1"; String objectKey1 = objectKeyRoot + "/file1"; String objectKey2 = objectKeyRoot + "/file2"; @@ -182,7 +227,8 @@ public void cleanupPathsForDirectoryWithSpace() throws SQLException { } @Test - public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException { + public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -199,7 +245,8 @@ public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException { } @Test - public void cleanupSentinelForParent() throws SQLException { + public void cleanupSentinelForParent() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; String databaseSentinel = "database_$folder$"; @@ -223,7 +270,8 @@ public void cleanupSentinelForParent() throws SQLException { } @Test - public void cleanupSentinelForNonEmptyParent() throws SQLException { + public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String 
tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); @@ -245,7 +293,8 @@ public void cleanupSentinelForNonEmptyParent() throws SQLException { } @Test - public void metrics() throws SQLException { + public void metrics() throws SQLException, TException { + hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java index 32b8241d..c8cce2fc 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java @@ -30,7 +30,7 @@ public class ContainerTestUtils { public static MySQLContainer mySqlContainer() { - MySQLContainer container = new MySQLContainer("mysql:8.0.26").withDatabaseName("beekeeper"); + MySQLContainer container = new MySQLContainer("mysql:8.0.40").withDatabaseName("beekeeper"); container.withCommand("--default-authentication-plugin=mysql_native_password"); return container; } From 8b1ca85d2fe40a4c9a7b6b77bba5e2fc87c9f075 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Thu, 21 Nov 2024 15:05:32 +0100 Subject: [PATCH 22/65] fix path cleanup --- .../BeekeeperPathCleanupIntegrationTest.java | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index a38f515f..ed9ed046 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -77,7 +77,7 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; - + private static final String S3_ACCESS_KEY = "access"; private static final String S3_SECRET_KEY = "secret"; @@ -88,6 +88,7 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String OBJECT_KEY2 = DB_AND_TABLE_PREFIX + "/id1/partition1/file2"; private static final String OBJECT_KEY_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition1_$folder$"; private static final String ABSOLUTE_PATH = "s3://" + BUCKET + "/" + OBJECT_KEY_ROOT; + private static final String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; private static final String OBJECT_KEY_OTHER = DB_AND_TABLE_PREFIX + "/id1/partition10/file1"; private static final String OBJECT_KEY_OTHER_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition10_$folder$"; @@ -142,6 +143,8 @@ public static void teardown() { System.clearProperty(DRY_RUN_ENABLED_PROPERTY); 
System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); System.clearProperty(METASTORE_URI_PROPERTY); + System.clearProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY); + System.clearProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY); amazonS3.shutdown(); S3_CONTAINER.stop(); @@ -169,7 +172,7 @@ public void stop() throws InterruptedException { @Test public void cleanupPathsForFile() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -187,7 +190,7 @@ public void cleanupPathsForFile() throws SQLException, TException { @Test public void cleanupPathsForDirectory() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -207,7 +210,7 @@ public void cleanupPathsForDirectory() throws SQLException, TException { @Test public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); String objectKeyRoot = DB_AND_TABLE_PREFIX + "/ /id1/partition1"; String objectKey1 = objectKeyRoot + "/file1"; String objectKey2 = objectKeyRoot + "/file2"; @@ -228,7 +231,7 @@ public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException @Test public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -246,7 +249,7 @@ public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TEx @Test public void cleanupSentinelForParent() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; String databaseSentinel = "database_$folder$"; @@ -271,7 +274,7 @@ public void cleanupSentinelForParent() throws SQLException, TException { @Test public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); @@ -294,7 +297,7 @@ public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { @Test public void metrics() throws SQLException, TException { - hiveTestUtils.createTable(ABSOLUTE_PATH, TABLE_NAME_VALUE, false); + hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); From 631502b2e4a56820aee5b2cf5a4c7952963e4a14 Mon Sep 17 00:00:00 2001 
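One detail worth spelling out from the two path-cleanup test patches above: the table registered with the embedded metastore points at an s3a:// location, while the path Beekeeper schedules for cleanup stays on the s3:// scheme. Assuming the usual Hadoop S3A setup, the s3a URI (together with the ENDPOINT/ACCESS_KEY/SECRET_KEY properties handed to the metastore extension) is what lets metastore-side code reach the LocalStack bucket, whereas the cleanup code keeps working on the s3:// path stored in the housekeeping tables. The two constants, copied from the diffs above rather than newly introduced:

String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; // location given to the metastore
String ABSOLUTE_PATH = "s3://" + BUCKET + "/" + OBJECT_KEY_ROOT; // path scheduled for cleanup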
From: javsanbel2 Date: Thu, 21 Nov 2024 16:35:09 +0100 Subject: [PATCH 23/65] fix main problem with tests --- ...etadataSchedulerApiaryIntegrationTest.java | 37 ++++++++++++++++--- .../integration/utils/ContainerTestUtils.java | 2 +- pom.xml | 5 ++- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index ccbf19c6..321a89ad 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -36,6 +36,7 @@ import java.util.Set; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; @@ -45,6 +46,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -54,6 +56,7 @@ import io.micrometer.core.instrument.composite.CompositeMeterRegistry; import com.amazonaws.services.sqs.AmazonSQS; +import com.amazonaws.services.sqs.model.CreateQueueResult; import com.amazonaws.services.sqs.model.PurgeQueueRequest; import com.amazonaws.services.sqs.model.SendMessageRequest; @@ -64,19 +67,27 @@ import com.expediagroup.beekeeper.integration.model.AlterTableSqsMessage; import com.expediagroup.beekeeper.integration.model.CreateTableSqsMessage; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; +import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary; +import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; + @Testcontainers public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { private static final int TIMEOUT = 30; + private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; + private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; private static final String QUEUE = "apiary-receiver-queue"; private static final String SCHEDULED_EXPIRED_METRIC = "metadata-scheduled"; private static final String HEALTHCHECK_URI = "http://localhost:8080/actuator/health"; private static final String PROMETHEUS_URI = "http://localhost:8080/actuator/prometheus"; + private static final String S3_ACCESS_KEY = "access"; + private static final String S3_SECRET_KEY = "secret"; + private static final String PARTITION_KEYS = "{ \"event_date\": \"date\", \"event_hour\": \"smallint\"}"; private static final String PARTITION_A_VALUES = "[ \"2020-01-01\", \"0\" ]"; private static final String PARTITION_B_VALUES = "[ \"2020-01-01\", \"1\" ]"; @@ -84,30 +95,44 @@ public class 
BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends Beek private static final String PARTITION_B_NAME = "event_date=2020-01-01/event_hour=1"; private static final String LOCATION_A = "s3://bucket/table1/partition"; private static final String LOCATION_B = "s3://bucket/table2/partition"; + private static final String TABLE_PATH = "/tmp/bucket/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; @Container private static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS); private static AmazonSQS amazonSQS; + private static String queueUrl; + + @RegisterExtension + public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( + DATABASE_NAME_VALUE); + + private HiveTestUtils hiveTestUtils; + private HiveMetaStoreClient metastoreClient; @BeforeAll public static void init() { - String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); - System.setProperty(APIARY_QUEUE_URL_PROPERTY, queueUrl); - + System.setProperty(DRY_RUN_ENABLED_PROPERTY, "false"); amazonSQS = ContainerTestUtils.sqsClient(SQS_CONTAINER, AWS_REGION); - amazonSQS.createQueue(QUEUE); + CreateQueueResult queue = amazonSQS.createQueue(QUEUE); + queueUrl = queue.getQueueUrl(); + System.setProperty(APIARY_QUEUE_URL_PROPERTY, queueUrl); } @AfterAll public static void teardown() { System.clearProperty(APIARY_QUEUE_URL_PROPERTY); + System.clearProperty(DRY_RUN_ENABLED_PROPERTY); amazonSQS.shutdown(); } @BeforeEach public void setup() { - amazonSQS.purgeQueue(new PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE))); + System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); + metastoreClient = thriftHiveMetaStore.client(); + hiveTestUtils = new HiveTestUtils(metastoreClient); + + amazonSQS.purgeQueue(new PurgeQueueRequest(queueUrl)); executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {})); await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning); } @@ -230,7 +255,7 @@ public void prometheus() { } private SendMessageRequest sendMessageRequest(String payload) { - return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); + return new SendMessageRequest(queueUrl, payload); } private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName) { diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java index c8cce2fc..d0d96b7b 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java @@ -36,7 +36,7 @@ public static MySQLContainer mySqlContainer() { } public static LocalStackContainer awsContainer(LocalStackContainer.Service service) { - return new LocalStackContainer().withServices(service); + return new LocalStackContainer("0.13.1").withServices(service); } public static String awsServiceEndpoint(LocalStackContainer awsContainer, LocalStackContainer.Service service) { diff --git a/pom.xml b/pom.xml index 4683bea5..a2008896 100644 --- a/pom.xml +++ b/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 @@ -50,7 +51,7 @@ 1.27 2.7.9 5.3.25 - 1.17.1 + 1.17.6 11-slim From c1a7c96d35ea2e10d0a6427cf62358d088e98dda Mon Sep 17 00:00:00 2001 From: Hamza Jugon 
Date: Thu, 21 Nov 2024 16:03:03 +0000 Subject: [PATCH 24/65] Fix BeekeeperDryRunPathCleanupIntegrationTest --- ...eeperDryRunPathCleanupIntegrationTest.java | 44 ++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java index 63d2e443..9d7e5272 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java @@ -15,6 +15,9 @@ */ package com.expediagroup.beekeeper.integration; +import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; @@ -25,10 +28,12 @@ import java.sql.SQLException; import java.util.List; +import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -36,6 +41,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; +import org.junit.jupiter.api.extension.RegisterExtension; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; @@ -47,12 +53,16 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; +import com.google.common.collect.ImmutableMap; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; +import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.integration.utils.TestAppender; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; +import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; + @Testcontainers @ExtendWith(MockitoExtension.class) public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -62,6 +72,12 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; + private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; + private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; + private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; + + private static final String S3_ACCESS_KEY = "access"; + private static final String S3_SECRET_KEY = "secret"; 
private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -83,17 +99,37 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); + static { + S3_CONTAINER.start(); + } private static AmazonS3 amazonS3; + private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); + private final ExecutorService executorService = Executors.newFixedThreadPool(1); private final TestAppender appender = new TestAppender(); + private static Map metastoreProperties = ImmutableMap + .builder() + .put(ENDPOINT, S3_ENDPOINT) + .put(ACCESS_KEY, S3_ACCESS_KEY) + .put(SECRET_KEY, S3_SECRET_KEY) + .build(); + + @RegisterExtension + public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( + DATABASE_NAME_VALUE, metastoreProperties); + private HiveTestUtils hiveTestUtils; + private HiveMetaStoreClient metastoreClient; + @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, S3_ENDPOINT); + System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); + System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -105,12 +141,18 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); + System.clearProperty(METASTORE_URI_PROPERTY); amazonS3.shutdown(); + S3_CONTAINER.stop(); } @BeforeEach public void setup() { + System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); + metastoreClient = thriftHiveMetaStore.client(); + hiveTestUtils = new HiveTestUtils(metastoreClient); + amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); From 90c287109bbc4344e90820a3fbdc2457720b3389 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 21 Nov 2024 17:21:53 +0000 Subject: [PATCH 25/65] revert changes to fix BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest --- .../beekeeper/integration/utils/ContainerTestUtils.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java index d0d96b7b..32b8241d 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/ContainerTestUtils.java @@ -30,13 +30,13 @@ public class ContainerTestUtils { public static MySQLContainer mySqlContainer() { - MySQLContainer container = new MySQLContainer("mysql:8.0.40").withDatabaseName("beekeeper"); + MySQLContainer container = new 
MySQLContainer("mysql:8.0.26").withDatabaseName("beekeeper"); container.withCommand("--default-authentication-plugin=mysql_native_password"); return container; } public static LocalStackContainer awsContainer(LocalStackContainer.Service service) { - return new LocalStackContainer("0.13.1").withServices(service); + return new LocalStackContainer().withServices(service); } public static String awsServiceEndpoint(LocalStackContainer awsContainer, LocalStackContainer.Service service) { From 45fcc2643b896a4b461ba187c3459891cc21bee0 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 21 Nov 2024 19:02:18 +0000 Subject: [PATCH 26/65] Added missing properties to fix BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest --- ...rencedPathSchedulerApiaryIntegrationTest.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index 92a3d0c0..af66ac2e 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -45,6 +45,7 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -66,11 +67,14 @@ import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary; +import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; + @Testcontainers public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { private static final int TIMEOUT = 5; private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; + private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String QUEUE = "apiary-receiver-queue"; private static final String SCHEDULED_ORPHANED_METRIC = "paths-scheduled"; @@ -81,6 +85,10 @@ public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends Bee private static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS); private static AmazonSQS amazonSQS; + @RegisterExtension + public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( + DATABASE_NAME_VALUE); + @BeforeAll public static void init() { String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); @@ -93,12 +101,17 @@ public static void init() { @AfterAll public static void teardown() { System.clearProperty(APIARY_QUEUE_URL_PROPERTY); + System.clearProperty("properties.metastore-uri"); + System.clearProperty("properties.dry-run-enabled"); amazonSQS.shutdown(); } @BeforeEach public void setup() { + System.setProperty("properties.metastore-uri", thriftHiveMetaStore.getThriftConnectionUri()); + System.setProperty("properties.dry-run-enabled", "false"); + amazonSQS.purgeQueue(new 
PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE))); executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {})); await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning); @@ -108,6 +121,9 @@ public void setup() { public void stop() throws InterruptedException { BeekeeperSchedulerApiary.stop(); executorService.awaitTermination(5, TimeUnit.SECONDS); + + System.clearProperty("properties.metastore-uri"); + System.clearProperty("properties.dry-run-enabled"); } @Test From 06914d1d42e2abbdcb55561bdc25f631cb6a5e63 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Fri, 22 Nov 2024 12:24:24 +0000 Subject: [PATCH 27/65] Add integration test for metadatacleanup --- ...ekeeperMetadataCleanupIntegrationTest.java | 26 +++++++++++++ .../integration/utils/HiveTestUtils.java | 37 +++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java index d0e52df9..8e88682e 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java @@ -26,6 +26,7 @@ import static com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter.METRIC_NAME; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DELETED; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DISABLED; +import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.LONG_CLEANUP_DELAY_VALUE; @@ -33,6 +34,7 @@ import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -225,6 +227,30 @@ public void cleanupPartitionedTable() throws Exception { assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_OBJECT_KEY)).isFalse(); } + @Test + public void shouldSkipCleanupForIcebergTable() throws Exception { + // Define custom table props and outputFormat for an Iceberg table + Map tableProperties = new HashMap<>(); + tableProperties.put("table_type", "ICEBERG"); + tableProperties.put("format", "ICEBERG/PARQUET"); + String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + // Create the Iceberg table in the Hive metastore + hiveTestUtils.createTableWithProperties( + PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, tableProperties, outputFormat, true); + // Add data to the S3 bucket + amazonS3.putObject(BUCKET, PARTITIONED_TABLE_OBJECT_KEY, TABLE_DATA); + // Insert expired metadata for the Iceberg table + insertExpiredMetadata(PARTITIONED_TABLE_PATH, null); + // wait for cleanup process to run + await() + .atMost(TIMEOUT, TimeUnit.SECONDS) + .until(() -> getExpiredMetadata().get(0).getHousekeepingStatus() == SKIPPED); + // Verify that the table still exists + assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue(); // this is fine, the table 
is not changed + // Verify that the data in S3 is still present + assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_TABLE_OBJECT_KEY)).isTrue(); // this too is fine, the data is not changed + } + @Test public void cleanupPartitionButNotTable() throws Exception { Table table = hiveTestUtils.createTable(PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java index 896efe25..8dd53b15 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; +import java.util.Map; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.TableType; @@ -111,4 +112,40 @@ private Partition newTablePartition(Table hiveTable, List values, URI lo partition.getSd().setLocation(location.toString()); return partition; } + + public Table createTableWithProperties(String path, String tableName, boolean partitioned, Map tableProperties, String outputFormat, boolean withBeekeeperProperty) + throws TException { + Table hiveTable = new Table(); + hiveTable.setDbName(DATABASE_NAME_VALUE); + hiveTable.setTableName(tableName); + hiveTable.setTableType(TableType.EXTERNAL_TABLE.name()); + hiveTable.putToParameters("EXTERNAL", "TRUE"); + + // Add custom table props + if (tableProperties != null) { + hiveTable.getParameters().putAll(tableProperties); + } + if (withBeekeeperProperty) { + hiveTable.putToParameters(LifecycleEventType.EXPIRED.getTableParameterName(), "true"); + } + if (partitioned) { + hiveTable.setPartitionKeys(PARTITION_COLUMNS); + } + StorageDescriptor sd = new StorageDescriptor(); + sd.setCols(DATA_COLUMNS); + sd.setLocation(path); + sd.setParameters(new HashMap<>()); + // Set the output format for the storage descriptor, defaulting to TextOutputFormat if not specified + if (outputFormat != null) { + sd.setOutputFormat(outputFormat); + } else { + sd.setOutputFormat(TextOutputFormat.class.getName()); + } + sd.setSerdeInfo(new SerDeInfo()); + sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde"); + hiveTable.setSd(sd); + metastoreClient.createTable(hiveTable); + + return hiveTable; + } } From a09e9f172451e648bdf6d4747b107a4d6fb7b15b Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Sun, 24 Nov 2024 22:57:23 +0000 Subject: [PATCH 28/65] Update metadataHandler to catch beekeeperException --- .../handler/ExpiredMetadataHandler.java | 35 ++++++++++++------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index 28a3cfe4..4df763e3 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -32,6 +32,8 @@ import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import 
com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; +import com.expediagroup.beekeeper.core.error.BeekeeperException; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingMetadataRepository; @@ -113,18 +115,21 @@ private boolean cleanUpTable(CleanerClient client, HousekeepingMetadata housekee String tableName = housekeepingMetadata.getTableName(); log.info("Cleaning up metadata for \"{}.{}\"", databaseName, tableName); if (metadataCleaner.tableExists(client, databaseName, tableName)) { - metadataCleaner.dropTable(housekeepingMetadata, client); - pathCleaner.cleanupPath(housekeepingMetadata); + try { + metadataCleaner.dropTable(housekeepingMetadata, client); + pathCleaner.cleanupPath(housekeepingMetadata); + } catch (BeekeeperException e) { + log.warn("Skipping cleanup for Iceberg table \"{}.{}\": {}", databaseName, tableName, e.getMessage()); + updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); + return false; + } } else { log.info("Cannot drop table \"{}.{}\". Table does not exist.", databaseName, tableName); } return true; } - private boolean cleanupPartition( - CleanerClient client, - HousekeepingMetadata housekeepingMetadata, - boolean dryRunEnabled) { + private boolean cleanupPartition(CleanerClient client, HousekeepingMetadata housekeepingMetadata, boolean dryRunEnabled) { if (!S3PathValidator.validPartitionPath(housekeepingMetadata.getPath())) { log.warn("Will not clean up partition path \"{}\" because it is not valid.", housekeepingMetadata.getPath()); updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); @@ -132,16 +137,20 @@ private boolean cleanupPartition( } String databaseName = housekeepingMetadata.getDatabaseName(); String tableName = housekeepingMetadata.getTableName(); - log.info("Cleaning up metadata for \"{}.{}\"", databaseName, tableName); + log.info("Cleaning up metadata for partition \"{}\" in table \"{}.{}\"", housekeepingMetadata.getPartitionName(), databaseName, tableName); if (metadataCleaner.tableExists(client, databaseName, tableName)) { - boolean partitionDeleted = metadataCleaner.dropPartition(housekeepingMetadata, client); - if (partitionDeleted) { - pathCleaner.cleanupPath(housekeepingMetadata); + try { + boolean partitionDeleted = metadataCleaner.dropPartition(housekeepingMetadata, client); + if (partitionDeleted) { + pathCleaner.cleanupPath(housekeepingMetadata); + } + } catch (BeekeeperException e) { + log.warn("Skipping cleanup for Iceberg partition \"{}\" in table \"{}.{}\": {}", housekeepingMetadata.getPartitionName(), databaseName, tableName, e.getMessage()); + updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); + return false; } } else { - log - .info("Cannot drop partition \"{}\" from table \"{}.{}\". Table does not exist.", - housekeepingMetadata.getPartitionName(), databaseName, tableName); + log.info("Cannot drop partition \"{}\" from table \"{}.{}\". 
Table does not exist.", housekeepingMetadata.getPartitionName(), databaseName, tableName); } return true; } From 8c1ce389ca07991d05b312a55fec776e1889ec6d Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Sun, 24 Nov 2024 23:38:05 +0000 Subject: [PATCH 29/65] cleanup --- .../cleanup/validation/IcebergValidator.java | 12 +++++----- .../cleanup/aws/S3PathCleanerTest.java | 12 ---------- .../cleanup/hive/HiveMetadataCleanerTest.java | 5 ++--- .../validation/IcebergValidatorTest.java | 22 +++++++++++++++++++ 4 files changed, 29 insertions(+), 22 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index f94a5d64..c4167232 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -37,9 +37,9 @@ public IcebergValidator(CleanerClientFactory cleanerClientFactory) { } /** - * Beekeeper does not support Iceberg format right now. Iceberg tables in Hive Metastore do not store partition information, - * so Beekeeper tries to clean up the entire table because that information is missing. This method checks if - * the table is an Iceberg table and throws IcebergTableFoundException to stop the process. + * Beekeeper currently does not support the Iceberg format. Iceberg tables in the Hive Metastore do not store partition information, + * causing Beekeeper to attempt to clean up the entire table due to the missing information. This method checks if + * the table is an Iceberg table and throws a BeekeeperIcebergException to stop the process. 
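// Illustrative aside, a hedged sketch only and not part of this patch series: how callers are
// expected to consume the guard described in the javadoc above. In the series the validator runs
// inside the Hive/S3 cleaners and the handlers catch the resulting exception and mark the record
// SKIPPED rather than failing the run; the helper names below (cleanUpTable, updateStatus,
// metadataCleaner, pathCleaner) mirror handler code shown elsewhere in this document and are
// assumptions for the sketch, not the authoritative implementation.
private boolean cleanUpTable(CleanerClient client, HousekeepingMetadata metadata, boolean dryRunEnabled) {
  try {
    // In the series this check happens inside dropTable/cleanupPath; it is inlined here for clarity.
    icebergValidator.throwExceptionIfIceberg(metadata.getDatabaseName(), metadata.getTableName());
    metadataCleaner.dropTable(metadata, client);
    pathCleaner.cleanupPath(metadata);
    return true;
  } catch (BeekeeperIcebergException e) {
    // Iceberg tables are left untouched; the housekeeping record is only re-labelled.
    log.warn("Skipping cleanup for Iceberg table \"{}.{}\": {}", metadata.getDatabaseName(),
        metadata.getTableName(), e.getMessage());
    updateStatus(metadata, SKIPPED, dryRunEnabled);
    return false;
  }
}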
* * @param databaseName * @param tableName @@ -53,13 +53,11 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) { if (tableType.contains("iceberg") || format.contains("iceberg") || (outputFormat != null && outputFormat.toLowerCase().contains("iceberg"))) { throw new BeekeeperIcebergException( - format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, - tableName)); + format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName)); } } catch (Exception e) { throw new BeekeeperIcebergException( - format("Unexpected exception when identifying if table %s.%s is Iceberg.", databaseName, - tableName), e); + format("Unexpected exception when identifying if table %s.%s is Iceberg.", databaseName, tableName), e); } } } diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 7a046c98..35387a42 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -383,18 +383,6 @@ void shouldThrowBeekeeperIcebergExceptionWhenIcebergTableDetected() { verifyNoInteractions(bytesDeletedReporter); } - @Test - void shouldNotReportBytesDeletedWhenIcebergValidatorThrows() { - doThrow(new BeekeeperIcebergException("Iceberg tables are not supported")) - .when(icebergValidator) - .throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); - - assertThatExceptionOfType(BeekeeperIcebergException.class) - .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)); - - verify(bytesDeletedReporter, never()).reportTaggable(anyLong(), any(), any()); - } - @Test void shouldProceedWithDeletionWhenNotIcebergTable() { // setting up objects in the bucket diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java index 3ed5aa8b..6a579cb8 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java @@ -95,7 +95,7 @@ public void tableExists() { } @Test - public void dropTableWhenIcebergTable() { + public void doesNotDropTableWhenIcebergTable() { when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); doThrow(new BeekeeperIcebergException("Iceberg table")) @@ -106,13 +106,12 @@ public void dropTableWhenIcebergTable() { () -> cleaner.dropTable(housekeepingMetadata, hiveClient) ); - // Verify that dropTable was not called on hiveClient verify(hiveClient, never()).dropTable(DATABASE, TABLE_NAME); verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE); } @Test - public void dropPartitionWhenIcebergTable() { + public void doesNotDropPartitionWhenIcebergTable() { when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); doThrow(new BeekeeperIcebergException("Iceberg table")) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java 
b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java index 663a40bb..dcaf308c 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java @@ -91,4 +91,26 @@ public void shouldThrowExceptionWhenOutputFormatContainsIceberg() throws Excepti icebergValidator.throwExceptionIfIceberg("db", "table"); } + + @Test(expected = BeekeeperIcebergException.class) + public void shouldThrowExceptionWhenFormatIsNullButTableTypeIsIceberg() throws Exception { + Map properties = new HashMap<>(); + properties.put("table_type", "ICEBERG"); + + when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } + + @Test + public void shouldNotThrowExceptionWhenOutputFormatIsNull() throws Exception { + Map properties = new HashMap<>(); + properties.put("table_type", "HIVE_TABLE"); + + when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); + when(cleanerClient.getOutputFormat("db", "table")).thenReturn(null); + + icebergValidator.throwExceptionIfIceberg("db", "table"); + } } From 33a22c144fc823cf2251498b219d9dcc71599d92 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 10:39:30 +0000 Subject: [PATCH 30/65] Update path-cleanup housekeeping status --- .../BeekeeperPathCleanupIntegrationTest.java | 30 +++++++++++++++++++ .../cleanup/handler/GenericPathHandler.java | 19 ++++++++---- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index ed9ed046..e370906c 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -24,11 +24,13 @@ import static com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter.METRIC_NAME; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DELETED; +import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -60,6 +62,9 @@ import com.amazonaws.services.s3.model.CreateBucketRequest; import com.google.common.collect.ImmutableMap; +import com.expediagroup.beekeeper.core.model.HousekeepingEntity; +import com.expediagroup.beekeeper.core.model.HousekeepingPath; +import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; @@ -295,6 +300,31 @@ public void cleanupSentinelForNonEmptyParent() throws 
SQLException, TException { assertThat(amazonS3.doesObjectExist(BUCKET, tableSentinel)).isTrue(); } + @Test + public void shouldSkipCleanupForIcebergTable() throws Exception { + // add iceberg table props + Map tableProperties = new HashMap<>(); + tableProperties.put("table_type", "ICEBERG"); + tableProperties.put("format", "ICEBERG/PARQUET"); + String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; + // create iceberg table + hiveTestUtils.createTableWithProperties( + TABLE_PATH, TABLE_NAME_VALUE, false, tableProperties, outputFormat, true); + // add data + String objectKey = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/file1"; + amazonS3.putObject(BUCKET, objectKey, CONTENT); + // insert housekeepingPath record + String path = "s3://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; + insertUnreferencedPath(path); // Uses default database and table names + // wait for the cleanup process to run and update to skipped + await().atMost(TIMEOUT, TimeUnit.SECONDS) + .until(() -> getUnreferencedPaths().get(0).getHousekeepingStatus() == SKIPPED); + // verify that the data in S3 is still present + assertThat(amazonS3.doesObjectExist(BUCKET, objectKey)) + .withFailMessage("S3 object %s should still exist as cleanup was skipped.", objectKey) + .isTrue(); + } + @Test public void metrics() throws SQLException, TException { hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index 30442c22..ad85a3b0 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -24,6 +24,7 @@ import org.springframework.data.domain.Slice; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; +import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @@ -67,12 +68,20 @@ public Pageable processPage(Pageable pageable, Slice page, boo } private boolean cleanUpPath(HousekeepingPath housekeepingPath) { - if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { - pathCleaner.cleanupPath(housekeepingPath); - return true; + try { + if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { + pathCleaner.cleanupPath(housekeepingPath); + return true; + } + log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); + return false; + } catch (BeekeeperException e) { + // Handle Iceberg table by updating status to SKIPPED + updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); + log.warn("Skipping cleanup for Iceberg table \"{}.{}\": {}", housekeepingPath.getDatabaseName(), + housekeepingPath.getTableName(), e.getMessage()); + return false; } - log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); - return false; } private void cleanupContent(HousekeepingPath housekeepingPath) { From a4b896a6f117e1fd29d4750374418137f24fb3a0 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 13:24:28 +0000 Subject: [PATCH 31/65] cleanup --- 
.../cleanup/aws/S3PathCleanerTest.java | 7 +------ .../cleanup/hive/HiveMetadataCleanerTest.java | 3 ++- .../core/error/BeekeeperIcebergException.java | 2 +- ...perDryRunMetadataCleanupIntegrationTest.java | 4 ++++ ...BeekeeperMetadataCleanupIntegrationTest.java | 17 ++++++++--------- .../cleanup/handler/ExpiredMetadataHandler.java | 1 - .../cleanup/handler/GenericPathHandler.java | 3 +-- .../apiary/service/SchedulerApiary.java | 2 +- .../apiary/service/SchedulerApiaryTest.java | 1 - 9 files changed, 18 insertions(+), 22 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 35387a42..6eddc429 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -21,10 +21,8 @@ import static org.assertj.core.api.Assertions.assertThatCode; import static org.assertj.core.api.Assertions.assertThatExceptionOfType; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyLong; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -385,17 +383,14 @@ void shouldThrowBeekeeperIcebergExceptionWhenIcebergTableDetected() { @Test void shouldProceedWithDeletionWhenNotIcebergTable() { - // setting up objects in the bucket - amazonS3.putObject(bucket, key1, content); // Add the files + amazonS3.putObject(bucket, key1, content); amazonS3.putObject(bucket, key2, content); - // housekeepingPath is set housekeepingPath.setPath("s3://" + bucket + "/" + keyRoot); assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)) .doesNotThrowAnyException(); - // verify objects are deleted and reporter is called assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); assertThat(amazonS3.doesObjectExist(bucket, key2)).isFalse(); diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java index 6a579cb8..842a7c23 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java @@ -57,7 +57,7 @@ public void typicalDropTable() { when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME); cleaner.dropTable(housekeepingMetadata, hiveClient); - // Verify that the IcebergValidator is called + verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME); verify(hiveClient).dropTable(DATABASE, TABLE_NAME); verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE); @@ -77,6 +77,7 @@ public void typicalDropPartition() { verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION); } + @Test public void dontReportWhenPartitionNotDropped() { when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java 
b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java index 79b02bf1..d85be542 100644 --- a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/error/BeekeeperIcebergException.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java index 3ee2b135..3470bc7f 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java @@ -22,12 +22,14 @@ import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; +import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.LONG_CLEANUP_DELAY_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -60,6 +62,7 @@ import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter; +import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.integration.utils.TestAppender; @@ -94,6 +97,7 @@ public class BeekeeperDryRunMetadataCleanupIntegrationTest extends BeekeeperInte private static final String ROOT_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/"; + private static final String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; private static final String PARTITIONED_TABLE_PATH = ROOT_PATH + PARTITIONED_TABLE_NAME + "/id1"; private static final String PARTITION_ROOT_PATH = ROOT_PATH + "some_location/id1"; private static final String PARTITION_PATH = PARTITION_ROOT_PATH + "/" + PARTITION_NAME + "/file1"; diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java index 8e88682e..aed57e4f 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java @@ -69,6 +69,8 @@ import com.google.common.collect.ImmutableMap; 
import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; +import com.expediagroup.beekeeper.core.model.HousekeepingPath; +import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.metadata.cleanup.BeekeeperMetadataCleanup; @@ -229,26 +231,23 @@ public void cleanupPartitionedTable() throws Exception { @Test public void shouldSkipCleanupForIcebergTable() throws Exception { - // Define custom table props and outputFormat for an Iceberg table Map tableProperties = new HashMap<>(); tableProperties.put("table_type", "ICEBERG"); tableProperties.put("format", "ICEBERG/PARQUET"); String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; - // Create the Iceberg table in the Hive metastore + hiveTestUtils.createTableWithProperties( PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, tableProperties, outputFormat, true); - // Add data to the S3 bucket amazonS3.putObject(BUCKET, PARTITIONED_TABLE_OBJECT_KEY, TABLE_DATA); - // Insert expired metadata for the Iceberg table + insertExpiredMetadata(PARTITIONED_TABLE_PATH, null); - // wait for cleanup process to run + await() .atMost(TIMEOUT, TimeUnit.SECONDS) .until(() -> getExpiredMetadata().get(0).getHousekeepingStatus() == SKIPPED); - // Verify that the table still exists - assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue(); // this is fine, the table is not changed - // Verify that the data in S3 is still present - assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_TABLE_OBJECT_KEY)).isTrue(); // this too is fine, the data is not changed + + assertThat(metastoreClient.tableExists(DATABASE_NAME_VALUE, TABLE_NAME_VALUE)).isTrue(); + assertThat(amazonS3.doesObjectExist(BUCKET, PARTITIONED_TABLE_OBJECT_KEY)).isTrue(); } @Test diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index 4df763e3..9a0f628e 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -33,7 +33,6 @@ import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingMetadataRepository; diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index ad85a3b0..efe70173 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -76,9 +76,8 @@ private boolean cleanUpPath(HousekeepingPath 
housekeepingPath) { log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); return false; } catch (BeekeeperException e) { - // Handle Iceberg table by updating status to SKIPPED updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); - log.warn("Skipping cleanup for Iceberg table \"{}.{}\": {}", housekeepingPath.getDatabaseName(), + log.warn("Skipping cleanup for table \"{}.{}\": {}", housekeepingPath.getDatabaseName(), housekeepingPath.getTableName(), e.getMessage()); return false; } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 5ddc31ab..c07a5381 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2020 Expedia, Inc. + * Copyright (C) 2019-2024 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 6e2d1264..351c3562 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -24,7 +24,6 @@ import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; From 1be81b8815fcab937597edcb570a8e70597650a5 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 13:25:41 +0000 Subject: [PATCH 32/65] cleanup --- .../BeekeeperDryRunMetadataCleanupIntegrationTest.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java index 3470bc7f..3ee2b135 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunMetadataCleanupIntegrationTest.java @@ -22,14 +22,12 @@ import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; -import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.LONG_CLEANUP_DELAY_VALUE; import static 
com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -62,7 +60,6 @@ import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter; -import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.integration.utils.TestAppender; @@ -97,7 +94,6 @@ public class BeekeeperDryRunMetadataCleanupIntegrationTest extends BeekeeperInte private static final String ROOT_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/"; - private static final String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; private static final String PARTITIONED_TABLE_PATH = ROOT_PATH + PARTITIONED_TABLE_NAME + "/id1"; private static final String PARTITION_ROOT_PATH = ROOT_PATH + "some_location/id1"; private static final String PARTITION_PATH = PARTITION_ROOT_PATH + "/" + PARTITION_NAME + "/file1"; From 66ad26163abfcf1a475c027e2b5b7d411d62d443 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 13:31:53 +0000 Subject: [PATCH 33/65] cleanup --- .../cleanup/aws/S3DryRunPathCleanerTest.java | 2 +- .../BeekeeperPathCleanupIntegrationTest.java | 15 +++++++-------- .../integration/utils/HiveTestUtils.java | 3 +-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java index 583e0018..2ce72451 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java @@ -58,7 +58,7 @@ class S3DryRunPathCleanerTest { private HousekeepingPath housekeepingPath; private AmazonS3 amazonS3; - private @Mock BytesDeletedReporter bytesDeletedReporter; + @Mock private BytesDeletedReporter bytesDeletedReporter; @Mock private IcebergValidator icebergValidator; private boolean dryRunEnabled = true; diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index e370906c..dd1be3b7 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -302,24 +302,23 @@ public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { @Test public void shouldSkipCleanupForIcebergTable() throws Exception { - // add iceberg table props Map tableProperties = new HashMap<>(); tableProperties.put("table_type", "ICEBERG"); tableProperties.put("format", "ICEBERG/PARQUET"); String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; - // create iceberg table + hiveTestUtils.createTableWithProperties( TABLE_PATH, TABLE_NAME_VALUE, false, tableProperties, outputFormat, true); - // add data + String objectKey = 
DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/file1"; - amazonS3.putObject(BUCKET, objectKey, CONTENT); - // insert housekeepingPath record String path = "s3://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; - insertUnreferencedPath(path); // Uses default database and table names - // wait for the cleanup process to run and update to skipped + + amazonS3.putObject(BUCKET, objectKey, CONTENT); + insertUnreferencedPath(path); + await().atMost(TIMEOUT, TimeUnit.SECONDS) .until(() -> getUnreferencedPaths().get(0).getHousekeepingStatus() == SKIPPED); - // verify that the data in S3 is still present + assertThat(amazonS3.doesObjectExist(BUCKET, objectKey)) .withFailMessage("S3 object %s should still exist as cleanup was skipped.", objectKey) .isTrue(); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java index 8dd53b15..222fafdd 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java @@ -121,7 +121,6 @@ public Table createTableWithProperties(String path, String tableName, boolean pa hiveTable.setTableType(TableType.EXTERNAL_TABLE.name()); hiveTable.putToParameters("EXTERNAL", "TRUE"); - // Add custom table props if (tableProperties != null) { hiveTable.getParameters().putAll(tableProperties); } @@ -135,7 +134,7 @@ public Table createTableWithProperties(String path, String tableName, boolean pa sd.setCols(DATA_COLUMNS); sd.setLocation(path); sd.setParameters(new HashMap<>()); - // Set the output format for the storage descriptor, defaulting to TextOutputFormat if not specified + if (outputFormat != null) { sd.setOutputFormat(outputFormat); } else { From 0948aeaf880b469293c7260a33480d1c717c50ae Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 13:58:20 +0000 Subject: [PATCH 34/65] Update beekeeper to runtime exception --- .../cleanup/service/PagingMetadataCleanupServiceTest.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/service/PagingMetadataCleanupServiceTest.java b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/service/PagingMetadataCleanupServiceTest.java index 1a14b3f8..00f8a762 100644 --- a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/service/PagingMetadataCleanupServiceTest.java +++ b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/service/PagingMetadataCleanupServiceTest.java @@ -235,10 +235,10 @@ public void mixOfAllPaths() { } @Test - void metadataCleanerException() { + public void metadataCleanerException() { Mockito .doNothing() - .doThrow(new BeekeeperException("Error")) + .doThrow(new RuntimeException("Error")) .when(metadataCleaner) .dropTable(Mockito.any(HousekeepingMetadata.class), Mockito.any(HiveClient.class)); @@ -270,7 +270,7 @@ void metadataCleanerException() { } @Test - void invalidPaths() { + public void invalidPaths() { List tables = List .of(createHousekeepingMetadata("table1", "s3://invalid", null, SCHEDULED), createHousekeepingMetadata("table2", "s3://invalid/path", "partition", SCHEDULED)); From ed8745f38fec4ace4045a9f305a58aa0d3ab49d7 Mon Sep 17 00:00:00 2001 
From: Hamza Jugon Date: Mon, 25 Nov 2024 18:35:24 +0000 Subject: [PATCH 35/65] bump versions for testing --- beekeeper-api/pom.xml | 2 +- beekeeper-cleanup/pom.xml | 2 +- beekeeper-core/pom.xml | 2 +- beekeeper-integration-tests/pom.xml | 2 +- beekeeper-metadata-cleanup/pom.xml | 2 +- beekeeper-path-cleanup/pom.xml | 2 +- beekeeper-scheduler-apiary/pom.xml | 2 +- beekeeper-scheduler/pom.xml | 2 +- beekeeper-vacuum-tool/pom.xml | 2 +- pom.xml | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/beekeeper-api/pom.xml b/beekeeper-api/pom.xml index 6ea88ffb..85fc8b81 100644 --- a/beekeeper-api/pom.xml +++ b/beekeeper-api/pom.xml @@ -4,7 +4,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-api diff --git a/beekeeper-cleanup/pom.xml b/beekeeper-cleanup/pom.xml index e3f59c07..d32e498a 100644 --- a/beekeeper-cleanup/pom.xml +++ b/beekeeper-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-cleanup diff --git a/beekeeper-core/pom.xml b/beekeeper-core/pom.xml index 58e3760d..4c3914ad 100644 --- a/beekeeper-core/pom.xml +++ b/beekeeper-core/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-core diff --git a/beekeeper-integration-tests/pom.xml b/beekeeper-integration-tests/pom.xml index 98c1f209..65c8ef69 100644 --- a/beekeeper-integration-tests/pom.xml +++ b/beekeeper-integration-tests/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-integration-tests diff --git a/beekeeper-metadata-cleanup/pom.xml b/beekeeper-metadata-cleanup/pom.xml index daa61483..499d00c0 100644 --- a/beekeeper-metadata-cleanup/pom.xml +++ b/beekeeper-metadata-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-metadata-cleanup diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index ae9c43f3..ac1a2723 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-path-cleanup diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index e34c241d..7542da1e 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -6,7 +6,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-scheduler-apiary diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index ccdaf7bc..71ff5ad9 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-scheduler diff --git a/beekeeper-vacuum-tool/pom.xml b/beekeeper-vacuum-tool/pom.xml index eac17a57..7399b317 100644 --- a/beekeeper-vacuum-tool/pom.xml +++ b/beekeeper-vacuum-tool/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT beekeeper-vacuum-tool diff --git a/pom.xml b/pom.xml index a2008896..b10ec464 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ beekeeper-parent - 3.5.8-SNAPSHOT + 3.5.800-SNAPSHOT Beekeeper is a service which manages the cleanup of tables and unreferenced S3 paths. 
2019 pom From 1047c57a4129dc1fcabedbef308d770e379f95af Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 22:52:41 +0000 Subject: [PATCH 36/65] Add Hadoop dependencies --- beekeeper-path-cleanup/pom.xml | 22 ++++++++++++++++++++++ beekeeper-scheduler-apiary/pom.xml | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index ac1a2723..a94bfaca 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -10,6 +10,12 @@ beekeeper-path-cleanup + + 2.8.1 + 2.3.7 + UTF-8 + + com.amazonaws @@ -68,6 +74,22 @@ 27.1-jre + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + javax.servlet + servlet-api + + + + org.awaitility diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index 7542da1e..a3abac71 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -11,6 +11,29 @@ beekeeper-scheduler-apiary + + 2.8.1 + 2.3.7 + UTF-8 + + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + javax.servlet + servlet-api + + + + com.amazonaws From eb13799c2b72eb769c6f362a174fd5d539831d7c Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Mon, 25 Nov 2024 22:58:08 +0000 Subject: [PATCH 37/65] Update pom.xml --- beekeeper-scheduler-apiary/pom.xml | 35 +++++++++++++++--------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index a3abac71..9ee238f2 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -17,24 +17,25 @@ UTF-8 - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - - javax.servlet - servlet-api - - - - + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + javax.servlet + servlet-api + + + + com.amazonaws aws-java-sdk-sts From 812565ed9d3acec6f9d8fa7c8a1297a3049fa66b Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 14:17:52 +0000 Subject: [PATCH 38/65] Revert changes to beekeeper-path --- .../beekeeper/cleanup/aws/S3PathCleaner.java | 4 +- .../cleanup/aws/S3DryRunPathCleanerTest.java | 7 +- .../cleanup/aws/S3PathCleanerTest.java | 56 +++------- ...eeperDryRunPathCleanupIntegrationTest.java | 46 +------- .../BeekeeperPathCleanupIntegrationTest.java | 101 ++---------------- .../metadata/cleanup/context/CommonBeans.java | 2 +- beekeeper-path-cleanup/pom.xml | 21 ---- .../path/cleanup/context/CommonBeans.java | 47 +------- .../cleanup/handler/GenericPathHandler.java | 20 ++-- .../path/cleanup/context/CommonBeansTest.java | 8 +- 10 files changed, 42 insertions(+), 270 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java index b7b96050..801dd0ef 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java @@ -45,17 +45,15 @@ public class S3PathCleaner implements PathCleaner { private IcebergValidator icebergValidator; public S3PathCleaner(S3Client s3Client, SentinelFilesCleaner sentinelFilesCleaner, - BytesDeletedReporter bytesDeletedReporter, IcebergValidator 
icebergValidator) { + BytesDeletedReporter bytesDeletedReporter) { this.s3Client = s3Client; this.sentinelFilesCleaner = sentinelFilesCleaner; this.bytesDeletedReporter = bytesDeletedReporter; - this.icebergValidator = icebergValidator; } @Override @TimedTaggable("s3-paths-deleted") public void cleanupPath(HousekeepingEntity housekeepingEntity) { - icebergValidator.throwExceptionIfIceberg(housekeepingEntity.getDatabaseName(), housekeepingEntity.getTableName()); S3SchemeURI s3SchemeURI = new S3SchemeURI(housekeepingEntity.getPath()); String key = s3SchemeURI.getKey(); String bucket = s3SchemeURI.getBucket(); diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java index 2ce72451..c9d3d318 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java @@ -38,7 +38,6 @@ import com.amazonaws.services.s3.AmazonS3ClientBuilder; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -58,8 +57,8 @@ class S3DryRunPathCleanerTest { private HousekeepingPath housekeepingPath; private AmazonS3 amazonS3; - @Mock private BytesDeletedReporter bytesDeletedReporter; - @Mock private IcebergValidator icebergValidator; + private @Mock BytesDeletedReporter bytesDeletedReporter; + private boolean dryRunEnabled = true; @@ -83,7 +82,7 @@ void setUp() { .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(bucket, object.getKey())); S3Client s3Client = new S3Client(amazonS3, dryRunEnabled); - s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + s3DryRunPathCleaner = new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); housekeepingPath = HousekeepingPath .builder() .path(absolutePath) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 6eddc429..102c424e 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -33,13 +33,13 @@ import java.util.List; import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider; +import org.junit.Rule; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; import org.testcontainers.utility.DockerImageName; @@ -55,10 +55,8 @@ import com.amazonaws.services.s3.model.S3ObjectSummary; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.FileSystemType; import 
com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.PeriodDuration; @@ -80,16 +78,19 @@ class S3PathCleanerTest { private S3Client s3Client; private S3SentinelFilesCleaner s3SentinelFilesCleaner; private @Mock BytesDeletedReporter bytesDeletedReporter; - private @Mock IcebergValidator icebergValidator; + private S3PathCleaner s3PathCleaner; - @Container + @Rule public static LocalStackContainer awsContainer = new LocalStackContainer( DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3); + static { + awsContainer.start(); + } + public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); @BeforeEach void setUp() { - String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint(); amazonS3 = AmazonS3ClientBuilder .standard() .withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey")) @@ -103,7 +104,7 @@ void setUp() { boolean dryRunEnabled = false; s3Client = new S3Client(amazonS3, dryRunEnabled); s3SentinelFilesCleaner = new S3SentinelFilesCleaner(s3Client); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); String tableName = "table"; String databaseName = "database"; housekeepingPath = HousekeepingPath @@ -256,7 +257,7 @@ void sentinelFilesCleanerThrowsException() { amazonS3.putObject(bucket, key1, content); - s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(s3Client, s3SentinelFilesCleaner, bytesDeletedReporter); assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)).doesNotThrowAnyException(); assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); } @@ -321,7 +322,7 @@ void sentinelFilesForParentsAndPathWithTrailingSlash() { @Test void noBytesDeletedMetricWhenFileDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); when(mockS3Client.doesObjectExist(bucket, key1)).thenReturn(true); ObjectMetadata objectMetadata = new ObjectMetadata(); objectMetadata.setContentLength(10); @@ -337,7 +338,7 @@ void noBytesDeletedMetricWhenFileDeletionFails() { @Test void noBytesDeletedMetricWhenDirectoryDeletionFails() { S3Client mockS3Client = mock(S3Client.class); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter); doThrow(AmazonServiceException.class).when(mockS3Client).listObjects(bucket, keyRootAsDirectory); assertThatExceptionOfType(AmazonServiceException.class) @@ -350,7 +351,7 @@ void reportBytesDeletedWhenDirectoryDeletionPartiallyFails() { AmazonS3 mockAmazonS3 = mock(AmazonS3.class); S3Client mockS3Client = new S3Client(mockAmazonS3, false); mockOneOutOfTwoObjectsDeleted(mockAmazonS3); - s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, bytesDeletedReporter, icebergValidator); + s3PathCleaner = new S3PathCleaner(mockS3Client, s3SentinelFilesCleaner, 
bytesDeletedReporter); assertThatExceptionOfType(BeekeeperException.class) .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) .withMessage(format("Not all files could be deleted at path \"%s/%s\"; deleted 1/2 objects. " @@ -367,37 +368,6 @@ void extractingURIFails() { .withMessage(format("'%s' is not an S3 path.", path)); } - @Test - void shouldThrowBeekeeperIcebergExceptionWhenIcebergTableDetected() { - doThrow(new BeekeeperIcebergException("Iceberg tables are not supported")) - .when(icebergValidator) - .throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); - - assertThatExceptionOfType(BeekeeperIcebergException.class) - .isThrownBy(() -> s3PathCleaner.cleanupPath(housekeepingPath)) - .withMessage("Iceberg tables are not supported"); - - verify(icebergValidator).throwExceptionIfIceberg(housekeepingPath.getDatabaseName(), housekeepingPath.getTableName()); - verifyNoInteractions(bytesDeletedReporter); - } - - @Test - void shouldProceedWithDeletionWhenNotIcebergTable() { - amazonS3.putObject(bucket, key1, content); - amazonS3.putObject(bucket, key2, content); - - housekeepingPath.setPath("s3://" + bucket + "/" + keyRoot); - - assertThatCode(() -> s3PathCleaner.cleanupPath(housekeepingPath)) - .doesNotThrowAnyException(); - - assertThat(amazonS3.doesObjectExist(bucket, key1)).isFalse(); - assertThat(amazonS3.doesObjectExist(bucket, key2)).isFalse(); - - long expectedBytesDeleted = content.getBytes().length * 2L; // 11 bytes('some content') * 2 = 22 bytes - verify(bytesDeletedReporter).reportTaggable(expectedBytesDeleted, housekeepingPath, FileSystemType.S3); - } - private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { S3ObjectSummary s3ObjectSummary = new S3ObjectSummary(); s3ObjectSummary.setBucketName(bucket); @@ -415,4 +385,4 @@ private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { when(mockAmazonS3.deleteObjects(any(DeleteObjectsRequest.class))) .thenReturn(new DeleteObjectsResult(List.of(deletedObject))); } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java index 9d7e5272..d00e4799 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java @@ -15,9 +15,6 @@ */ package com.expediagroup.beekeeper.integration; -import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; @@ -28,12 +25,10 @@ import java.sql.SQLException; import java.util.List; -import java.util.Map; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; @@ -41,7 +36,6 @@ import org.junit.jupiter.api.BeforeEach; import 
org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; -import org.junit.jupiter.api.extension.RegisterExtension; import org.mockito.junit.jupiter.MockitoExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; @@ -53,16 +47,12 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; -import com.google.common.collect.ImmutableMap; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; -import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.integration.utils.TestAppender; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers @ExtendWith(MockitoExtension.class) public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -72,12 +62,6 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; - private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; - private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; - private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; - - private static final String S3_ACCESS_KEY = "access"; - private static final String S3_SECRET_KEY = "secret"; private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -99,37 +83,17 @@ public class BeekeeperDryRunPathCleanupIntegrationTest extends BeekeeperIntegrat @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); - static { - S3_CONTAINER.start(); - } private static AmazonS3 amazonS3; - private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); - private final ExecutorService executorService = Executors.newFixedThreadPool(1); private final TestAppender appender = new TestAppender(); - private static Map metastoreProperties = ImmutableMap - .builder() - .put(ENDPOINT, S3_ENDPOINT) - .put(ACCESS_KEY, S3_ACCESS_KEY) - .put(SECRET_KEY, S3_SECRET_KEY) - .build(); - - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE, metastoreProperties); - private HiveTestUtils hiveTestUtils; - private HiveMetaStoreClient metastoreClient; - @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, S3_ENDPOINT); - System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); - System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); amazonS3 = 
ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -141,18 +105,12 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); - System.clearProperty(METASTORE_URI_PROPERTY); amazonS3.shutdown(); - S3_CONTAINER.stop(); } @BeforeEach public void setup() { - System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); - metastoreClient = thriftHiveMetaStore.client(); - hiveTestUtils = new HiveTestUtils(metastoreClient); - amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); @@ -289,4 +247,4 @@ private void assertS3ClientLogs(int expected) { } assertThat(logsFromS3Client).isEqualTo(expected); } -} +} \ No newline at end of file diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index dd1be3b7..257760e4 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -15,41 +15,32 @@ */ package com.expediagroup.beekeeper.integration; -import static org.apache.hadoop.fs.s3a.Constants.ACCESS_KEY; -import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; -import static org.apache.hadoop.fs.s3a.Constants.SECRET_KEY; import static org.assertj.core.api.Assertions.assertThat; import static org.awaitility.Awaitility.await; import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; import static com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter.METRIC_NAME; import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.DELETED; -import static com.expediagroup.beekeeper.core.model.HousekeepingStatus.SKIPPED; import static com.expediagroup.beekeeper.integration.CommonTestVariables.AWS_REGION; import static com.expediagroup.beekeeper.integration.CommonTestVariables.DATABASE_NAME_VALUE; import static com.expediagroup.beekeeper.integration.CommonTestVariables.TABLE_NAME_VALUE; import java.sql.SQLException; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; -import org.apache.thrift.TException; import org.awaitility.Duration; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -60,17 +51,10 @@ import com.amazonaws.services.s3.AmazonS3; import com.amazonaws.services.s3.model.CreateBucketRequest; -import 
com.google.common.collect.ImmutableMap; -import com.expediagroup.beekeeper.core.model.HousekeepingEntity; -import com.expediagroup.beekeeper.core.model.HousekeepingPath; -import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; -import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.path.cleanup.BeekeeperPathCleanup; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTestBase { @@ -79,12 +63,6 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String SCHEDULER_DELAY_MS_PROPERTY = "properties.scheduler-delay-ms"; private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String AWS_S3_ENDPOINT_PROPERTY = "aws.s3.endpoint"; - private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; - private static final String AWS_DISABLE_GET_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disableGetObjectMD5Validation"; - private static final String AWS_DISABLE_PUT_VALIDATION_PROPERTY = "com.amazonaws.services.s3.disablePutObjectMD5Validation"; - - private static final String S3_ACCESS_KEY = "access"; - private static final String S3_SECRET_KEY = "secret"; private static final String BUCKET = "test-path-bucket"; private static final String DB_AND_TABLE_PREFIX = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE; @@ -93,7 +71,6 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes private static final String OBJECT_KEY2 = DB_AND_TABLE_PREFIX + "/id1/partition1/file2"; private static final String OBJECT_KEY_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition1_$folder$"; private static final String ABSOLUTE_PATH = "s3://" + BUCKET + "/" + OBJECT_KEY_ROOT; - private static final String TABLE_PATH = "s3a://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; private static final String OBJECT_KEY_OTHER = DB_AND_TABLE_PREFIX + "/id1/partition10/file1"; private static final String OBJECT_KEY_OTHER_SENTINEL = DB_AND_TABLE_PREFIX + "/id1/partition10_$folder$"; @@ -107,35 +84,16 @@ public class BeekeeperPathCleanupIntegrationTest extends BeekeeperIntegrationTes @Container private static final LocalStackContainer S3_CONTAINER = ContainerTestUtils.awsContainer(S3); - static { - S3_CONTAINER.start(); - } private static AmazonS3 amazonS3; - private static final String S3_ENDPOINT = ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3); - private final ExecutorService executorService = Executors.newFixedThreadPool(1); - - private static Map metastoreProperties = ImmutableMap - .builder() - .put(ENDPOINT, S3_ENDPOINT) - .put(ACCESS_KEY, S3_ACCESS_KEY) - .put(SECRET_KEY, S3_SECRET_KEY) - .build(); - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE, metastoreProperties); - - private HiveTestUtils hiveTestUtils; - private HiveMetaStoreClient metastoreClient; + private final ExecutorService executorService = Executors.newFixedThreadPool(1); @BeforeAll public static void init() { System.setProperty(SPRING_PROFILES_ACTIVE_PROPERTY, SPRING_PROFILES_ACTIVE); System.setProperty(SCHEDULER_DELAY_MS_PROPERTY, SCHEDULER_DELAY_MS); System.setProperty(DRY_RUN_ENABLED_PROPERTY, DRY_RUN_ENABLED); - System.setProperty(AWS_S3_ENDPOINT_PROPERTY, 
S3_ENDPOINT); - System.setProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY, "true"); - System.setProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY, "true"); + System.setProperty(AWS_S3_ENDPOINT_PROPERTY, ContainerTestUtils.awsServiceEndpoint(S3_CONTAINER, S3)); amazonS3 = ContainerTestUtils.s3Client(S3_CONTAINER, AWS_REGION); amazonS3.createBucket(new CreateBucketRequest(BUCKET, AWS_REGION)); @@ -147,20 +105,12 @@ public static void teardown() { System.clearProperty(SCHEDULER_DELAY_MS_PROPERTY); System.clearProperty(DRY_RUN_ENABLED_PROPERTY); System.clearProperty(AWS_S3_ENDPOINT_PROPERTY); - System.clearProperty(METASTORE_URI_PROPERTY); - System.clearProperty(AWS_DISABLE_GET_VALIDATION_PROPERTY); - System.clearProperty(AWS_DISABLE_PUT_VALIDATION_PROPERTY); amazonS3.shutdown(); - S3_CONTAINER.stop(); } @BeforeEach public void setup() { - System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); - metastoreClient = thriftHiveMetaStore.client(); - hiveTestUtils = new HiveTestUtils(metastoreClient); - amazonS3.listObjectsV2(BUCKET) .getObjectSummaries() .forEach(object -> amazonS3.deleteObject(BUCKET, object.getKey())); @@ -176,8 +126,7 @@ public void stop() throws InterruptedException { } @Test - public void cleanupPathsForFile() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForFile() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -194,8 +143,7 @@ public void cleanupPathsForFile() throws SQLException, TException { } @Test - public void cleanupPathsForDirectory() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectory() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -214,8 +162,7 @@ public void cleanupPathsForDirectory() throws SQLException, TException { } @Test - public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectoryWithSpace() throws SQLException { String objectKeyRoot = DB_AND_TABLE_PREFIX + "/ /id1/partition1"; String objectKey1 = objectKeyRoot + "/file1"; String objectKey2 = objectKeyRoot + "/file2"; @@ -235,8 +182,7 @@ public void cleanupPathsForDirectoryWithSpace() throws SQLException, TException } @Test - public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY2, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_OTHER, CONTENT); @@ -253,8 +199,7 @@ public void cleanupPathsForDirectoryWithTrailingSlash() throws SQLException, TEx } @Test - public void cleanupSentinelForParent() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupSentinelForParent() throws SQLException { String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; String databaseSentinel = "database_$folder$"; @@ -278,8 +223,7 @@ public void 
cleanupSentinelForParent() throws SQLException, TException { } @Test - public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void cleanupSentinelForNonEmptyParent() throws SQLException { String parentSentinel = DB_AND_TABLE_PREFIX + "/id1_$folder$"; String tableSentinel = DB_AND_TABLE_PREFIX + "_$folder$"; amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); @@ -301,32 +245,7 @@ public void cleanupSentinelForNonEmptyParent() throws SQLException, TException { } @Test - public void shouldSkipCleanupForIcebergTable() throws Exception { - Map tableProperties = new HashMap<>(); - tableProperties.put("table_type", "ICEBERG"); - tableProperties.put("format", "ICEBERG/PARQUET"); - String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; - - hiveTestUtils.createTableWithProperties( - TABLE_PATH, TABLE_NAME_VALUE, false, tableProperties, outputFormat, true); - - String objectKey = DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/file1"; - String path = "s3://" + BUCKET + "/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; - - amazonS3.putObject(BUCKET, objectKey, CONTENT); - insertUnreferencedPath(path); - - await().atMost(TIMEOUT, TimeUnit.SECONDS) - .until(() -> getUnreferencedPaths().get(0).getHousekeepingStatus() == SKIPPED); - - assertThat(amazonS3.doesObjectExist(BUCKET, objectKey)) - .withFailMessage("S3 object %s should still exist as cleanup was skipped.", objectKey) - .isTrue(); - } - - @Test - public void metrics() throws SQLException, TException { - hiveTestUtils.createTable(TABLE_PATH, TABLE_NAME_VALUE, false); + public void metrics() throws SQLException { amazonS3.putObject(BUCKET, OBJECT_KEY1, CONTENT); amazonS3.putObject(BUCKET, OBJECT_KEY_SENTINEL, ""); @@ -364,4 +283,4 @@ public void prometheus() { await().atMost(TIMEOUT, TimeUnit.SECONDS) .until(() -> client.execute(request).getStatusLine().getStatusCode() == 200); } -} +} \ No newline at end of file diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index 0dd2fc94..b5ddbb2a 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -145,7 +145,7 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled PathCleaner pathCleaner( S3Client s3Client, BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); } @Bean(name = "expiredMetadataHandler") diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index a94bfaca..5d324fb5 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -10,12 +10,6 @@ beekeeper-path-cleanup - - 2.8.1 - 2.3.7 - UTF-8 - - com.amazonaws @@ -74,21 +68,6 @@ 27.1-jre - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - - javax.servlet - servlet-api - - - diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java 
b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index eff5fac8..b8715a2f 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -16,9 +16,7 @@ package com.expediagroup.beekeeper.path.cleanup.context; import java.util.List; -import java.util.function.Supplier; -import org.apache.hadoop.hive.conf.HiveConf; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.domain.EntityScan; import org.springframework.context.annotation.Bean; @@ -37,23 +35,16 @@ import com.expediagroup.beekeeper.cleanup.aws.S3Client; import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner; import com.expediagroup.beekeeper.cleanup.aws.S3SentinelFilesCleaner; -import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory; -import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.handler.GenericPathHandler; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; -import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; -import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; -import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; - @Configuration @EnableScheduling @ComponentScan({ "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.cleanup" }) @@ -94,9 +85,8 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter, - IcebergValidator icebergValidator) { - return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter, icebergValidator); + BytesDeletedReporter bytesDeletedReporter) { + return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); } @Bean @@ -118,35 +108,4 @@ RepositoryCleanupService repositoryCleanupService( DisableTablesService disableTablesService() { return () -> {}; } - - @Bean - public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { - HiveConf conf = new HiveConf(); - conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); - return conf; - } - - @Bean - public CloseableMetaStoreClientFactory metaStoreClientFactory() { - return new CloseableMetaStoreClientFactory(); - } - - @Bean - Supplier metaStoreClientSupplier( - CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { - String name = "beekeeper-scheduler-apiary"; - return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); - } - - @Bean(name = "hiveClientFactory") - public CleanerClientFactory clientFactory( - Supplier 
metaStoreClientSupplier, - @Value("${properties.dry-run-enabled}") boolean dryRunEnabled) { - return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); - } - - @Bean - public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { - return new IcebergValidator(clientFactory); - } -} +} \ No newline at end of file diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index efe70173..85ed3182 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -24,7 +24,6 @@ import org.springframework.data.domain.Slice; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; -import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingPath; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; @@ -68,19 +67,12 @@ public Pageable processPage(Pageable pageable, Slice page, boo } private boolean cleanUpPath(HousekeepingPath housekeepingPath) { - try { - if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { - pathCleaner.cleanupPath(housekeepingPath); - return true; - } - log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); - return false; - } catch (BeekeeperException e) { - updateStatus(housekeepingPath, HousekeepingStatus.SKIPPED); - log.warn("Skipping cleanup for table \"{}.{}\": {}", housekeepingPath.getDatabaseName(), - housekeepingPath.getTableName(), e.getMessage()); - return false; + if (S3PathValidator.validTablePath(housekeepingPath.getPath())) { + pathCleaner.cleanupPath(housekeepingPath); + return true; } + log.warn("Will not clean up path \"{}\" because it is not valid.", housekeepingPath.getPath()); + return false; } private void cleanupContent(HousekeepingPath housekeepingPath) { @@ -107,4 +99,4 @@ private void updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } -} +} \ No newline at end of file diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java index 4db85925..967549b5 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2021 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
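Note: the sketch below is a restatement assembled from the added ("+") lines in the surrounding path-cleanup hunks, not a further change; it shows the bean wiring these reverts converge on, with other beans omitted.

  import org.springframework.context.annotation.Bean;
  import org.springframework.context.annotation.Configuration;

  import com.expediagroup.beekeeper.cleanup.aws.S3Client;
  import com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner;
  import com.expediagroup.beekeeper.cleanup.aws.S3SentinelFilesCleaner;
  import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter;
  import com.expediagroup.beekeeper.cleanup.path.PathCleaner;

  @Configuration
  public class CommonBeans {

    @Bean(name = "s3PathCleaner")
    PathCleaner pathCleaner(S3Client s3Client, BytesDeletedReporter bytesDeletedReporter) {
      // Back to the original collaborators only: the S3 client, the sentinel-file cleaner
      // and the bytes-deleted reporter. No IcebergValidator is injected into path cleanup.
      return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter);
    }
  }
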
@@ -40,7 +40,6 @@ import com.expediagroup.beekeeper.cleanup.service.CleanupService; import com.expediagroup.beekeeper.cleanup.service.DisableTablesService; import com.expediagroup.beekeeper.cleanup.service.RepositoryCleanupService; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.repository.HousekeepingPathRepository; import com.expediagroup.beekeeper.path.cleanup.service.PagingPathCleanupService; import com.expediagroup.beekeeper.path.cleanup.service.PathRepositoryCleanupService; @@ -60,7 +59,6 @@ class CommonBeansTest { private final CommonBeans commonBeans = new CommonBeans(); private @Mock HousekeepingPathRepository repository; private @Mock BytesDeletedReporter bytesDeletedReporter; - private @Mock IcebergValidator icebergValidator; @BeforeEach void setUp() { @@ -102,7 +100,7 @@ void verifyS3pathCleaner() { S3Client s3Client = commonBeans.s3Client(commonBeans.amazonS3(), dryRunEnabled); MeterRegistry meterRegistry = mock(GraphiteMeterRegistry.class); - PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter, icebergValidator); + PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, bytesDeletedReporter); assertThat(pathCleaner).isInstanceOf(S3PathCleaner.class); } @@ -123,4 +121,4 @@ public void verifyDisableTablesService() { DisableTablesService disableTablesService = commonBeans.disableTablesService(); assertThat(disableTablesService).isNotNull(); } -} +} \ No newline at end of file From 26b404c0635b2e625fb8ce9c4199835fc348e47b Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 14:23:04 +0000 Subject: [PATCH 39/65] revert more path-cleanup --- .../com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java | 2 -- .../expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java index 801dd0ef..46e88657 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleaner.java @@ -29,7 +29,6 @@ import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; import com.expediagroup.beekeeper.cleanup.path.SentinelFilesCleaner; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.config.FileSystemType; import com.expediagroup.beekeeper.core.error.BeekeeperException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; @@ -42,7 +41,6 @@ public class S3PathCleaner implements PathCleaner { private final S3Client s3Client; private final SentinelFilesCleaner sentinelFilesCleaner; private final BytesDeletedReporter bytesDeletedReporter; - private IcebergValidator icebergValidator; public S3PathCleaner(S3Client s3Client, SentinelFilesCleaner sentinelFilesCleaner, BytesDeletedReporter bytesDeletedReporter) { diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java index 102c424e..7b00bcb6 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java +++ 
b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3PathCleanerTest.java @@ -385,4 +385,4 @@ private void mockOneOutOfTwoObjectsDeleted(AmazonS3 mockAmazonS3) { when(mockAmazonS3.deleteObjects(any(DeleteObjectsRequest.class))) .thenReturn(new DeleteObjectsResult(List.of(deletedObject))); } -} \ No newline at end of file +} From 101ab88786a4128e50a54733b7bfab771ed21f44 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 14:26:06 +0000 Subject: [PATCH 40/65] Revert path-cleanup --- .../integration/BeekeeperDryRunPathCleanupIntegrationTest.java | 2 +- .../integration/BeekeeperPathCleanupIntegrationTest.java | 2 +- .../beekeeper/path/cleanup/context/CommonBeans.java | 2 +- .../beekeeper/path/cleanup/handler/GenericPathHandler.java | 2 +- .../beekeeper/path/cleanup/context/CommonBeansTest.java | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java index d00e4799..63d2e443 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperDryRunPathCleanupIntegrationTest.java @@ -247,4 +247,4 @@ private void assertS3ClientLogs(int expected) { } assertThat(logsFromS3Client).isEqualTo(expected); } -} \ No newline at end of file +} diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java index 257760e4..0b8c29b0 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperPathCleanupIntegrationTest.java @@ -283,4 +283,4 @@ public void prometheus() { await().atMost(TIMEOUT, TimeUnit.SECONDS) .until(() -> client.execute(request).getStatusLine().getStatusCode() == 200); } -} \ No newline at end of file +} diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java index b8715a2f..0b118243 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeans.java @@ -108,4 +108,4 @@ RepositoryCleanupService repositoryCleanupService( DisableTablesService disableTablesService() { return () -> {}; } -} \ No newline at end of file +} diff --git a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java index 85ed3182..30442c22 100644 --- a/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java +++ b/beekeeper-path-cleanup/src/main/java/com/expediagroup/beekeeper/path/cleanup/handler/GenericPathHandler.java @@ -99,4 +99,4 @@ private void 
updateStatus(HousekeepingPath housekeepingPath, HousekeepingStatus housekeepingPath.setHousekeepingStatus(status); housekeepingPathRepository.save(housekeepingPath); } -} \ No newline at end of file +} diff --git a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java index 967549b5..a6667b73 100644 --- a/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java +++ b/beekeeper-path-cleanup/src/test/java/com/expediagroup/beekeeper/path/cleanup/context/CommonBeansTest.java @@ -121,4 +121,4 @@ public void verifyDisableTablesService() { DisableTablesService disableTablesService = commonBeans.disableTablesService(); assertThat(disableTablesService).isNotNull(); } -} \ No newline at end of file +} From c64665041edbb21ab4a2f17ae450249e943cfb3c Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 14:35:26 +0000 Subject: [PATCH 41/65] cleanup --- .../beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java | 1 - .../beekeeper/cleanup/hive/HiveMetadataCleanerTest.java | 1 - beekeeper-path-cleanup/pom.xml | 1 - 3 files changed, 3 deletions(-) diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java index c9d3d318..5fab57af 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/aws/S3DryRunPathCleanerTest.java @@ -59,7 +59,6 @@ class S3DryRunPathCleanerTest { private AmazonS3 amazonS3; private @Mock BytesDeletedReporter bytesDeletedReporter; - private boolean dryRunEnabled = true; private S3PathCleaner s3DryRunPathCleaner; diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java index 842a7c23..5520b8fb 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/hive/HiveMetadataCleanerTest.java @@ -77,7 +77,6 @@ public void typicalDropPartition() { verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION); } - @Test public void dontReportWhenPartitionNotDropped() { when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE); diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index 5d324fb5..ac1a2723 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -68,7 +68,6 @@ 27.1-jre - org.awaitility From e71a5aeb392116087fb27ee2f17b6e9441221076 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 15:44:36 +0000 Subject: [PATCH 42/65] Added logging for table params --- .../filter/TableParameterListenerEventFilter.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java index d3be3b3d..7a49d798 100644 --- 
a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java @@ -17,19 +17,30 @@ import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; - import com.expediagroup.beekeeper.core.model.LifecycleEventType; @Component public class TableParameterListenerEventFilter implements ListenerEventFilter { + private static final Logger log = LoggerFactory.getLogger(TableParameterListenerEventFilter.class); + @Override public boolean isFiltered(ListenerEvent listenerEvent, LifecycleEventType lifecycleEventType) { Map tableParameters = listenerEvent.getTableParameters(); + // Log the table params + if (tableParameters != null && !tableParameters.isEmpty()) { + log.info("Processing table parameters for event: {}", tableParameters); + log.debug("Detailed table parameters: {}", tableParameters); + } else { + log.info("No table parameters found for event."); + } + if (tableParameters == null) { return true; } @@ -37,3 +48,4 @@ public boolean isFiltered(ListenerEvent listenerEvent, LifecycleEventType lifecy return !Boolean.parseBoolean(tableParameters.get(lifecycleEventType.getTableParameterName())); } } + From eea8403152a25b00c426bb48f7b5a53407d97b3b Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 15:53:48 +0000 Subject: [PATCH 43/65] add logging --- .../scheduler/apiary/service/SchedulerApiary.java | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index c07a5381..4af8d8b9 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -1,5 +1,5 @@ /** - * Copyright (C) 2019-2024 Expedia, Inc. + * Copyright (C) 2019-2020 Expedia, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
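Note: the hunks that follow pull the Apiary event's table parameters into the scheduling loop. The sketch below restates that addition with the generic types the flattened diff drops and with the access chain split into steps; ListenerEvent and getTableParameters() are assumed to come from the Apiary receiver API already used in this module.

  ListenerEvent listenerEvent = beekeeperEvent.getMessageEvent().getEvent();
  Map<String, String> tableParameters = listenerEvent.getTableParameters();
  if (tableParameters != null && !tableParameters.isEmpty()) {
    // Logs each table parameter on its own line as "key: value".
    tableParameters.forEach((key, value) -> log.info(" - {}: {}", key, value));
  } else {
    // Null or empty parameter maps are logged rather than treated as an error.
    log.info("No table parameters found for event.");
  }
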
@@ -20,6 +20,7 @@ import java.io.IOException; import java.util.EnumMap; import java.util.List; +import java.util.Map; import java.util.Optional; import org.slf4j.Logger; @@ -34,6 +35,7 @@ import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; +import com.expediagroup.beekeeper.scheduler.apiary.messaging.MessageReaderAdapter; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; @@ -64,6 +66,16 @@ public void scheduleBeekeeperEvent() { BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); + Map tableParameters = beekeeperEvent.getMessageEvent().getEvent().getTableParameters(); + + // logging: Print all table parameters + if (tableParameters != null && !tableParameters.isEmpty()) { + log.info("Processing table parameters for event:"); + tableParameters.forEach((key, value) -> log.info(" - {}: {}", key, value)); + } else { + log.info("No table parameters found for event."); + } + for (HousekeepingEntity entity : housekeepingEntities) { try { icebergValidator.throwExceptionIfIceberg(entity.getDatabaseName(), entity.getTableName()); From 95e6c647f526918f51efb46aeb196acd0e7d8a95 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Tue, 26 Nov 2024 22:45:40 +0000 Subject: [PATCH 44/65] remove logs to check filters --- .../filter/TableParameterListenerEventFilter.java | 13 +------------ .../scheduler/apiary/service/SchedulerApiary.java | 12 ++---------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java index 7a49d798..2c46f936 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java @@ -17,30 +17,19 @@ import java.util.Map; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.stereotype.Component; import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; + import com.expediagroup.beekeeper.core.model.LifecycleEventType; @Component public class TableParameterListenerEventFilter implements ListenerEventFilter { - private static final Logger log = LoggerFactory.getLogger(TableParameterListenerEventFilter.class); - @Override public boolean isFiltered(ListenerEvent listenerEvent, LifecycleEventType lifecycleEventType) { Map tableParameters = listenerEvent.getTableParameters(); - // Log the table params - if (tableParameters != null && !tableParameters.isEmpty()) { - log.info("Processing table parameters for event: {}", tableParameters); - log.debug("Detailed table parameters: {}", tableParameters); - } else { - log.info("No table parameters found for event."); - } - if (tableParameters == null) { return true; } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java 
b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 4af8d8b9..e7722530 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -38,6 +38,7 @@ import com.expediagroup.beekeeper.scheduler.apiary.messaging.MessageReaderAdapter; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; @Component public class SchedulerApiary { @@ -66,19 +67,10 @@ public void scheduleBeekeeperEvent() { BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); - Map tableParameters = beekeeperEvent.getMessageEvent().getEvent().getTableParameters(); - - // logging: Print all table parameters - if (tableParameters != null && !tableParameters.isEmpty()) { - log.info("Processing table parameters for event:"); - tableParameters.forEach((key, value) -> log.info(" - {}: {}", key, value)); - } else { - log.info("No table parameters found for event."); - } - for (HousekeepingEntity entity : housekeepingEntities) { try { icebergValidator.throwExceptionIfIceberg(entity.getDatabaseName(), entity.getTableName()); + LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); SchedulerService scheduler = schedulerServiceMap.get(eventType); scheduler.scheduleForHousekeeping(entity); From 804be2f17021f1cdd113c91683853aa765a92059 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 11:48:26 +0100 Subject: [PATCH 45/65] cleaning up --- .../beekeeper/cleanup/hive/HiveClient.java | 17 -------- .../cleanup/metadata/CleanerClient.java | 2 - .../cleanup/validation/IcebergValidator.java | 5 +-- ...etadataSchedulerApiaryIntegrationTest.java | 37 +++-------------- ...cedPathSchedulerApiaryIntegrationTest.java | 18 +-------- beekeeper-scheduler-apiary/pom.xml | 28 ------------- .../scheduler/apiary/context/CommonBeans.java | 40 ------------------- .../TableParameterListenerEventFilter.java | 1 - .../apiary/service/SchedulerApiary.java | 14 +------ .../apiary/service/SchedulerApiaryTest.java | 31 +------------- 10 files changed, 11 insertions(+), 182 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java index 007afeb1..d1009a60 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/hive/HiveClient.java @@ -126,23 +126,6 @@ public Map getTableProperties(String databaseName, String tableN } } - @Override - public String getOutputFormat(String databaseName, String tableName) { - String result = null; - try { - Table table = client.getTable(databaseName, tableName); - if (table.getSd() != null) { - result = table.getSd().getOutputFormat(); - } - } catch (NoSuchObjectException e) { - log.warn("Table {}.{} does not exist", databaseName, tableName); - } catch (TException e) { - throw new BeekeeperException( - "Unexpected exception when getting output format for \"" + databaseName + "." 
+ tableName + ".", e); - } - return result; - } - @Override public void close() { client.close(); diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java index 1946302b..ee1a3a58 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/metadata/CleanerClient.java @@ -27,6 +27,4 @@ public interface CleanerClient extends Closeable { boolean tableExists(String databaseName, String tableName); Map getTableProperties(String databaseName, String tableName); - - String getOutputFormat(String databaseName, String tableName); } diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index c4167232..55fb07aa 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -49,9 +49,8 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) { Map parameters = client.getTableProperties(databaseName, tableName); String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); String format = parameters.getOrDefault("format", "").toLowerCase(); - String outputFormat = client.getOutputFormat(databaseName, tableName); - if (tableType.contains("iceberg") || format.contains("iceberg") || (outputFormat != null - && outputFormat.toLowerCase().contains("iceberg"))) { + String metadataLocation = parameters.getOrDefault("metadata_location", "").toLowerCase(); + if (tableType.contains("iceberg") || format.contains("iceberg") || !metadataLocation.isEmpty()) { throw new BeekeeperIcebergException( format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName)); } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index 321a89ad..ccbf19c6 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -36,7 +36,6 @@ import java.util.Set; import java.util.concurrent.TimeUnit; -import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClientBuilder; @@ -46,7 +45,6 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -56,7 +54,6 @@ import io.micrometer.core.instrument.composite.CompositeMeterRegistry; import com.amazonaws.services.sqs.AmazonSQS; -import 
com.amazonaws.services.sqs.model.CreateQueueResult; import com.amazonaws.services.sqs.model.PurgeQueueRequest; import com.amazonaws.services.sqs.model.SendMessageRequest; @@ -67,27 +64,19 @@ import com.expediagroup.beekeeper.integration.model.AlterTableSqsMessage; import com.expediagroup.beekeeper.integration.model.CreateTableSqsMessage; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; -import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { private static final int TIMEOUT = 30; - private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; - private static final String METASTORE_URI_PROPERTY = "properties.metastore-uri"; private static final String QUEUE = "apiary-receiver-queue"; private static final String SCHEDULED_EXPIRED_METRIC = "metadata-scheduled"; private static final String HEALTHCHECK_URI = "http://localhost:8080/actuator/health"; private static final String PROMETHEUS_URI = "http://localhost:8080/actuator/prometheus"; - private static final String S3_ACCESS_KEY = "access"; - private static final String S3_SECRET_KEY = "secret"; - private static final String PARTITION_KEYS = "{ \"event_date\": \"date\", \"event_hour\": \"smallint\"}"; private static final String PARTITION_A_VALUES = "[ \"2020-01-01\", \"0\" ]"; private static final String PARTITION_B_VALUES = "[ \"2020-01-01\", \"1\" ]"; @@ -95,44 +84,30 @@ public class BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest extends Beek private static final String PARTITION_B_NAME = "event_date=2020-01-01/event_hour=1"; private static final String LOCATION_A = "s3://bucket/table1/partition"; private static final String LOCATION_B = "s3://bucket/table2/partition"; - private static final String TABLE_PATH = "/tmp/bucket/" + DATABASE_NAME_VALUE + "/" + TABLE_NAME_VALUE + "/"; @Container private static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS); private static AmazonSQS amazonSQS; - private static String queueUrl; - - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE); - - private HiveTestUtils hiveTestUtils; - private HiveMetaStoreClient metastoreClient; @BeforeAll public static void init() { - System.setProperty(DRY_RUN_ENABLED_PROPERTY, "false"); - amazonSQS = ContainerTestUtils.sqsClient(SQS_CONTAINER, AWS_REGION); - CreateQueueResult queue = amazonSQS.createQueue(QUEUE); - queueUrl = queue.getQueueUrl(); + String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); System.setProperty(APIARY_QUEUE_URL_PROPERTY, queueUrl); + + amazonSQS = ContainerTestUtils.sqsClient(SQS_CONTAINER, AWS_REGION); + amazonSQS.createQueue(QUEUE); } @AfterAll public static void teardown() { System.clearProperty(APIARY_QUEUE_URL_PROPERTY); - System.clearProperty(DRY_RUN_ENABLED_PROPERTY); amazonSQS.shutdown(); } @BeforeEach public void setup() { - System.setProperty(METASTORE_URI_PROPERTY, thriftHiveMetaStore.getThriftConnectionUri()); - metastoreClient = thriftHiveMetaStore.client(); - hiveTestUtils = new HiveTestUtils(metastoreClient); - - amazonSQS.purgeQueue(new PurgeQueueRequest(queueUrl)); + 
amazonSQS.purgeQueue(new PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE))); executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {})); await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning); } @@ -255,7 +230,7 @@ public void prometheus() { } private SendMessageRequest sendMessageRequest(String payload) { - return new SendMessageRequest(queueUrl, payload); + return new SendMessageRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE), payload); } private void assertExpiredMetadata(HousekeepingMetadata actual, String expectedPath, String partitionName) { diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java index af66ac2e..1c3fd3a4 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest.java @@ -45,7 +45,6 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; import org.testcontainers.containers.localstack.LocalStackContainer; import org.testcontainers.junit.jupiter.Container; import org.testcontainers.junit.jupiter.Testcontainers; @@ -67,14 +66,11 @@ import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.scheduler.apiary.BeekeeperSchedulerApiary; -import com.hotels.beeju.extensions.ThriftHiveMetaStoreJUnitExtension; - @Testcontainers public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends BeekeeperIntegrationTestBase { private static final int TIMEOUT = 5; private static final String APIARY_QUEUE_URL_PROPERTY = "properties.apiary.queue-url"; - private static final String DRY_RUN_ENABLED_PROPERTY = "properties.dry-run-enabled"; private static final String QUEUE = "apiary-receiver-queue"; private static final String SCHEDULED_ORPHANED_METRIC = "paths-scheduled"; @@ -85,10 +81,6 @@ public class BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest extends Bee private static final LocalStackContainer SQS_CONTAINER = ContainerTestUtils.awsContainer(SQS); private static AmazonSQS amazonSQS; - @RegisterExtension - public ThriftHiveMetaStoreJUnitExtension thriftHiveMetaStore = new ThriftHiveMetaStoreJUnitExtension( - DATABASE_NAME_VALUE); - @BeforeAll public static void init() { String queueUrl = ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE); @@ -101,17 +93,12 @@ public static void init() { @AfterAll public static void teardown() { System.clearProperty(APIARY_QUEUE_URL_PROPERTY); - System.clearProperty("properties.metastore-uri"); - System.clearProperty("properties.dry-run-enabled"); amazonSQS.shutdown(); } @BeforeEach public void setup() { - System.setProperty("properties.metastore-uri", thriftHiveMetaStore.getThriftConnectionUri()); - System.setProperty("properties.dry-run-enabled", "false"); - amazonSQS.purgeQueue(new PurgeQueueRequest(ContainerTestUtils.queueUrl(SQS_CONTAINER, QUEUE))); executorService.execute(() -> BeekeeperSchedulerApiary.main(new String[] {})); await().atMost(Duration.ONE_MINUTE).until(BeekeeperSchedulerApiary::isRunning); @@ -121,9 
+108,6 @@ public void setup() { public void stop() throws InterruptedException { BeekeeperSchedulerApiary.stop(); executorService.awaitTermination(5, TimeUnit.SECONDS); - - System.clearProperty("properties.metastore-uri"); - System.clearProperty("properties.dry-run-enabled"); } @Test @@ -173,7 +157,7 @@ public void unreferencedAlterPartitionEvent() throws SQLException, IOException, public void unreferencedMultipleAlterPartitionEvent() throws IOException, SQLException, URISyntaxException { List .of(new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation", - "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), + "s3://bucket/table/partitionLocation", "s3://bucket/table/unreferencedPartitionLocation", true, true), new AlterPartitionSqsMessage("s3://bucket/table/expiredTableLocation2", "s3://bucket/table/partitionLocation2", "s3://bucket/table/partitionLocation", true, true)) .forEach(msg -> amazonSQS.sendMessage(sendMessageRequest(msg.getFormattedString()))); diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index 9ee238f2..bb0b2a7d 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -11,31 +11,8 @@ beekeeper-scheduler-apiary - - 2.8.1 - 2.3.7 - UTF-8 - - - - - org.apache.hadoop - hadoop-mapreduce-client-core - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - - javax.servlet - servlet-api - - - - com.amazonaws aws-java-sdk-sts @@ -46,11 +23,6 @@ beekeeper-scheduler ${project.version} - - com.expediagroup - beekeeper-cleanup - ${project.version} - ch.qos.logback diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java index fc2ee418..492017c6 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java @@ -17,9 +17,7 @@ import java.util.EnumMap; import java.util.List; -import java.util.function.Supplier; -import org.apache.hadoop.hive.conf.HiveConf; import org.springframework.beans.factory.annotation.Qualifier; import org.springframework.beans.factory.annotation.Value; import org.springframework.boot.autoconfigure.domain.EntityScan; @@ -39,9 +37,6 @@ import com.expedia.apiary.extensions.receiver.common.messaging.MessageReader; import com.expedia.apiary.extensions.receiver.sqs.messaging.SqsMessageReader; -import com.expediagroup.beekeeper.cleanup.hive.HiveClientFactory; -import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.filter.EventTypeListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; @@ -57,10 +52,6 @@ import com.expediagroup.beekeeper.scheduler.apiary.messaging.RetryingMessageReader; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; -import com.hotels.hcommon.hive.metastore.client.api.CloseableMetaStoreClient; -import com.hotels.hcommon.hive.metastore.client.closeable.CloseableMetaStoreClientFactory; -import com.hotels.hcommon.hive.metastore.client.supplier.HiveMetaStoreClientSupplier; - @Configuration 
@ComponentScan(basePackages = { "com.expediagroup.beekeeper.core", "com.expediagroup.beekeeper.scheduler" }) @EntityScan(basePackages = { "com.expediagroup.beekeeper.core" }) @@ -148,35 +139,4 @@ public BeekeeperEventReader eventReader( return new MessageReaderAdapter(messageReader, handlers); } - - @Bean - public HiveConf hiveConf(@Value("${properties.metastore-uri}") String metastoreUri) { - HiveConf conf = new HiveConf(); - conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastoreUri); - return conf; - } - - @Bean - public CloseableMetaStoreClientFactory metaStoreClientFactory() { - return new CloseableMetaStoreClientFactory(); - } - - @Bean - Supplier metaStoreClientSupplier( - CloseableMetaStoreClientFactory metaStoreClientFactory, HiveConf hiveConf) { - String name = "beekeeper-scheduler-apiary"; - return new HiveMetaStoreClientSupplier(metaStoreClientFactory, hiveConf, name); - } - - @Bean(name = "hiveClientFactory") - public CleanerClientFactory clientFactory( - Supplier metaStoreClientSupplier, - @Value("${properties.dry-run-enabled}") boolean dryRunEnabled) { - return new HiveClientFactory(metaStoreClientSupplier, dryRunEnabled); - } - - @Bean - public IcebergValidator icebergValidator(CleanerClientFactory clientFactory) { - return new IcebergValidator(clientFactory); - } } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java index 2c46f936..d3be3b3d 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/TableParameterListenerEventFilter.java @@ -37,4 +37,3 @@ public boolean isFiltered(ListenerEvent listenerEvent, LifecycleEventType lifecy return !Boolean.parseBoolean(tableParameters.get(lifecycleEventType.getTableParameterName())); } } - diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index e7722530..fc27e5be 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.util.EnumMap; import java.util.List; -import java.util.Map; import java.util.Optional; import org.slf4j.Logger; @@ -29,16 +28,12 @@ import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.messaging.BeekeeperEventReader; -import com.expediagroup.beekeeper.scheduler.apiary.messaging.MessageReaderAdapter; import com.expediagroup.beekeeper.scheduler.apiary.model.BeekeeperEvent; import com.expediagroup.beekeeper.scheduler.service.SchedulerService; 
-import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; @Component public class SchedulerApiary { @@ -47,17 +42,14 @@ public class SchedulerApiary { private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; - private final IcebergValidator icebergValidator; @Autowired public SchedulerApiary( BeekeeperEventReader beekeeperEventReader, - EnumMap schedulerServiceMap, - IcebergValidator icebergValidator + EnumMap schedulerServiceMap ) { this.beekeeperEventReader = beekeeperEventReader; this.schedulerServiceMap = schedulerServiceMap; - this.icebergValidator = icebergValidator; } @Transactional @@ -69,13 +61,9 @@ public void scheduleBeekeeperEvent() { for (HousekeepingEntity entity : housekeepingEntities) { try { - icebergValidator.throwExceptionIfIceberg(entity.getDatabaseName(), entity.getTableName()); - LifecycleEventType eventType = LifecycleEventType.valueOf(entity.getLifecycleType()); SchedulerService scheduler = schedulerServiceMap.get(eventType); scheduler.scheduleForHousekeeping(entity); - } catch (BeekeeperIcebergException e) { - log.warn("Iceberg table are not supported in Beekeeper. Deleting message from queue", e); } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 351c3562..73ab2b6d 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -20,7 +20,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.mock; @@ -44,9 +43,7 @@ import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; -import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator; import com.expediagroup.beekeeper.core.error.BeekeeperException; -import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingEntity; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingPath; @@ -65,7 +62,6 @@ public class SchedulerApiaryTest { @Mock private BeekeeperEventReader beekeeperEventReader; @Mock private HousekeepingPath path; @Mock private HousekeepingMetadata table; - @Mock private IcebergValidator icebergValidator; private SchedulerApiary scheduler; @@ -74,7 +70,7 @@ public void init() { EnumMap schedulerMap = new EnumMap<>(LifecycleEventType.class); schedulerMap.put(UNREFERENCED, pathSchedulerService); schedulerMap.put(EXPIRED, tableSchedulerService); - scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap, icebergValidator); + scheduler = new SchedulerApiary(beekeeperEventReader, schedulerMap); } @Test @@ -82,7 +78,6 @@ public void typicalPathSchedule() { Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); 
when(beekeeperEventReader.read()).thenReturn(event); scheduler.scheduleBeekeeperEvent(); - verify(icebergValidator).throwExceptionIfIceberg(path.getDatabaseName(), path.getTableName()); verify(pathSchedulerService).scheduleForHousekeeping(path); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -94,7 +89,6 @@ public void typicalTableSchedule() { when(beekeeperEventReader.read()).thenReturn(event); scheduler.scheduleBeekeeperEvent(); - verify(icebergValidator).throwExceptionIfIceberg(table.getDatabaseName(), table.getTableName()); verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -105,7 +99,6 @@ public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); - verifyNoInteractions(icebergValidator); verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -121,7 +114,6 @@ public void housekeepingPathRepositoryThrowsException() { scheduler.scheduleBeekeeperEvent(); fail("Should have thrown exception"); } catch (Exception e) { - verify(icebergValidator).throwExceptionIfIceberg(path.getDatabaseName(), path.getTableName()); verify(pathSchedulerService).scheduleForHousekeeping(path); verify(beekeeperEventReader, times(0)).delete(any()); verifyNoInteractions(tableSchedulerService); @@ -142,7 +134,6 @@ public void housekeepingTableRepositoryThrowsException() { scheduler.scheduleBeekeeperEvent(); fail("Should have thrown exception"); } catch (Exception e) { - verify(icebergValidator).throwExceptionIfIceberg(table.getDatabaseName(), table.getTableName()); verify(tableSchedulerService).scheduleForHousekeeping(table); verify(beekeeperEventReader, times(0)).delete(any()); verifyNoInteractions(pathSchedulerService); @@ -153,26 +144,6 @@ public void housekeepingTableRepositoryThrowsException() { } } - @Test - public void icebergValidatorThrowsException() { - String databaseName = "database"; - String tableName = "table"; - when(path.getDatabaseName()).thenReturn(databaseName); - when(path.getTableName()).thenReturn(tableName); - Optional event = Optional.of(newHousekeepingEvent(path, UNREFERENCED)); - when(beekeeperEventReader.read()).thenReturn(event); - - doThrow(new BeekeeperIcebergException("Iceberg table")) - .when(icebergValidator).throwExceptionIfIceberg(eq(databaseName), eq(tableName)); - - scheduler.scheduleBeekeeperEvent(); - - verify(icebergValidator).throwExceptionIfIceberg(databaseName, tableName); - verifyNoInteractions(pathSchedulerService); - verifyNoInteractions(tableSchedulerService); - verify(beekeeperEventReader).delete(event.get()); - } - @Test public void typicalClose() throws Exception { scheduler.close(); From ae265192c138ac4d8ce299f304bf771ec9325e24 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 11:56:48 +0100 Subject: [PATCH 46/65] fix validator tests --- .../cleanup/validation/IcebergValidator.java | 3 +- .../validation/IcebergValidatorTest.java | 40 ++++--------------- 2 files changed, 9 insertions(+), 34 deletions(-) diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index 55fb07aa..048a3ba5 100644 --- 
a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -48,9 +48,8 @@ public void throwExceptionIfIceberg(String databaseName, String tableName) { try (CleanerClient client = cleanerClientFactory.newInstance()) { Map parameters = client.getTableProperties(databaseName, tableName); String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); - String format = parameters.getOrDefault("format", "").toLowerCase(); String metadataLocation = parameters.getOrDefault("metadata_location", "").toLowerCase(); - if (tableType.contains("iceberg") || format.contains("iceberg") || !metadataLocation.isEmpty()) { + if (tableType.contains("iceberg") || !metadataLocation.isEmpty()) { throw new BeekeeperIcebergException( format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName)); } diff --git a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java index dcaf308c..84eb88f8 100644 --- a/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java +++ b/beekeeper-cleanup/src/test/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidatorTest.java @@ -15,6 +15,7 @@ */ package com.expediagroup.beekeeper.cleanup.validation; +import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; @@ -49,7 +50,6 @@ public void shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception { properties.put("table_type", "ICEBERG"); when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); icebergValidator.throwExceptionIfIceberg("db", "table"); verify(cleanerClientFactory).newInstance(); @@ -57,12 +57,11 @@ public void shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception { } @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenFormatIsIceberg() throws Exception { + public void shouldThrowExceptionWhenMetadataIsIceberg() throws Exception { Map properties = new HashMap<>(); - properties.put("format", "iceberg"); + properties.put("metadata_location", "s3://db/table/metadata/0000.json"); when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); icebergValidator.throwExceptionIfIceberg("db", "table"); } @@ -73,44 +72,21 @@ public void shouldNotThrowExceptionForNonIcebergTable() throws Exception { properties.put("table_type", "HIVE_TABLE"); when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")) - .thenReturn("org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"); icebergValidator.throwExceptionIfIceberg("db", "table"); verify(cleanerClientFactory).newInstance(); verify(cleanerClient).close(); } - @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenOutputFormatContainsIceberg() throws Exception { - Map properties = new HashMap<>(); - - when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")) - 
.thenReturn("org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"); - - icebergValidator.throwExceptionIfIceberg("db", "table"); - } - - @Test(expected = BeekeeperIcebergException.class) - public void shouldThrowExceptionWhenFormatIsNullButTableTypeIsIceberg() throws Exception { - Map properties = new HashMap<>(); - properties.put("table_type", "ICEBERG"); - - when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")).thenReturn(""); - - icebergValidator.throwExceptionIfIceberg("db", "table"); - } - @Test - public void shouldNotThrowExceptionWhenOutputFormatIsNull() throws Exception { + public void shouldThrowExceptionWhenOutputFormatIsNull() throws Exception { Map properties = new HashMap<>(); - properties.put("table_type", "HIVE_TABLE"); + properties.put("table_type", null); + properties.put("metadata_location", null); when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties); - when(cleanerClient.getOutputFormat("db", "table")).thenReturn(null); - icebergValidator.throwExceptionIfIceberg("db", "table"); + assertThatThrownBy(() -> icebergValidator.throwExceptionIfIceberg("db", "table")).isInstanceOf( + BeekeeperIcebergException.class); } } From c2e0b3f4bc2807b59a3f251b5ec2499b8afc8ef2 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 12:06:21 +0100 Subject: [PATCH 47/65] clean up it tests --- .../BeekeeperMetadataCleanupIntegrationTest.java | 6 +----- .../beekeeper/integration/utils/HiveTestUtils.java | 14 +++++--------- .../metadata/cleanup/context/CommonBeans.java | 2 +- .../metadata/cleanup/context/CommonBeansTest.java | 2 +- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java index aed57e4f..e45ee0b9 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperMetadataCleanupIntegrationTest.java @@ -69,8 +69,6 @@ import com.google.common.collect.ImmutableMap; import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter; -import com.expediagroup.beekeeper.core.model.HousekeepingPath; -import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.integration.utils.ContainerTestUtils; import com.expediagroup.beekeeper.integration.utils.HiveTestUtils; import com.expediagroup.beekeeper.metadata.cleanup.BeekeeperMetadataCleanup; @@ -233,11 +231,9 @@ public void cleanupPartitionedTable() throws Exception { public void shouldSkipCleanupForIcebergTable() throws Exception { Map tableProperties = new HashMap<>(); tableProperties.put("table_type", "ICEBERG"); - tableProperties.put("format", "ICEBERG/PARQUET"); - String outputFormat = "org.apache.iceberg.mr.hive.HiveIcebergOutputFormat"; hiveTestUtils.createTableWithProperties( - PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, tableProperties, outputFormat, true); + PARTITIONED_TABLE_PATH, TABLE_NAME_VALUE, true, tableProperties, true); amazonS3.putObject(BUCKET, PARTITIONED_TABLE_OBJECT_KEY, TABLE_DATA); insertExpiredMetadata(PARTITIONED_TABLE_PATH, null); diff --git 
a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java index 222fafdd..b8d66219 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/utils/HiveTestUtils.java @@ -58,7 +58,7 @@ public Table createTable(String path, String tableName, boolean partitioned) thr } public Table createTable(String path, String tableName, boolean partitioned, boolean withBeekeeperProperty) - throws TException { + throws TException { Table hiveTable = new Table(); hiveTable.setDbName(DATABASE_NAME_VALUE); hiveTable.setTableName(tableName); @@ -89,7 +89,7 @@ public Table createTable(String path, String tableName, boolean partitioned, boo * @param hiveTable Table to add partitions to * @param partitionValues The list of partition values, e.g. ["2020-01-01", "0", "A"] * @throws Exception May be thrown if there is a problem when trying to write the data to the file, or when the client - * adds the partition to the table. + * adds the partition to the table. */ public void addPartitionsToTable(String path, Table hiveTable, List partitionValues) throws Exception { String eventDate = "/event_date=" + partitionValues.get(0); // 2020-01-01 @@ -113,7 +113,8 @@ private Partition newTablePartition(Table hiveTable, List values, URI lo return partition; } - public Table createTableWithProperties(String path, String tableName, boolean partitioned, Map tableProperties, String outputFormat, boolean withBeekeeperProperty) + public Table createTableWithProperties(String path, String tableName, boolean partitioned, + Map tableProperties, boolean withBeekeeperProperty) throws TException { Table hiveTable = new Table(); hiveTable.setDbName(DATABASE_NAME_VALUE); @@ -134,12 +135,7 @@ public Table createTableWithProperties(String path, String tableName, boolean pa sd.setCols(DATA_COLUMNS); sd.setLocation(path); sd.setParameters(new HashMap<>()); - - if (outputFormat != null) { - sd.setOutputFormat(outputFormat); - } else { - sd.setOutputFormat(TextOutputFormat.class.getName()); - } + sd.setOutputFormat(TextOutputFormat.class.getName()); sd.setSerdeInfo(new SerDeInfo()); sd.getSerdeInfo().setSerializationLib("org.apache.hadoop.hive.serde2.OpenCSVSerde"); hiveTable.setSd(sd); diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java index b5ddbb2a..60518cd0 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeans.java @@ -144,7 +144,7 @@ public S3Client s3Client(AmazonS3 amazonS3, @Value("${properties.dry-run-enabled @Bean(name = "s3PathCleaner") PathCleaner pathCleaner( S3Client s3Client, - BytesDeletedReporter bytesDeletedReporter, IcebergValidator icebergValidator) { + BytesDeletedReporter bytesDeletedReporter) { return new S3PathCleaner(s3Client, new S3SentinelFilesCleaner(s3Client), bytesDeletedReporter); } diff --git a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java 
b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java index e5659ea9..e2323868 100644 --- a/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java +++ b/beekeeper-metadata-cleanup/src/test/java/com/expediagroup/beekeeper/metadata/cleanup/context/CommonBeansTest.java @@ -154,7 +154,7 @@ public void verifyS3Client() { void verifyS3pathCleaner() { BytesDeletedReporter reporter = commonBeans.bytesDeletedReporter(meterRegistry, false); S3Client s3Client = commonBeans.s3Client(commonBeans.amazonS3(), false); - PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, reporter, icebergValidator); + PathCleaner pathCleaner = commonBeans.pathCleaner(s3Client, reporter); assertThat(pathCleaner).isInstanceOf(S3PathCleaner.class); } From 07174b27f8eceb5fe1e8ed6bb8c4799307a1c177 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 12:19:12 +0100 Subject: [PATCH 48/65] change expired metadata handler --- .../handler/ExpiredMetadataHandler.java | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index 9a0f628e..6d0a1df9 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -32,7 +32,7 @@ import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner; import com.expediagroup.beekeeper.cleanup.path.PathCleaner; -import com.expediagroup.beekeeper.core.error.BeekeeperException; +import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; import com.expediagroup.beekeeper.core.model.HousekeepingMetadata; import com.expediagroup.beekeeper.core.model.HousekeepingStatus; import com.expediagroup.beekeeper.core.repository.HousekeepingMetadataRepository; @@ -78,6 +78,11 @@ public void cleanupMetadata(HousekeepingMetadata housekeepingMetadata, LocalDate if (deleted && !dryRunEnabled) { updateAttemptsAndStatus(housekeepingMetadata, DELETED); } + } catch (BeekeeperIcebergException e) { + updateAttemptsAndStatus(housekeepingMetadata, SKIPPED); + log + .warn("Table \"{}.{}\" is skipped because is iceberg or could not be identified ", + housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); } catch (Exception e) { updateAttemptsAndStatus(housekeepingMetadata, FAILED); log @@ -114,21 +119,18 @@ private boolean cleanUpTable(CleanerClient client, HousekeepingMetadata housekee String tableName = housekeepingMetadata.getTableName(); log.info("Cleaning up metadata for \"{}.{}\"", databaseName, tableName); if (metadataCleaner.tableExists(client, databaseName, tableName)) { - try { - metadataCleaner.dropTable(housekeepingMetadata, client); - pathCleaner.cleanupPath(housekeepingMetadata); - } catch (BeekeeperException e) { - log.warn("Skipping cleanup for Iceberg table \"{}.{}\": {}", databaseName, tableName, e.getMessage()); - updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); - return false; - } + metadataCleaner.dropTable(housekeepingMetadata, client); + 
pathCleaner.cleanupPath(housekeepingMetadata); } else { log.info("Cannot drop table \"{}.{}\". Table does not exist.", databaseName, tableName); } return true; } - private boolean cleanupPartition(CleanerClient client, HousekeepingMetadata housekeepingMetadata, boolean dryRunEnabled) { + private boolean cleanupPartition( + CleanerClient client, + HousekeepingMetadata housekeepingMetadata, + boolean dryRunEnabled) { if (!S3PathValidator.validPartitionPath(housekeepingMetadata.getPath())) { log.warn("Will not clean up partition path \"{}\" because it is not valid.", housekeepingMetadata.getPath()); updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); @@ -136,20 +138,16 @@ private boolean cleanupPartition(CleanerClient client, HousekeepingMetadata hous } String databaseName = housekeepingMetadata.getDatabaseName(); String tableName = housekeepingMetadata.getTableName(); - log.info("Cleaning up metadata for partition \"{}\" in table \"{}.{}\"", housekeepingMetadata.getPartitionName(), databaseName, tableName); + log.info("Cleaning up metadata for \"{}.{}\"", databaseName, tableName); if (metadataCleaner.tableExists(client, databaseName, tableName)) { - try { - boolean partitionDeleted = metadataCleaner.dropPartition(housekeepingMetadata, client); - if (partitionDeleted) { - pathCleaner.cleanupPath(housekeepingMetadata); - } - } catch (BeekeeperException e) { - log.warn("Skipping cleanup for Iceberg partition \"{}\" in table \"{}.{}\": {}", housekeepingMetadata.getPartitionName(), databaseName, tableName, e.getMessage()); - updateStatus(housekeepingMetadata, SKIPPED, dryRunEnabled); - return false; + boolean partitionDeleted = metadataCleaner.dropPartition(housekeepingMetadata, client); + if (partitionDeleted) { + pathCleaner.cleanupPath(housekeepingMetadata); } } else { - log.info("Cannot drop partition \"{}\" from table \"{}.{}\". Table does not exist.", housekeepingMetadata.getPartitionName(), databaseName, tableName); + log + .info("Cannot drop partition \"{}\" from table \"{}.{}\". 
Table does not exist.", + housekeepingMetadata.getPartitionName(), databaseName, tableName); } return true; } From 58c6e6554886d54aa20df2ce48083da0fbfa8055 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 12:23:51 +0100 Subject: [PATCH 49/65] fix lenient --- .../scheduler/apiary/service/SchedulerApiaryTest.java | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java index 73ab2b6d..4f45aef2 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiaryTest.java @@ -21,8 +21,6 @@ import static org.assertj.core.api.Assertions.fail; import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.doThrow; -import static org.mockito.Mockito.lenient; -import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.verifyNoInteractions; @@ -39,6 +37,7 @@ import org.junit.jupiter.api.Test; import org.junit.jupiter.api.extension.ExtendWith; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; import com.expedia.apiary.extensions.receiver.common.messaging.MessageEvent; @@ -88,7 +87,6 @@ public void typicalTableSchedule() { Optional event = Optional.of(newHousekeepingEvent(table, EXPIRED)); when(beekeeperEventReader.read()).thenReturn(event); scheduler.scheduleBeekeeperEvent(); - verify(tableSchedulerService).scheduleForHousekeeping(table); verifyNoInteractions(pathSchedulerService); verify(beekeeperEventReader).delete(event.get()); @@ -98,7 +96,6 @@ public void typicalTableSchedule() { public void typicalNoSchedule() { when(beekeeperEventReader.read()).thenReturn(Optional.empty()); scheduler.scheduleBeekeeperEvent(); - verifyNoInteractions(pathSchedulerService); verifyNoInteractions(tableSchedulerService); verify(beekeeperEventReader, times(0)).delete(any()); @@ -152,9 +149,7 @@ public void typicalClose() throws Exception { private BeekeeperEvent newHousekeepingEvent(HousekeepingEntity housekeepingEntity, LifecycleEventType lifecycleEventType) { - lenient().when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); - when(housekeepingEntity.getDatabaseName()).thenReturn("database"); - when(housekeepingEntity.getTableName()).thenReturn("table"); - return new BeekeeperEvent(List.of(housekeepingEntity), mock(MessageEvent.class)); + when(housekeepingEntity.getLifecycleType()).thenReturn(lifecycleEventType.name()); + return new BeekeeperEvent(List.of(housekeepingEntity), Mockito.mock(MessageEvent.class)); } } From a32e9d0ac4d47b61e44067daa202edf35a3400e4 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 11:46:04 +0000 Subject: [PATCH 50/65] Add IcebergTableListenerEventFilter --- .../scheduler/apiary/context/CommonBeans.java | 7 +- .../IcebergTableListenerEventFilter.java | 58 +++++++++ .../apiary/handler/MessageEventHandler.java | 4 + .../apiary/context/CommonBeansTest.java | 17 +++ .../IcebergTableListenerEventFilterTest.java | 111 ++++++++++++++++++ 5 files changed, 195 insertions(+), 2 deletions(-) create mode 100644
beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java create mode 100644 beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java index 492017c6..8bfbae44 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeans.java @@ -39,6 +39,7 @@ import com.expediagroup.beekeeper.core.model.LifecycleEventType; import com.expediagroup.beekeeper.scheduler.apiary.filter.EventTypeListenerEventFilter; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.LocationOnlyUpdateListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.filter.TableParameterListenerEventFilter; @@ -96,7 +97,8 @@ public MessageEventHandler unreferencedHousekeepingPathMessageEventHandler( new EventTypeListenerEventFilter(eventClasses), new LocationOnlyUpdateListenerEventFilter(), new TableParameterListenerEventFilter(), - new WhitelistedListenerEventFilter() + new WhitelistedListenerEventFilter(), + new IcebergTableListenerEventFilter() ); return new MessageEventHandler(generator, filters); @@ -120,7 +122,8 @@ public MessageEventHandler expiredHousekeepingMetadataMessageEventHandler( List filters = List.of( new EventTypeListenerEventFilter(eventClasses), - new TableParameterListenerEventFilter() + new TableParameterListenerEventFilter(), + new IcebergTableListenerEventFilter() ); return new MessageEventHandler(generator, filters); diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java new file mode 100644 index 00000000..8edeb381 --- /dev/null +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.expediagroup.beekeeper.scheduler.apiary.filter; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.springframework.stereotype.Component; + +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; +import com.expediagroup.beekeeper.core.model.LifecycleEventType; + +import java.util.Locale; +import java.util.Map; + +@Component +public class IcebergTableListenerEventFilter implements ListenerEventFilter { + + private static final Logger log = LogManager.getLogger(IcebergTableListenerEventFilter.class); + + private static final String METADATA_LOCATION_KEY = "metadata_location"; + private static final String TABLE_TYPE_KEY = "table_type"; + private static final String TABLE_TYPE_ICEBERG_VALUE = "iceberg"; + + @Override + public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { + Map tableParameters = event.getTableParameters(); + + if (tableParameters != null) { + String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY,null); + String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY,null); + + boolean hasMetadataLocation = metadataLocation != null && !metadataLocation.trim().isEmpty(); + boolean isIcebergType = tableType != null && tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); + + if (hasMetadataLocation || isIcebergType) { + log.info("Iceberg table '{}.{}' is not currently supported in Beekeeper.", + event.getDbName(), event.getTableName()); + return true; + } + } + return false; + } +} + + diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandler.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandler.java index 44f0e2f2..d07912b2 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandler.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/handler/MessageEventHandler.java @@ -58,4 +58,8 @@ private boolean shouldFilterMessage(ListenerEvent listenerEvent) { private List generateHousekeepingEntities(ListenerEvent listenerEvent) { return generator.generate(listenerEvent, CLIENT_ID); } + + public List getFilters() { + return filters; + } } diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeansTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeansTest.java index 2aa5f17b..cd1f776a 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeansTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/context/CommonBeansTest.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.EnumMap; +import java.util.List; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; @@ -32,6 +33,8 @@ import com.expedia.apiary.extensions.receiver.sqs.messaging.SqsMessageReader; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.scheduler.apiary.filter.IcebergTableListenerEventFilter; +import com.expediagroup.beekeeper.scheduler.apiary.filter.ListenerEventFilter; import com.expediagroup.beekeeper.scheduler.apiary.generator.ExpiredHousekeepingMetadataGenerator; import 
com.expediagroup.beekeeper.scheduler.apiary.generator.HousekeepingEntityGenerator; import com.expediagroup.beekeeper.scheduler.apiary.generator.UnreferencedHousekeepingPathGenerator; @@ -117,4 +120,18 @@ public void validatePathEventReader() { mock(MessageEventHandler.class)); assertThat(reader).isInstanceOf(BeekeeperEventReader.class); } + + @Test + public void validateUnreferencedHousekeepingPathMessageEventHandlerIncludesIcebergFilter() { + MessageEventHandler handler = commonBeans.unreferencedHousekeepingPathMessageEventHandler(unreferencedHousekeepingPathGenerator); + List filters = handler.getFilters(); + assertThat(filters).hasAtLeastOneElementOfType(IcebergTableListenerEventFilter.class); + } + + @Test + public void validateExpiredHousekeepingMetadataMessageEventHandlerIncludesIcebergFilter() { + MessageEventHandler handler = commonBeans.expiredHousekeepingMetadataMessageEventHandler(expiredHousekeepingMetadataGenerator); + List filters = handler.getFilters(); + assertThat(filters).hasAtLeastOneElementOfType(IcebergTableListenerEventFilter.class); + } } diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java new file mode 100644 index 00000000..98be98f5 --- /dev/null +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -0,0 +1,111 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.expediagroup.beekeeper.scheduler.apiary.filter; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.Test; + +import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; +import com.expediagroup.beekeeper.core.model.LifecycleEventType; + +public class IcebergTableListenerEventFilterTest { + + private final IcebergTableListenerEventFilter filter = new IcebergTableListenerEventFilter(); + + @Test + public void shouldFilterWhenTableTypeIsIceberg() { + ListenerEvent event = createListenerEventWithTableType("ICEBERG"); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isTrue(); + } + + @Test + public void shouldNotFilterWhenTableTypeIsNotIceberg() { + ListenerEvent event = createListenerEventWithTableType("HIVE"); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isFalse(); + } + + @Test + public void shouldFilterWhenTableTypeIsIcebergIgnoreCase() { + ListenerEvent event = createListenerEventWithTableType("iceberg"); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isTrue(); + } + + @Test + public void shouldFilterWhenMetadataLocationIsPresent() { + ListenerEvent event = createListenerEventWithMetadataLocation("s3://example/path/to/metadata"); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isTrue(); + } + + @Test + public void shouldNotFilterWhenMetadataLocationIsEmpty() { + ListenerEvent event = createListenerEventWithMetadataLocation(""); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isFalse(); + } + + @Test + public void shouldNotFilterWhenMetadataLocationIsNull() { + ListenerEvent event = createListenerEventWithMetadataLocation(null); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isFalse(); + } + + @Test + public void shouldHandleNullTableParameters() { + ListenerEvent event = createListenerEventWithTableParameters(null); + boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); + assertThat(isFiltered).isFalse(); + } + + private ListenerEvent createListenerEventWithTableType(String tableType) { + Map tableParameters = new HashMap<>(); + tableParameters.put("table_type", tableType); + return createListenerEventWithTableParameters(tableParameters); + } + + private ListenerEvent createListenerEventWithMetadataLocation(String metadataLocation) { + Map tableParameters = new HashMap<>(); + tableParameters.put("metadata_location", metadataLocation); + return createListenerEventWithTableParameters(tableParameters); + } + + private ListenerEvent createListenerEventWithTableParameters(Map tableParameters) { + return new ListenerEvent() { + @Override + public String getDbName() { + return "test_db"; + } + + @Override + public String getTableName() { + return "test_table"; + } + + @Override + public Map getTableParameters() { + return tableParameters; + } + }; + } +} From 9a93bd77e3eb352607623ec42c870d372aefa450 Mon Sep 17 00:00:00 2001 From: javsanbel2 Date: Wed, 27 Nov 2024 12:58:38 +0100 Subject: [PATCH 51/65] add event --- ...redMetadataSchedulerApiaryIntegrationTest.java | 15 +++++++++++++++ .../integration/model/AlterTableSqsMessage.java | 15 +++++++++++++++ .../beekeeper/integration/model/SqsMessage.java | 6 ++++++ 3 files changed, 36 
insertions(+) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index ccbf19c6..bfdc5c58 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -213,6 +213,21 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti assertExpiredMetadata(expiredMetadata.get(1), LOCATION_B, PARTITION_B_NAME); } + @Test + public void expiredMetadataAlterIcebergTableEventShouldBeIgnored() + throws SQLException, IOException, URISyntaxException { + insertExpiredMetadata(LOCATION_A + "-old", null); + + AlterTableSqsMessage alterTableSqsMessage = new AlterTableSqsMessage(LOCATION_A, true, true); + amazonSQS.sendMessage(sendMessageRequest(alterTableSqsMessage.getFormattedString())); + + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); + + // iceberg table event should be ignored + List expiredMetadata = getExpiredMetadata(); + assertThat(expiredMetadata.size()).isEqualTo(0); + } + @Test public void healthCheck() { CloseableHttpClient client = HttpClientBuilder.create().build(); diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java index 9fbff756..2a4e52b3 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java @@ -51,6 +51,21 @@ public AlterTableSqsMessage( setExpired(isExpired); } + public AlterTableSqsMessage( + String tableLocation, + boolean isExpired, + boolean isIceberg + ) throws IOException, URISyntaxException { + super(ALTER_TABLE); + setTableLocation(tableLocation); + setOldTableLocation(DUMMY_LOCATION); + setOldTableName(TABLE_NAME_VALUE); + setExpired(isExpired); + if (isIceberg) { + setIceberg(); + } + } + public void setOldTableLocation(String oldTableLocation) { apiaryEventMessageJsonObject.add(EVENT_TABLE_OLD_LOCATION_KEY, new JsonPrimitive(oldTableLocation)); } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java index c51b2513..da3412ad 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/SqsMessage.java @@ -93,6 +93,12 @@ public void setExpired(boolean isExpired) { tableParameters.add(EXPIRED_DATA_RETENTION_PERIOD_PROPERTY_KEY, new JsonPrimitive(SHORT_CLEANUP_DELAY_VALUE)); } + public void setIceberg() { + JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); + tableParameters.add("table_format", new JsonPrimitive("ICEBERG")); + tableParameters.add("metadata_location", new 
JsonPrimitive("s3://bucket/metadata")); + } + public void setWhitelisted(boolean isWhitelisted) { String whitelist = isWhitelisted ? eventType.toString() : ""; JsonObject tableParameters = apiaryEventMessageJsonObject.getAsJsonObject(EVENT_TABLE_PARAMETERS_KEY); From db1352af1562a2f49615b2606b4e20eb7e353e52 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 12:41:47 +0000 Subject: [PATCH 52/65] Add integration test for scheduler --- ...redMetadataSchedulerApiaryIntegrationTest.java | 12 ++++-------- .../integration/model/AlterTableSqsMessage.java | 15 --------------- .../integration/model/CreateTableSqsMessage.java | 13 +++++++++++++ 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java index bfdc5c58..66382669 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest.java @@ -214,16 +214,12 @@ public void expiredMetadataMultipleAlterPartitionTableEvents() throws SQLExcepti } @Test - public void expiredMetadataAlterIcebergTableEventShouldBeIgnored() - throws SQLException, IOException, URISyntaxException { - insertExpiredMetadata(LOCATION_A + "-old", null); - - AlterTableSqsMessage alterTableSqsMessage = new AlterTableSqsMessage(LOCATION_A, true, true); - amazonSQS.sendMessage(sendMessageRequest(alterTableSqsMessage.getFormattedString())); + public void expiredMetadataCreateIcebergTableEvent() throws SQLException, IOException, URISyntaxException { + CreateTableSqsMessage createTableSqsMessage = new CreateTableSqsMessage(LOCATION_A, true, true); + amazonSQS.sendMessage(sendMessageRequest(createTableSqsMessage.getFormattedString())); - await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getUpdatedExpiredMetadataRowCount() == 1); + await().atMost(TIMEOUT, TimeUnit.SECONDS).until(() -> getExpiredMetadataRowCount() == 0); - // iceberg table event should be ignored List expiredMetadata = getExpiredMetadata(); assertThat(expiredMetadata.size()).isEqualTo(0); } diff --git a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java index 2a4e52b3..9fbff756 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/AlterTableSqsMessage.java @@ -51,21 +51,6 @@ public AlterTableSqsMessage( setExpired(isExpired); } - public AlterTableSqsMessage( - String tableLocation, - boolean isExpired, - boolean isIceberg - ) throws IOException, URISyntaxException { - super(ALTER_TABLE); - setTableLocation(tableLocation); - setOldTableLocation(DUMMY_LOCATION); - setOldTableName(TABLE_NAME_VALUE); - setExpired(isExpired); - if (isIceberg) { - setIceberg(); - } - } - public void setOldTableLocation(String oldTableLocation) { apiaryEventMessageJsonObject.add(EVENT_TABLE_OLD_LOCATION_KEY, new JsonPrimitive(oldTableLocation)); } diff --git 
a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/CreateTableSqsMessage.java b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/CreateTableSqsMessage.java index 7e6e31a0..17349ed3 100644 --- a/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/CreateTableSqsMessage.java +++ b/beekeeper-integration-tests/src/test/java/com/expediagroup/beekeeper/integration/model/CreateTableSqsMessage.java @@ -30,4 +30,17 @@ public CreateTableSqsMessage( setTableLocation(tableLocation); setExpired(isExpired); } + + public CreateTableSqsMessage( + String tableLocation, + boolean isIceberg, + boolean isExpired + ) throws IOException, URISyntaxException { + super(CREATE_TABLE); + setTableLocation(tableLocation); + setExpired(isExpired); + if (isIceberg) { + setIceberg(); + } + } } From f300e60cf2ccf1085ade56a158fa2a6e27ec5b8b Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 15:03:52 +0000 Subject: [PATCH 53/65] Revert versions used for testing & changelog --- CHANGELOG.md | 5 +++++ beekeeper-api/pom.xml | 2 +- beekeeper-cleanup/pom.xml | 2 +- beekeeper-core/pom.xml | 2 +- beekeeper-integration-tests/pom.xml | 2 +- beekeeper-metadata-cleanup/pom.xml | 2 +- beekeeper-path-cleanup/pom.xml | 2 +- beekeeper-scheduler-apiary/pom.xml | 2 +- .../apiary/filter/IcebergTableListenerEventFilter.java | 8 ++++---- .../filter/IcebergTableListenerEventFilterTest.java | 7 ------- beekeeper-scheduler/pom.xml | 2 +- beekeeper-vacuum-tool/pom.xml | 2 +- 12 files changed, 18 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72016af8..c1a70d80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.6.0] - 2024-10-27 +### Added +- Added filter for Iceberg tables in `beekeeper-scheduler-apiary` to prevent scheduling paths and metadata for deletion. +- Added `IcebergValidator` to ensure Iceberg tables are identified and excluded from cleanup operations. + ## [3.5.7] - 2024-10-25 ### Changed - Added error handling for bad requests with incorrect sort parameters. 
diff --git a/beekeeper-api/pom.xml b/beekeeper-api/pom.xml index 85fc8b81..6ea88ffb 100644 --- a/beekeeper-api/pom.xml +++ b/beekeeper-api/pom.xml @@ -4,7 +4,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-api diff --git a/beekeeper-cleanup/pom.xml b/beekeeper-cleanup/pom.xml index d32e498a..e3f59c07 100644 --- a/beekeeper-cleanup/pom.xml +++ b/beekeeper-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-cleanup diff --git a/beekeeper-core/pom.xml b/beekeeper-core/pom.xml index 4c3914ad..58e3760d 100644 --- a/beekeeper-core/pom.xml +++ b/beekeeper-core/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-core diff --git a/beekeeper-integration-tests/pom.xml b/beekeeper-integration-tests/pom.xml index 65c8ef69..98c1f209 100644 --- a/beekeeper-integration-tests/pom.xml +++ b/beekeeper-integration-tests/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-integration-tests diff --git a/beekeeper-metadata-cleanup/pom.xml b/beekeeper-metadata-cleanup/pom.xml index 499d00c0..daa61483 100644 --- a/beekeeper-metadata-cleanup/pom.xml +++ b/beekeeper-metadata-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-metadata-cleanup diff --git a/beekeeper-path-cleanup/pom.xml b/beekeeper-path-cleanup/pom.xml index ac1a2723..ae9c43f3 100644 --- a/beekeeper-path-cleanup/pom.xml +++ b/beekeeper-path-cleanup/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-path-cleanup diff --git a/beekeeper-scheduler-apiary/pom.xml b/beekeeper-scheduler-apiary/pom.xml index bb0b2a7d..258c3d24 100644 --- a/beekeeper-scheduler-apiary/pom.xml +++ b/beekeeper-scheduler-apiary/pom.xml @@ -6,7 +6,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-scheduler-apiary diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 8edeb381..5b34689f 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -39,11 +39,11 @@ public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { Map tableParameters = event.getTableParameters(); if (tableParameters != null) { - String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY,null); - String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY,null); + String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY,""); + String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY,""); - boolean hasMetadataLocation = metadataLocation != null && !metadataLocation.trim().isEmpty(); - boolean isIcebergType = tableType != null && tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); + boolean hasMetadataLocation = !metadataLocation.trim().isEmpty(); + boolean isIcebergType = tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); if (hasMetadataLocation || isIcebergType) { log.info("Iceberg table '{}.{}' is not currently supported in Beekeeper.", diff --git 
a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java index 98be98f5..8a57424b 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -64,13 +64,6 @@ public void shouldNotFilterWhenMetadataLocationIsEmpty() { assertThat(isFiltered).isFalse(); } - @Test - public void shouldNotFilterWhenMetadataLocationIsNull() { - ListenerEvent event = createListenerEventWithMetadataLocation(null); - boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isFalse(); - } - @Test public void shouldHandleNullTableParameters() { ListenerEvent event = createListenerEventWithTableParameters(null); diff --git a/beekeeper-scheduler/pom.xml b/beekeeper-scheduler/pom.xml index 71ff5ad9..ccdaf7bc 100644 --- a/beekeeper-scheduler/pom.xml +++ b/beekeeper-scheduler/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-scheduler diff --git a/beekeeper-vacuum-tool/pom.xml b/beekeeper-vacuum-tool/pom.xml index 7399b317..eac17a57 100644 --- a/beekeeper-vacuum-tool/pom.xml +++ b/beekeeper-vacuum-tool/pom.xml @@ -5,7 +5,7 @@ beekeeper-parent com.expediagroup - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT beekeeper-vacuum-tool From bacd477129042ba4ceb2fb88a79332c618409ea1 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 15:12:41 +0000 Subject: [PATCH 54/65] Revert testing version --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b10ec464..a2008896 100644 --- a/pom.xml +++ b/pom.xml @@ -10,7 +10,7 @@ beekeeper-parent - 3.5.800-SNAPSHOT + 3.5.8-SNAPSHOT Beekeeper is a service which manages the cleanup of tables and unreferenced S3 paths. 
2019 pom From 04bb80691f838415dbba23d18986e4f6e5b68a05 Mon Sep 17 00:00:00 2001 From: Hamza Jugon <104994559+HamzaJugon@users.noreply.github.com> Date: Wed, 27 Nov 2024 15:29:48 +0000 Subject: [PATCH 55/65] Update beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java Co-authored-by: Jay Green-Stevens --- .../apiary/filter/IcebergTableListenerEventFilter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 5b34689f..5b0728fa 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -39,8 +39,8 @@ public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { Map tableParameters = event.getTableParameters(); if (tableParameters != null) { - String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY,""); - String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY,""); + String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY, ""); + String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY, ""); boolean hasMetadataLocation = !metadataLocation.trim().isEmpty(); boolean isIcebergType = tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); From f94ba5d2ec31c36a1b73149fb76eed4ba9198980 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 15:37:15 +0000 Subject: [PATCH 56/65] Updating asserts and remove unused logging --- .../scheduler/apiary/service/SchedulerApiary.java | 2 -- .../IcebergTableListenerEventFilterTest.java | 15 ++++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index fc27e5be..3ad876a1 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -38,8 +38,6 @@ @Component public class SchedulerApiary { - private static final Logger log = LoggerFactory.getLogger(SchedulerApiary.class); - private final BeekeeperEventReader beekeeperEventReader; private final EnumMap schedulerServiceMap; diff --git a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java index 8a57424b..66dceebd 100644 --- a/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java +++ b/beekeeper-scheduler-apiary/src/test/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilterTest.java @@ -15,7 +15,8 @@ */ package com.expediagroup.beekeeper.scheduler.apiary.filter; -import static 
org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; import java.util.HashMap; import java.util.Map; @@ -33,42 +34,42 @@ public class IcebergTableListenerEventFilterTest { public void shouldFilterWhenTableTypeIsIceberg() { ListenerEvent event = createListenerEventWithTableType("ICEBERG"); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isTrue(); + assertTrue(isFiltered); } @Test public void shouldNotFilterWhenTableTypeIsNotIceberg() { ListenerEvent event = createListenerEventWithTableType("HIVE"); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isFalse(); + assertFalse(isFiltered); } @Test public void shouldFilterWhenTableTypeIsIcebergIgnoreCase() { ListenerEvent event = createListenerEventWithTableType("iceberg"); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isTrue(); + assertTrue(isFiltered); } @Test public void shouldFilterWhenMetadataLocationIsPresent() { ListenerEvent event = createListenerEventWithMetadataLocation("s3://example/path/to/metadata"); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isTrue(); + assertTrue(isFiltered); } @Test public void shouldNotFilterWhenMetadataLocationIsEmpty() { ListenerEvent event = createListenerEventWithMetadataLocation(""); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isFalse(); + assertFalse(isFiltered); } @Test public void shouldHandleNullTableParameters() { ListenerEvent event = createListenerEventWithTableParameters(null); boolean isFiltered = filter.isFiltered(event, LifecycleEventType.EXPIRED); - assertThat(isFiltered).isFalse(); + assertFalse(isFiltered); } private ListenerEvent createListenerEventWithTableType(String tableType) { From 5517c7f98c4ee8a0e07d9fd4f76d28518cb2f618 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 16:15:10 +0000 Subject: [PATCH 57/65] Implement IsIcebergTablePredicate --- .../cleanup/validation/IcebergValidator.java | 10 ++- .../predicate/IsIcebergTablePredicate.java | 41 +++++++++ .../IsIcebergTablePredicateTest.java | 90 +++++++++++++++++++ .../IcebergTableListenerEventFilter.java | 25 ++---- 4 files changed, 143 insertions(+), 23 deletions(-) create mode 100644 beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java create mode 100644 beekeeper-core/src/test/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicateTest.java diff --git a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java index 048a3ba5..7ec15a3f 100644 --- a/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java +++ b/beekeeper-cleanup/src/main/java/com/expediagroup/beekeeper/cleanup/validation/IcebergValidator.java @@ -25,15 +25,18 @@ import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient; import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory; import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException; +import com.expediagroup.beekeeper.core.predicate.IsIcebergTablePredicate; public class IcebergValidator { private static final Logger log = LoggerFactory.getLogger(IcebergValidator.class); private 
final CleanerClientFactory cleanerClientFactory; + private final IsIcebergTablePredicate isIcebergTablePredicate; public IcebergValidator(CleanerClientFactory cleanerClientFactory) { this.cleanerClientFactory = cleanerClientFactory; + this.isIcebergTablePredicate = new IsIcebergTablePredicate(); } /** @@ -46,10 +49,9 @@ public IcebergValidator(CleanerClientFactory cleanerClientFactory) { */ public void throwExceptionIfIceberg(String databaseName, String tableName) { try (CleanerClient client = cleanerClientFactory.newInstance()) { - Map parameters = client.getTableProperties(databaseName, tableName); - String tableType = parameters.getOrDefault("table_type", "").toLowerCase(); - String metadataLocation = parameters.getOrDefault("metadata_location", "").toLowerCase(); - if (tableType.contains("iceberg") || !metadataLocation.isEmpty()) { + Map tableParameters = client.getTableProperties(databaseName, tableName); + + if (isIcebergTablePredicate.test(tableParameters)) { throw new BeekeeperIcebergException( format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName)); } diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java new file mode 100644 index 00000000..fb180d94 --- /dev/null +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java @@ -0,0 +1,41 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.expediagroup.beekeeper.core.predicate; + +import java.util.Map; +import java.util.function.Predicate; + +public class IsIcebergTablePredicate implements Predicate> { + + private static final String METADATA_LOCATION_KEY = "metadata_location"; + private static final String TABLE_TYPE_KEY = "table_type"; + private static final String TABLE_TYPE_ICEBERG_VALUE = "iceberg"; + + @Override + public boolean test(Map tableParameters) { + if (tableParameters == null) { + return false; + } + + String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY, "").trim(); + String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY, ""); + + boolean hasMetadataLocation = !metadataLocation.isEmpty(); + boolean isIcebergType = tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); + + return hasMetadataLocation || isIcebergType; + } +} diff --git a/beekeeper-core/src/test/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicateTest.java b/beekeeper-core/src/test/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicateTest.java new file mode 100644 index 00000000..fcd1b7dc --- /dev/null +++ b/beekeeper-core/src/test/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicateTest.java @@ -0,0 +1,90 @@ +/** + * Copyright (C) 2019-2024 Expedia, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.expediagroup.beekeeper.core.predicate; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.HashMap; +import java.util.Map; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class IsIcebergTablePredicateTest { + + private IsIcebergTablePredicate predicate; + + @BeforeEach + void setUp() { + predicate = new IsIcebergTablePredicate(); + } + + @Test + void testNullTableParameters() { + assertFalse(predicate.test(null)); + } + + @Test + void testEmptyTableParameters() { + Map tableParameters = new HashMap<>(); + assertFalse(predicate.test(tableParameters)); + } + + @Test + void testNoMetadataLocationOrTableType() { + Map tableParameters = Map.of("some_key", "some_value"); + assertFalse(predicate.test(tableParameters)); + } + + @Test + void testHasMetadataLocation() { + Map tableParameters = Map.of("metadata_location", "some/location/path"); + assertTrue(predicate.test(tableParameters)); + } + + @Test + void testHasIcebergTableType() { + Map tableParameters = Map.of("table_type", "ICEBERG"); + assertTrue(predicate.test(tableParameters)); + } + + @Test + void testBothMetadataLocationAndTableType() { + Map tableParameters = Map.of( + "metadata_location", "some/location/path", + "table_type", "iceberg"); + assertTrue(predicate.test(tableParameters)); + } + + @Test + void testCaseInsensitiveIcebergType() { + Map tableParameters = Map.of("table_type", "IcEbErG"); + assertTrue(predicate.test(tableParameters)); + } + + @Test + void testWhitespaceInMetadataLocation() { + Map tableParameters = Map.of("metadata_location", " "); + assertFalse(predicate.test(tableParameters)); + } + + @Test + void testIrrelevantTableType() { + Map tableParameters = Map.of("table_type", "hive"); + assertFalse(predicate.test(tableParameters)); + } +} diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 5b0728fa..70a9fde2 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -21,38 +21,25 @@ import com.expedia.apiary.extensions.receiver.common.event.ListenerEvent; import com.expediagroup.beekeeper.core.model.LifecycleEventType; +import com.expediagroup.beekeeper.core.predicate.IsIcebergTablePredicate; -import java.util.Locale; import java.util.Map; @Component public class IcebergTableListenerEventFilter implements ListenerEventFilter { private static final Logger log = LogManager.getLogger(IcebergTableListenerEventFilter.class); - - private static final String METADATA_LOCATION_KEY = 
"metadata_location"; - private static final String TABLE_TYPE_KEY = "table_type"; - private static final String TABLE_TYPE_ICEBERG_VALUE = "iceberg"; + private final IsIcebergTablePredicate isIcebergPredicate = new IsIcebergTablePredicate(); @Override public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { Map tableParameters = event.getTableParameters(); - if (tableParameters != null) { - String metadataLocation = tableParameters.getOrDefault(METADATA_LOCATION_KEY, ""); - String tableType = tableParameters.getOrDefault(TABLE_TYPE_KEY, ""); - - boolean hasMetadataLocation = !metadataLocation.trim().isEmpty(); - boolean isIcebergType = tableType.toLowerCase().contains(TABLE_TYPE_ICEBERG_VALUE); - - if (hasMetadataLocation || isIcebergType) { - log.info("Iceberg table '{}.{}' is not currently supported in Beekeeper.", - event.getDbName(), event.getTableName()); - return true; - } + if (isIcebergPredicate.test(tableParameters)) { + log.info("Iceberg table '{}.{}' is not currently supported in Beekeeper.", + event.getDbName(), event.getTableName()); + return true; } return false; } } - - From e66982e7bab1b4cf1cab6c9569ad22f0333d4015 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 16:33:48 +0000 Subject: [PATCH 58/65] revert changes to schedulerApiary --- .../scheduler/apiary/service/SchedulerApiary.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index 3ad876a1..ed9a1825 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -22,8 +22,6 @@ import java.util.List; import java.util.Optional; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import org.springframework.transaction.annotation.Transactional; @@ -53,7 +51,7 @@ public SchedulerApiary( @Transactional public void scheduleBeekeeperEvent() { Optional housekeepingEntitiesToBeScheduled = beekeeperEventReader.read(); - if (housekeepingEntitiesToBeScheduled.isEmpty()) {return;} + if (housekeepingEntitiesToBeScheduled.isEmpty()) { return; } BeekeeperEvent beekeeperEvent = housekeepingEntitiesToBeScheduled.get(); List housekeepingEntities = beekeeperEvent.getHousekeepingEntities(); @@ -65,13 +63,15 @@ public void scheduleBeekeeperEvent() { } catch (Exception e) { throw new BeekeeperException(format( "Unable to schedule %s deletion for entity, this message will go back on the queue", - entity.getLifecycleType()), e); + entity.getLifecycleType()), + e); } } + beekeeperEventReader.delete(beekeeperEvent); } public void close() throws IOException { beekeeperEventReader.close(); } -} +} \ No newline at end of file From 5e67a64042b5ab144a932f0bb5fb628442da8e8a Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 16:34:13 +0000 Subject: [PATCH 59/65] Update SchedulerApiary.java --- .../beekeeper/scheduler/apiary/service/SchedulerApiary.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java 
b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java index ed9a1825..4cc1e165 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/service/SchedulerApiary.java @@ -74,4 +74,4 @@ public void scheduleBeekeeperEvent() { public void close() throws IOException { beekeeperEventReader.close(); } -} \ No newline at end of file +} From a65f066ecae150a831668e85750503200335491e Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 16:51:04 +0000 Subject: [PATCH 60/65] Updating logging so we only see stack trace on debug level --- .../cleanup/handler/ExpiredMetadataHandler.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index 6d0a1df9..f4d78835 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -80,14 +80,16 @@ public void cleanupMetadata(HousekeepingMetadata housekeepingMetadata, LocalDate } } catch (BeekeeperIcebergException e) { updateAttemptsAndStatus(housekeepingMetadata, SKIPPED); - log - .warn("Table \"{}.{}\" is skipped because is iceberg or could not be identified ", - housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); + log.warn("Table \"{}.{}\" is skipped because it is iceberg or could not be identified.", + housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); + log.debug("Table \"{}.{}\" is skipped because it is iceberg or could not be identified.", + housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); } catch (Exception e) { updateAttemptsAndStatus(housekeepingMetadata, FAILED); - log - .warn("Unexpected exception when deleting metadata for table \"{}.{}\"", - housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); + log.warn("Unexpected exception when deleting metadata for table \"{}.{}\".", + housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); + log.debug("Unexpected exception when deleting metadata for table \"{}.{}\".", + housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); } } From fd6bd88ba8b9ec62f7743d6c8e215fbd312d76cd Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 17:01:17 +0000 Subject: [PATCH 61/65] Update logging in ExpiredMetadataHandler --- .../cleanup/handler/ExpiredMetadataHandler.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index f4d78835..69238ec0 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ 
b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -80,16 +80,16 @@ public void cleanupMetadata(HousekeepingMetadata housekeepingMetadata, LocalDate } } catch (BeekeeperIcebergException e) { updateAttemptsAndStatus(housekeepingMetadata, SKIPPED); - log.warn("Table \"{}.{}\" is skipped because it is iceberg or could not be identified.", + String logMessage = String.format("Table \"%s.%s\" is skipped because it is iceberg or could not be identified.", housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); - log.debug("Table \"{}.{}\" is skipped because it is iceberg or could not be identified.", - housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); + log.warn(logMessage); + log.debug(logMessage, e); } catch (Exception e) { updateAttemptsAndStatus(housekeepingMetadata, FAILED); - log.warn("Unexpected exception when deleting metadata for table \"{}.{}\".", + String logMessage = String.format("Unexpected exception when deleting metadata for table \"%s.%s\".", housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); - log.debug("Unexpected exception when deleting metadata for table \"{}.{}\".", - housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(), e); + log.warn(logMessage); + log.debug(logMessage, e); } } From 026e769a7b4bc1941389f621cb4d9f470b85598a Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Wed, 27 Nov 2024 17:26:32 +0000 Subject: [PATCH 62/65] Updating for minor comments --- .../beekeeper/core/predicate/IsIcebergTablePredicate.java | 2 +- .../apiary/filter/IcebergTableListenerEventFilter.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java index fb180d94..33e067cc 100644 --- a/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java +++ b/beekeeper-core/src/main/java/com/expediagroup/beekeeper/core/predicate/IsIcebergTablePredicate.java @@ -26,7 +26,7 @@ public class IsIcebergTablePredicate implements Predicate> { @Override public boolean test(Map tableParameters) { - if (tableParameters == null) { + if (tableParameters == null || tableParameters.isEmpty()) { return false; } diff --git a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java index 70a9fde2..3b2c2b20 100644 --- a/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java +++ b/beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java @@ -36,7 +36,7 @@ public boolean isFiltered(ListenerEvent event, LifecycleEventType type) { Map tableParameters = event.getTableParameters(); if (isIcebergPredicate.test(tableParameters)) { - log.info("Iceberg table '{}.{}' is not currently supported in Beekeeper.", + log.info("Ignoring table '{}.{}'. 
Iceberg tables are not supported in Beekeeper.", event.getDbName(), event.getTableName()); return true; } From 070b34ded6e08de6c047338340b03dcd45c63733 Mon Sep 17 00:00:00 2001 From: Hamza Jugon Date: Thu, 28 Nov 2024 09:53:19 +0000 Subject: [PATCH 63/65] Update logging --- .../metadata/cleanup/handler/ExpiredMetadataHandler.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java index 69238ec0..39fe7d5e 100644 --- a/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java +++ b/beekeeper-metadata-cleanup/src/main/java/com/expediagroup/beekeeper/metadata/cleanup/handler/ExpiredMetadataHandler.java @@ -82,13 +82,13 @@ public void cleanupMetadata(HousekeepingMetadata housekeepingMetadata, LocalDate updateAttemptsAndStatus(housekeepingMetadata, SKIPPED); String logMessage = String.format("Table \"%s.%s\" is skipped because it is iceberg or could not be identified.", housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); - log.warn(logMessage); + log.info(logMessage); log.debug(logMessage, e); } catch (Exception e) { updateAttemptsAndStatus(housekeepingMetadata, FAILED); String logMessage = String.format("Unexpected exception when deleting metadata for table \"%s.%s\".", housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName()); - log.warn(logMessage); + log.info(logMessage); log.debug(logMessage, e); } } From 1418f1b4655564480053844bcb9976258d6e3673 Mon Sep 17 00:00:00 2001 From: Hamza Jugon <104994559+HamzaJugon@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:02:49 +0000 Subject: [PATCH 64/65] Update CHANGELOG.md Co-authored-by: Jay Green-Stevens --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1a70d80..7a939405 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [3.6.0] - 2024-10-27 +## [3.6.0] - 2024-10-28 ### Added - Added filter for Iceberg tables in `beekeeper-scheduler-apiary` to prevent scheduling paths and metadata for deletion. - Added `IcebergValidator` to ensure Iceberg tables are identified and excluded from cleanup operations. From b80e71d26530f9c210b534eac79ba0a74ea7cd02 Mon Sep 17 00:00:00 2001 From: Hamza Jugon <104994559+HamzaJugon@users.noreply.github.com> Date: Thu, 28 Nov 2024 11:08:34 +0000 Subject: [PATCH 65/65] Update CHANGELOG.md Co-authored-by: Jay Green-Stevens --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a939405..a268ed6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [3.6.0] - 2024-10-28 +## [3.6.0] - 2024-11-28 ### Added - Added filter for Iceberg tables in `beekeeper-scheduler-apiary` to prevent scheduling paths and metadata for deletion. 
- Added `IcebergValidator` to ensure Iceberg tables are identified and excluded from cleanup operations.
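For reference, a minimal illustrative sketch (not part of the patches above) of how the `IsIcebergTablePredicate` introduced in patch 57 can be exercised. The package, class name, and `test(Map<String, String>)` signature are taken from the diff; the example class name and parameter values are hypothetical:

import java.util.Map;

import com.expediagroup.beekeeper.core.predicate.IsIcebergTablePredicate;

public class IsIcebergTablePredicateUsageExample {

  public static void main(String[] args) {
    IsIcebergTablePredicate predicate = new IsIcebergTablePredicate();

    // Per the implementation in patch 57 (and the empty-map guard from patch 62),
    // a table is treated as Iceberg when "metadata_location" is non-blank or
    // "table_type" contains "iceberg" (case-insensitive).
    System.out.println(predicate.test(Map.of("table_type", "ICEBERG")));        // true
    System.out.println(predicate.test(
        Map.of("metadata_location", "s3://bucket/table/metadata/v1.metadata.json"))); // true
    System.out.println(predicate.test(Map.of("table_type", "EXTERNAL_TABLE"))); // false
    System.out.println(predicate.test(Map.of()));                               // false
  }
}

Both `IcebergValidator` (cleanup side) and `IcebergTableListenerEventFilter` (scheduler side) delegate to this predicate after these patches, so the same identification rule applies in both paths.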