Add feature to ignore Iceberg tables (#185)
* Updated to BK-core

* Updated to path-cleanup

* Update PagingCleanupServiceTest.java

* main business logic

* adding exception

* Add DB & table name to exception message

* Update IcebergValidator.java

* Create IcebergValidatorTest.java

* Update HiveMetadataCleanerTest.java

* Updating and adding S3PathCleaner tests

Integrated LocalStack with Testcontainers. The @Rule annotation is from JUnit 4

* Adding IcebergValidator to constructors

* Updating JUnit imports

* Update SchedulerApiaryTest.java

* Update CommonBeans

* clean-up add comment

* Remove extra deletion

* adding beans

* fix tests

* Fixing integration tests for metadata cleanup

* fix path cleanup

* fix main problem with tests

* Fix BeekeeperDryRunPathCleanupIntegrationTest

* revert changes to fix BeekeeperExpiredMetadataSchedulerApiaryIntegrationTest

* Added missing properties to fix BeekeeperUnreferencedPathSchedulerApiaryIntegrationTest

* Add integration test for metadatacleanup

* Update metadata handler to catch BeekeeperException

* Update path-cleanup housekeeping status

* Update beekeeper to runtime exception

* bump versions for testing

* Add Hadoop dependencies

* Update pom.xml

* Revert changes to beekeeper-path

* revert more path-cleanup

* Revert path-cleanup

* cleanup

* Added logging for table params

* add logging

* remove logs to check filters

* cleaning up

* fix validator tests

* Clean up integration tests

* change expired metadata handler

* fix lenient

* Add IcebergTableListenerEventFilter

* Add integration test for scheduler

* Revert versions used for testing & changelog

* Revert testing version

* Update beekeeper-scheduler-apiary/src/main/java/com/expediagroup/beekeeper/scheduler/apiary/filter/IcebergTableListenerEventFilter.java

Co-authored-by: Jay Green-Stevens <[email protected]>

* Updating asserts and remove unused logging

* Implement IsIcebergTablePredicate

* revert changes to schedulerApiary

* Update SchedulerApiary.java

* Updating logging so we only see stack trace on debug level

* Update logging in ExpiredMetadataHandler

* Updating for minor comments

* Update logging

* Update CHANGELOG.md

Co-authored-by: Jay Green-Stevens <[email protected]>

* Update CHANGELOG.md

Co-authored-by: Jay Green-Stevens <[email protected]>

---------

Co-authored-by: Hamza Jugon <[email protected]>
Co-authored-by: Hamza Jugon <[email protected]>
Co-authored-by: Jay Green-Stevens <[email protected]>
4 people authored Nov 28, 2024
1 parent fdd37b5 commit 80e8854
Showing 31 changed files with 685 additions and 39 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.6.0] - 2024-11-28
### Added
- Added filter for Iceberg tables in `beekeeper-scheduler-apiary` to prevent scheduling their paths and metadata for deletion.
- Added `IcebergValidator` to ensure Iceberg tables are identified and excluded from cleanup operations.

## [3.5.7] - 2024-10-25
### Changed
- Added error handling for bad requests with incorrect sort parameters.
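
The 3.6.0 entries above rely on identifying Iceberg tables from their Hive table parameters. The IsIcebergTablePredicate class itself is not shown in the excerpts below, so here is a minimal, hypothetical sketch of such a check, assuming only the two parameters exercised in IcebergValidatorTest (table_type and metadata_location); it is illustrative and not the implementation from this commit:

// Hypothetical sketch of an Iceberg-table check based on Hive table parameters.
// Not the IsIcebergTablePredicate committed here; names and checks are assumptions.
import java.util.Map;
import java.util.function.Predicate;

public class IsIcebergTablePredicateSketch implements Predicate<Map<String, String>> {

  private static final String TABLE_TYPE_KEY = "table_type";
  private static final String METADATA_LOCATION_KEY = "metadata_location";

  @Override
  public boolean test(Map<String, String> tableParameters) {
    if (tableParameters == null || tableParameters.isEmpty()) {
      return false;
    }
    String tableType = tableParameters.get(TABLE_TYPE_KEY);
    String metadataLocation = tableParameters.get(METADATA_LOCATION_KEY);
    // Treat the table as Iceberg when its type mentions Iceberg or it carries a metadata location.
    boolean hasIcebergType = tableType != null && tableType.toLowerCase().contains("iceberg");
    boolean hasMetadataLocation = metadataLocation != null && !metadataLocation.trim().isEmpty();
    return hasIcebergType || hasMetadataLocation;
  }
}

A predicate of this shape would return true for the table_type=ICEBERG and metadata_location cases asserted in IcebergValidatorTest further down.
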
HiveMetadataCleaner.java
@@ -18,28 +18,35 @@
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner;
import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.config.MetadataType;
import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;
import com.expediagroup.beekeeper.core.monitoring.TimedTaggable;

public class HiveMetadataCleaner implements MetadataCleaner {

private DeletedMetadataReporter deletedMetadataReporter;
private IcebergValidator icebergValidator;

public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter) {
public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter, IcebergValidator icebergValidator) {
this.deletedMetadataReporter = deletedMetadataReporter;
this.icebergValidator = icebergValidator;
}

@Override
@TimedTaggable("hive-table-deleted")
public void dropTable(HousekeepingMetadata housekeepingMetadata, CleanerClient client) {
icebergValidator.throwExceptionIfIceberg(housekeepingMetadata.getDatabaseName(),
housekeepingMetadata.getTableName());
client.dropTable(housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName());
deletedMetadataReporter.reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

@Override
@TimedTaggable("hive-partition-deleted")
public boolean dropPartition(HousekeepingMetadata housekeepingMetadata, CleanerClient client) {
icebergValidator.throwExceptionIfIceberg(housekeepingMetadata.getDatabaseName(),
housekeepingMetadata.getTableName());
boolean partitionDeleted = client
.dropPartition(housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(),
housekeepingMetadata.getPartitionName());
IcebergValidator.java
@@ -0,0 +1,63 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.beekeeper.cleanup.validation;

import static java.lang.String.format;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
import com.expediagroup.beekeeper.core.predicate.IsIcebergTablePredicate;

public class IcebergValidator {

  private static final Logger log = LoggerFactory.getLogger(IcebergValidator.class);

  private final CleanerClientFactory cleanerClientFactory;
  private final IsIcebergTablePredicate isIcebergTablePredicate;

  public IcebergValidator(CleanerClientFactory cleanerClientFactory) {
    this.cleanerClientFactory = cleanerClientFactory;
    this.isIcebergTablePredicate = new IsIcebergTablePredicate();
  }

  /**
   * Beekeeper currently does not support the Iceberg table format. Iceberg tables in the Hive Metastore do not store
   * partition information, which can cause Beekeeper to attempt to clean up an entire table. This method checks
   * whether the table is an Iceberg table and throws a {@link BeekeeperIcebergException} to stop the cleanup.
   *
   * @param databaseName name of the database the table belongs to
   * @param tableName name of the table to check
   */
  public void throwExceptionIfIceberg(String databaseName, String tableName) {
    try (CleanerClient client = cleanerClientFactory.newInstance()) {
      Map<String, String> tableParameters = client.getTableProperties(databaseName, tableName);

      if (isIcebergTablePredicate.test(tableParameters)) {
        throw new BeekeeperIcebergException(
            format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName));
      }
    } catch (Exception e) {
      throw new BeekeeperIcebergException(
          format("Unexpected exception when identifying if table %s.%s is Iceberg.", databaseName, tableName), e);
    }
  }
}
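
As a usage illustration only (the real call sites are HiveMetadataCleaner above and the handlers referenced in the commit messages), a caller might guard a drop with the validator like this; the class and method names below are hypothetical and not part of this commit:

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;

// Hypothetical example: validate before dropping and skip Iceberg tables
// instead of letting the whole cleanup run fail.
public class GuardedDropExample {

  private static final Logger log = LoggerFactory.getLogger(GuardedDropExample.class);

  public void dropIfNotIceberg(
      IcebergValidator validator, CleanerClient client, String databaseName, String tableName) {
    try {
      validator.throwExceptionIfIceberg(databaseName, tableName);
      client.dropTable(databaseName, tableName);
    } catch (BeekeeperIcebergException e) {
      // Iceberg tables are unsupported; leave the table untouched and move on.
      log.warn("Skipping cleanup of {}.{}: {}", databaseName, tableName, e.getMessage());
    }
  }
}
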
S3DryRunPathCleanerTest.java
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2023 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -23,13 +23,13 @@
import java.time.LocalDateTime;

import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider;
import org.junit.Rule;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.utility.DockerImageName;

@@ -58,20 +58,18 @@ class S3DryRunPathCleanerTest {
private HousekeepingPath housekeepingPath;
private AmazonS3 amazonS3;
private @Mock BytesDeletedReporter bytesDeletedReporter;

private boolean dryRunEnabled = true;

private S3PathCleaner s3DryRunPathCleaner;

@Rule
@Container
public static LocalStackContainer awsContainer = new LocalStackContainer(
DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3);
static {
awsContainer.start();
}
public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint();

@BeforeEach
void setUp() {
String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint();
amazonS3 = AmazonS3ClientBuilder
.standard()
.withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey"))
HiveMetadataCleanerTest.java
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2021 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -15,6 +15,8 @@
*/
package com.expediagroup.beekeeper.cleanup.hive;

import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
@@ -26,7 +28,9 @@
import org.mockito.junit.jupiter.MockitoExtension;

import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.config.MetadataType;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;

@ExtendWith(MockitoExtension.class)
@@ -35,6 +39,7 @@ public class HiveMetadataCleanerTest {
private @Mock HousekeepingMetadata housekeepingMetadata;
private @Mock DeletedMetadataReporter deletedMetadataReporter;
private @Mock HiveClient hiveClient;
private @Mock IcebergValidator icebergValidator;

private HiveMetadataCleaner cleaner;
private static final String DATABASE = "database";
@@ -43,14 +48,18 @@ public class HiveMetadataCleanerTest {

@BeforeEach
public void init() {
cleaner = new HiveMetadataCleaner(deletedMetadataReporter);
cleaner = new HiveMetadataCleaner(deletedMetadataReporter, icebergValidator);
}

@Test
public void typicalDropTable() {
when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);

cleaner.dropTable(housekeepingMetadata, hiveClient);

verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);
verify(hiveClient).dropTable(DATABASE, TABLE_NAME);
verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

@@ -62,6 +71,9 @@ public void typicalDropPartition() {
when(hiveClient.dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME)).thenReturn(true);

cleaner.dropPartition(housekeepingMetadata, hiveClient);

verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);
verify(hiveClient).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME);
verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION);
}

@@ -81,4 +93,36 @@ public void tableExists() {
cleaner.tableExists(hiveClient, DATABASE, TABLE_NAME);
verify(hiveClient).tableExists(DATABASE, TABLE_NAME);
}

@Test
public void doesNotDropTableWhenIcebergTable() {
when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);
doThrow(new BeekeeperIcebergException("Iceberg table"))
.when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);

assertThrows(
BeekeeperIcebergException.class,
() -> cleaner.dropTable(housekeepingMetadata, hiveClient)
);

verify(hiveClient, never()).dropTable(DATABASE, TABLE_NAME);
verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

@Test
public void doesNotDropPartitionWhenIcebergTable() {
when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);
doThrow(new BeekeeperIcebergException("Iceberg table"))
.when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);

assertThrows(
BeekeeperIcebergException.class,
() -> cleaner.dropPartition(housekeepingMetadata, hiveClient)
);

verify(hiveClient, never()).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME);
verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION);
}
}
IcebergValidatorTest.java
@@ -0,0 +1,92 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.beekeeper.cleanup.validation;

import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;

import org.junit.Before;
import org.junit.Test;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;

public class IcebergValidatorTest {

  private CleanerClientFactory cleanerClientFactory;
  private CleanerClient cleanerClient;
  private IcebergValidator icebergValidator;

  @Before
  public void setUp() throws Exception {
    cleanerClientFactory = mock(CleanerClientFactory.class);
    cleanerClient = mock(CleanerClient.class);
    when(cleanerClientFactory.newInstance()).thenReturn(cleanerClient);
    icebergValidator = new IcebergValidator(cleanerClientFactory);
  }

  @Test(expected = BeekeeperIcebergException.class)
  public void shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", "ICEBERG");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    icebergValidator.throwExceptionIfIceberg("db", "table");
    verify(cleanerClientFactory).newInstance();
    verify(cleanerClient).close();
  }

  @Test(expected = BeekeeperIcebergException.class)
  public void shouldThrowExceptionWhenMetadataIsIceberg() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("metadata_location", "s3://db/table/metadata/0000.json");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    icebergValidator.throwExceptionIfIceberg("db", "table");
  }

  @Test
  public void shouldNotThrowExceptionForNonIcebergTable() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", "HIVE_TABLE");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    icebergValidator.throwExceptionIfIceberg("db", "table");
    verify(cleanerClientFactory).newInstance();
    verify(cleanerClient).close();
  }

  @Test
  public void shouldThrowExceptionWhenOutputFormatIsNull() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", null);
    properties.put("metadata_location", null);

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    assertThatThrownBy(() -> icebergValidator.throwExceptionIfIceberg("db", "table"))
        .isInstanceOf(BeekeeperIcebergException.class);
  }
}
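
This new test class uses JUnit 4 constructs (@Before and @Test(expected = ...)), while the neighbouring cleaner tests use JUnit 5. A hedged sketch of the first case rewritten in JUnit 5 style, should the suite be aligned later, could look like the following (an assumption only, not part of this commit):

import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;

// Hypothetical JUnit 5 rewrite of the first case above; illustrative only.
public class IcebergValidatorJUnit5SketchTest {

  private CleanerClientFactory cleanerClientFactory;
  private CleanerClient cleanerClient;
  private IcebergValidator icebergValidator;

  @BeforeEach
  public void setUp() throws Exception {
    cleanerClientFactory = mock(CleanerClientFactory.class);
    cleanerClient = mock(CleanerClient.class);
    when(cleanerClientFactory.newInstance()).thenReturn(cleanerClient);
    icebergValidator = new IcebergValidator(cleanerClientFactory);
  }

  @Test
  public void shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", "ICEBERG");
    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    // assertThrows replaces the JUnit 4 expected-exception attribute.
    assertThrows(BeekeeperIcebergException.class,
        () -> icebergValidator.throwExceptionIfIceberg("db", "table"));
  }
}
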
BeekeeperIcebergException.java
@@ -0,0 +1,33 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.beekeeper.core.error;

public class BeekeeperIcebergException extends BeekeeperException {

  private static final long serialVersionUID = 1L;

  public BeekeeperIcebergException(String message, Exception e) {
    super(message, e);
  }

  public BeekeeperIcebergException(String message, Throwable e) {
    super(message, e);
  }

  public BeekeeperIcebergException(String message) {
    super(message);
  }
}