Skip to content

Commit

Permalink
Merge main. Update changelog
Browse files Browse the repository at this point in the history
  • Loading branch information
JayGreeeen committed Nov 28, 2024
2 parents bd86171 + 347aabd commit b084721
Show file tree
Hide file tree
Showing 38 changed files with 695 additions and 44 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [3.7.0] - 2024-11-28
### Added
- Added a db migration file and implementation of a new `beekeeper-history` table to track beekeeper activity.

## [3.6.0] - 2024-11-28
### Added
- Added filter for Iceberg tables in `beekeeper-scheduler-apiary` to prevent scheduling paths and metadata for deletion.
- Added `IcebergValidator` to ensure Iceberg tables are identified and excluded from cleanup operations.

## [3.5.7] - 2024-10-25
### Changed
- Added error handling for bad requests with incorrect sort parameters.
Expand Down
2 changes: 1 addition & 1 deletion beekeeper-api/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<parent>
<artifactId>beekeeper-parent</artifactId>
<groupId>com.expediagroup</groupId>
<version>3.5.8-SNAPSHOT</version>
<version>3.5.9-SNAPSHOT</version>
</parent>

<artifactId>beekeeper-api</artifactId>
Expand Down
2 changes: 1 addition & 1 deletion beekeeper-cleanup/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>beekeeper-parent</artifactId>
<groupId>com.expediagroup</groupId>
<version>3.5.8-SNAPSHOT</version>
<version>3.5.9-SNAPSHOT</version>
</parent>

<artifactId>beekeeper-cleanup</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,28 +18,35 @@
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.MetadataCleaner;
import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.config.MetadataType;
import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;
import com.expediagroup.beekeeper.core.monitoring.TimedTaggable;

public class HiveMetadataCleaner implements MetadataCleaner {

private DeletedMetadataReporter deletedMetadataReporter;
private IcebergValidator icebergValidator;

public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter) {
public HiveMetadataCleaner(DeletedMetadataReporter deletedMetadataReporter, IcebergValidator icebergValidator) {
this.deletedMetadataReporter = deletedMetadataReporter;
this.icebergValidator = icebergValidator;
}

@Override
@TimedTaggable("hive-table-deleted")
public void dropTable(HousekeepingMetadata housekeepingMetadata, CleanerClient client) {
  // Resolve the table identity once; it is used by both the validator and the drop call.
  String databaseName = housekeepingMetadata.getDatabaseName();
  String tableName = housekeepingMetadata.getTableName();

  // The validator throws for Iceberg tables, so the drop and the report below are skipped for them.
  icebergValidator.throwExceptionIfIceberg(databaseName, tableName);

  client.dropTable(databaseName, tableName);
  deletedMetadataReporter.reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

@Override
@TimedTaggable("hive-partition-deleted")
public boolean dropPartition(HousekeepingMetadata housekeepingMetadata, CleanerClient client) {
icebergValidator.throwExceptionIfIceberg(housekeepingMetadata.getDatabaseName(),
housekeepingMetadata.getTableName());
boolean partitionDeleted = client
.dropPartition(housekeepingMetadata.getDatabaseName(), housekeepingMetadata.getTableName(),
housekeepingMetadata.getPartitionName());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.beekeeper.cleanup.validation;

import static java.lang.String.format;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
import com.expediagroup.beekeeper.core.predicate.IsIcebergTablePredicate;

/**
 * Guards cleanup operations against Iceberg tables, which Beekeeper does not currently support.
 *
 * <p>A new {@link CleanerClient} is obtained from the supplied {@link CleanerClientFactory} for each
 * check and closed again once the table properties have been inspected.
 */
public class IcebergValidator {

  private static final Logger log = LoggerFactory.getLogger(IcebergValidator.class);

  private final CleanerClientFactory cleanerClientFactory;
  private final IsIcebergTablePredicate isIcebergTablePredicate;

  /**
   * @param cleanerClientFactory factory used to create a client for reading table properties
   */
  public IcebergValidator(CleanerClientFactory cleanerClientFactory) {
    this.cleanerClientFactory = cleanerClientFactory;
    this.isIcebergTablePredicate = new IsIcebergTablePredicate();
  }

  /**
   * Beekeeper currently does not support the Iceberg format. Iceberg tables in the Hive Metastore do not store
   * partition information, causing Beekeeper to attempt to clean up the entire table due to the missing information.
   * This method checks if the table is an Iceberg table and throws a BeekeeperIcebergException to stop the process.
   *
   * @param databaseName name of the database that contains the table to check
   * @param tableName name of the table to check
   * @throws BeekeeperIcebergException if the table is identified as Iceberg, or if the table properties could not
   *           be retrieved or evaluated (the underlying failure is attached as the cause)
   */
  public void throwExceptionIfIceberg(String databaseName, String tableName) {
    try (CleanerClient client = cleanerClientFactory.newInstance()) {
      Map<String, String> tableParameters = client.getTableProperties(databaseName, tableName);

      if (isIcebergTablePredicate.test(tableParameters)) {
        throw new BeekeeperIcebergException(
            format("Iceberg table %s.%s is not currently supported in Beekeeper.", databaseName, tableName));
      }
    } catch (BeekeeperIcebergException e) {
      // Rethrow unchanged: without this branch the generic handler below would re-wrap the
      // "not currently supported" error as an "Unexpected exception", hiding the real reason.
      throw e;
    } catch (Exception e) {
      // Any other failure (client creation, property lookup, predicate evaluation, close) is
      // reported as a validation failure so that callers do not proceed with the cleanup.
      throw new BeekeeperIcebergException(
          format("Unexpected exception when identifying if table %s.%s is Iceberg.", databaseName, tableName), e);
    }
  }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2023 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,13 +23,13 @@
import java.time.LocalDateTime;

import org.apache.hadoop.fs.s3a.BasicAWSCredentialsProvider;
import org.junit.Rule;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.testcontainers.containers.localstack.LocalStackContainer;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.junit.jupiter.Testcontainers;
import org.testcontainers.utility.DockerImageName;

Expand Down Expand Up @@ -58,20 +58,18 @@ class S3DryRunPathCleanerTest {
private HousekeepingPath housekeepingPath;
private AmazonS3 amazonS3;
private @Mock BytesDeletedReporter bytesDeletedReporter;

private boolean dryRunEnabled = true;

private S3PathCleaner s3DryRunPathCleaner;

@Rule
@Container
public static LocalStackContainer awsContainer = new LocalStackContainer(
DockerImageName.parse("localstack/localstack:0.14.2")).withServices(S3);
static {
awsContainer.start();
}
public static String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint();

@BeforeEach
void setUp() {
String S3_ENDPOINT = awsContainer.getEndpointConfiguration(S3).getServiceEndpoint();
amazonS3 = AmazonS3ClientBuilder
.standard()
.withCredentials(new BasicAWSCredentialsProvider("accesskey", "secretkey"))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) 2019-2021 Expedia, Inc.
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,6 +15,8 @@
*/
package com.expediagroup.beekeeper.cleanup.hive;

import static org.junit.Assert.assertThrows;
import static org.mockito.Mockito.doThrow;
import static org.mockito.Mockito.never;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
Expand All @@ -26,7 +28,9 @@
import org.mockito.junit.jupiter.MockitoExtension;

import com.expediagroup.beekeeper.cleanup.monitoring.DeletedMetadataReporter;
import com.expediagroup.beekeeper.cleanup.validation.IcebergValidator;
import com.expediagroup.beekeeper.core.config.MetadataType;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;
import com.expediagroup.beekeeper.core.model.HousekeepingMetadata;

@ExtendWith(MockitoExtension.class)
Expand All @@ -35,6 +39,7 @@ public class HiveMetadataCleanerTest {
private @Mock HousekeepingMetadata housekeepingMetadata;
private @Mock DeletedMetadataReporter deletedMetadataReporter;
private @Mock HiveClient hiveClient;
private @Mock IcebergValidator icebergValidator;

private HiveMetadataCleaner cleaner;
private static final String DATABASE = "database";
Expand All @@ -43,14 +48,18 @@ public class HiveMetadataCleanerTest {

@BeforeEach
public void init() {
cleaner = new HiveMetadataCleaner(deletedMetadataReporter);
cleaner = new HiveMetadataCleaner(deletedMetadataReporter, icebergValidator);
}

// Happy path: when the validator does not object, the table is dropped and the deletion is reported.
@Test
public void typicalDropTable() {
// The cleaner reads the database and table name from the metadata entity.
when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);

cleaner.dropTable(housekeepingMetadata, hiveClient);

// The Iceberg check, the drop and the report must each have been invoked with the stubbed identifiers.
verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);
verify(hiveClient).dropTable(DATABASE, TABLE_NAME);
verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

Expand All @@ -62,6 +71,9 @@ public void typicalDropPartition() {
when(hiveClient.dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME)).thenReturn(true);

cleaner.dropPartition(housekeepingMetadata, hiveClient);

verify(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);
verify(hiveClient).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME);
verify(deletedMetadataReporter).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION);
}

Expand All @@ -81,4 +93,36 @@ public void tableExists() {
cleaner.tableExists(hiveClient, DATABASE, TABLE_NAME);
verify(hiveClient).tableExists(DATABASE, TABLE_NAME);
}

// When the validator rejects the table, the exception propagates and nothing is dropped or reported.
@Test
public void doesNotDropTableWhenIcebergTable() {
  when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
  when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);
  BeekeeperIcebergException icebergFailure = new BeekeeperIcebergException("Iceberg table");
  doThrow(icebergFailure).when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);

  assertThrows(BeekeeperIcebergException.class, () -> cleaner.dropTable(housekeepingMetadata, hiveClient));

  // Neither the metastore drop nor the metrics report may happen after the validator has thrown.
  verify(hiveClient, never()).dropTable(DATABASE, TABLE_NAME);
  verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_TABLE);
}

// When the validator rejects the table, the exception propagates and no partition is dropped or reported.
@Test
public void doesNotDropPartitionWhenIcebergTable() {
  when(housekeepingMetadata.getDatabaseName()).thenReturn(DATABASE);
  when(housekeepingMetadata.getTableName()).thenReturn(TABLE_NAME);
  BeekeeperIcebergException icebergFailure = new BeekeeperIcebergException("Iceberg table");
  doThrow(icebergFailure).when(icebergValidator).throwExceptionIfIceberg(DATABASE, TABLE_NAME);

  assertThrows(BeekeeperIcebergException.class, () -> cleaner.dropPartition(housekeepingMetadata, hiveClient));

  // Neither the partition drop nor the metrics report may happen after the validator has thrown.
  verify(hiveClient, never()).dropPartition(DATABASE, TABLE_NAME, PARTITION_NAME);
  verify(deletedMetadataReporter, never()).reportTaggable(housekeepingMetadata, MetadataType.HIVE_PARTITION);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/**
* Copyright (C) 2019-2024 Expedia, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.expediagroup.beekeeper.cleanup.validation;

import static org.assertj.core.api.AssertionsForClassTypes.assertThatThrownBy;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;

import java.util.HashMap;
import java.util.Map;

import org.junit.Before;
import org.junit.Test;

import com.expediagroup.beekeeper.cleanup.metadata.CleanerClient;
import com.expediagroup.beekeeper.cleanup.metadata.CleanerClientFactory;
import com.expediagroup.beekeeper.core.error.BeekeeperIcebergException;

/**
 * Unit tests for {@link IcebergValidator}, using a mocked {@link CleanerClientFactory} and
 * {@link CleanerClient} to control the table properties the validator sees.
 */
public class IcebergValidatorTest {

  private CleanerClientFactory cleanerClientFactory;
  private CleanerClient cleanerClient;
  private IcebergValidator icebergValidator;

  @Before
  public void setUp() throws Exception {
    cleanerClientFactory = mock(CleanerClientFactory.class);
    cleanerClient = mock(CleanerClient.class);
    when(cleanerClientFactory.newInstance()).thenReturn(cleanerClient);
    icebergValidator = new IcebergValidator(cleanerClientFactory);
  }

  @Test
  public void shouldThrowExceptionWhenTableTypeIsIceberg() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", "ICEBERG");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    // Assert the exception inline rather than via @Test(expected = ...): with "expected" the test
    // method exits at the throwing call, so the verifications below would never execute.
    assertThatThrownBy(() -> icebergValidator.throwExceptionIfIceberg("db", "table")).isInstanceOf(
        BeekeeperIcebergException.class);
    verify(cleanerClientFactory).newInstance();
    verify(cleanerClient).close();
  }

  @Test
  public void shouldThrowExceptionWhenMetadataIsIceberg() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("metadata_location", "s3://db/table/metadata/0000.json");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    assertThatThrownBy(() -> icebergValidator.throwExceptionIfIceberg("db", "table")).isInstanceOf(
        BeekeeperIcebergException.class);
  }

  @Test
  public void shouldNotThrowExceptionForNonIcebergTable() throws Exception {
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", "HIVE_TABLE");

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    icebergValidator.throwExceptionIfIceberg("db", "table");
    // The validator must have created a client for the check and closed it afterwards.
    verify(cleanerClientFactory).newInstance();
    verify(cleanerClient).close();
  }

  @Test
  public void shouldThrowExceptionWhenOutputFormatIsNull() throws Exception {
    // Both properties inspected for Iceberg detection are present but null.
    // NOTE(review): the method name mentions "output format", but the properties set here are
    // table_type and metadata_location - confirm the intended name.
    Map<String, String> properties = new HashMap<>();
    properties.put("table_type", null);
    properties.put("metadata_location", null);

    when(cleanerClient.getTableProperties("db", "table")).thenReturn(properties);

    assertThatThrownBy(() -> icebergValidator.throwExceptionIfIceberg("db", "table")).isInstanceOf(
        BeekeeperIcebergException.class);
  }
}
2 changes: 1 addition & 1 deletion beekeeper-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>beekeeper-parent</artifactId>
<groupId>com.expediagroup</groupId>
<version>3.5.8-SNAPSHOT</version>
<version>3.5.9-SNAPSHOT</version>
</parent>

<artifactId>beekeeper-core</artifactId>
Expand Down
Loading

0 comments on commit b084721

Please sign in to comment.