From 30dd00cb6ce19d1e944e1f964964cde3642cdb57 Mon Sep 17 00:00:00 2001 From: Danny Mccormick Date: Fri, 27 Dec 2024 12:32:57 -0500 Subject: [PATCH] Remove amazon-web-services and kinesis folders + settings.gradle cleanup --- sdks/java/io/amazon-web-services/build.gradle | 74 - .../beam/sdk/io/aws/coders/AwsCoders.java | 141 -- .../beam/sdk/io/aws/coders/package-info.java | 19 - .../io/aws/dynamodb/AttributeValueCoder.java | 166 --- .../AttributeValueCoderProviderRegistrar.java | 37 - .../io/aws/dynamodb/AwsClientsProvider.java | 40 - .../aws/dynamodb/BasicDynamoDBProvider.java | 67 - .../beam/sdk/io/aws/dynamodb/DynamoDBIO.java | 630 --------- .../sdk/io/aws/dynamodb/package-info.java | 19 - .../beam/sdk/io/aws/options/AwsModule.java | 390 ------ .../beam/sdk/io/aws/options/AwsOptions.java | 130 -- .../options/AwsPipelineOptionsRegistrar.java | 36 - .../aws/options/S3ClientBuilderFactory.java | 25 - .../beam/sdk/io/aws/options/S3Options.java | 106 -- .../beam/sdk/io/aws/options/package-info.java | 22 - .../aws/s3/DefaultS3ClientBuilderFactory.java | 51 - .../DefaultS3FileSystemSchemeRegistrar.java | 38 - .../beam/sdk/io/aws/s3/S3FileSystem.java | 671 --------- .../io/aws/s3/S3FileSystemConfiguration.java | 124 -- .../sdk/io/aws/s3/S3FileSystemRegistrar.java | 55 - .../aws/s3/S3FileSystemSchemeRegistrar.java | 47 - .../aws/s3/S3ReadableSeekableByteChannel.java | 180 --- .../beam/sdk/io/aws/s3/S3ResourceId.java | 229 --- .../sdk/io/aws/s3/S3WritableByteChannel.java | 212 --- .../beam/sdk/io/aws/s3/package-info.java | 19 - .../sdk/io/aws/sns/AwsClientsProvider.java | 40 - .../beam/sdk/io/aws/sns/BasicSnsProvider.java | 66 - .../sdk/io/aws/sns/PublishResultCoders.java | 124 -- .../io/aws/sns/SnsCoderProviderRegistrar.java | 38 - .../org/apache/beam/sdk/io/aws/sns/SnsIO.java | 420 ------ .../beam/sdk/io/aws/sns/package-info.java | 19 - .../sdk/io/aws/sqs/SqsCheckpointMark.java | 101 -- .../beam/sdk/io/aws/sqs/SqsConfiguration.java | 81 -- .../org/apache/beam/sdk/io/aws/sqs/SqsIO.java | 250 ---- .../beam/sdk/io/aws/sqs/SqsMessageCoder.java | 89 -- .../sdk/io/aws/sqs/SqsUnboundedReader.java | 944 ------------- .../sdk/io/aws/sqs/SqsUnboundedSource.java | 88 -- .../beam/sdk/io/aws/sqs/package-info.java | 19 - .../apache/beam/sdk/io/aws/ITEnvironment.java | 148 -- .../beam/sdk/io/aws/coders/AwsCodersTest.java | 68 - .../aws/dynamodb/AttributeValueCoderTest.java | 211 --- .../sdk/io/aws/dynamodb/DynamoDBIOIT.java | 222 --- .../io/aws/dynamodb/DynamoDBIOReadTest.java | 224 --- .../io/aws/dynamodb/DynamoDBIOWriteTest.java | 430 ------ .../dynamodb/StaticAwsClientsProvider.java | 55 - .../AwsHttpClientConfigurationTest.java | 51 - .../sdk/io/aws/options/AwsModuleTest.java | 265 ---- .../sdk/io/aws/s3/MatchResultMatcher.java | 118 -- .../beam/sdk/io/aws/s3/S3FileSystemIT.java | 135 -- .../beam/sdk/io/aws/s3/S3FileSystemTest.java | 1248 ----------------- .../beam/sdk/io/aws/s3/S3ResourceIdTest.java | 348 ----- .../beam/sdk/io/aws/s3/S3TestUtils.java | 169 --- .../io/aws/s3/S3WritableByteChannelTest.java | 225 --- .../io/aws/sns/PublishResultCodersTest.java | 91 -- .../apache/beam/sdk/io/aws/sns/SnsIOIT.java | 159 --- .../apache/beam/sdk/io/aws/sns/SnsIOTest.java | 223 --- .../sdk/io/aws/sqs/EmbeddedSqsServer.java | 69 - .../apache/beam/sdk/io/aws/sqs/SqsIOIT.java | 112 -- .../apache/beam/sdk/io/aws/sqs/SqsIOTest.java | 72 - .../sdk/io/aws/sqs/SqsMessageCoderTest.java | 62 - .../io/aws/sqs/SqsUnboundedReaderTest.java | 196 --- .../io/aws/sqs/SqsUnboundedSourceTest.java | 51 - sdks/java/io/kinesis/build.gradle | 57 - .../io/kinesis/expansion-service/build.gradle | 39 - .../sdk/io/kinesis/AWSClientsProvider.java | 38 - .../sdk/io/kinesis/BasicKinesisProvider.java | 122 -- .../sdk/io/kinesis/CheckpointGenerator.java | 29 - .../beam/sdk/io/kinesis/CustomOptional.java | 102 -- .../kinesis/DynamicCheckpointGenerator.java | 59 - .../io/kinesis/GetKinesisRecordsResult.java | 60 - .../KinesisClientThrottledException.java | 28 - .../apache/beam/sdk/io/kinesis/KinesisIO.java | 1116 --------------- .../sdk/io/kinesis/KinesisPartitioner.java | 27 - .../beam/sdk/io/kinesis/KinesisReader.java | 222 --- .../io/kinesis/KinesisReaderCheckpoint.java | 81 -- .../beam/sdk/io/kinesis/KinesisRecord.java | 133 -- .../sdk/io/kinesis/KinesisRecordCoder.java | 76 - .../kinesis/KinesisShardClosedException.java | 26 - .../beam/sdk/io/kinesis/KinesisSource.java | 164 --- .../io/kinesis/KinesisTransformRegistrar.java | 266 ---- .../beam/sdk/io/kinesis/RateLimitPolicy.java | 37 - .../io/kinesis/RateLimitPolicyFactory.java | 146 -- .../beam/sdk/io/kinesis/RecordFilter.java | 41 - .../beam/sdk/io/kinesis/ShardCheckpoint.java | 180 --- .../beam/sdk/io/kinesis/ShardReadersPool.java | 394 ------ .../sdk/io/kinesis/ShardRecordsIterator.java | 153 -- .../io/kinesis/SimplifiedKinesisClient.java | 359 ----- .../beam/sdk/io/kinesis/StartingPoint.java | 88 -- .../io/kinesis/StaticCheckpointGenerator.java | 41 - .../io/kinesis/TransientKinesisException.java | 28 - .../sdk/io/kinesis/WatermarkParameters.java | 98 -- .../beam/sdk/io/kinesis/WatermarkPolicy.java | 29 - .../io/kinesis/WatermarkPolicyFactory.java | 152 -- .../beam/sdk/io/kinesis/package-info.java | 20 - .../beam/sdk/io/kinesis/serde/AwsModule.java | 213 --- .../kinesis/serde/AwsSerializableUtils.java | 48 - .../sdk/io/kinesis/serde/package-info.java | 19 - .../sdk/io/kinesis/AmazonKinesisMock.java | 504 ------- .../BasicKinesisClientProviderTest.java | 53 - .../sdk/io/kinesis/CustomOptionalTest.java | 44 - .../DynamicCheckpointGeneratorTest.java | 55 - .../beam/sdk/io/kinesis/KinesisIOIT.java | 261 ---- .../sdk/io/kinesis/KinesisIOReadTest.java | 136 -- .../sdk/io/kinesis/KinesisIOWriteTest.java | 130 -- .../sdk/io/kinesis/KinesisMockReadTest.java | 110 -- .../sdk/io/kinesis/KinesisMockWriteTest.java | 255 ---- .../sdk/io/kinesis/KinesisProducerMock.java | 130 -- .../kinesis/KinesisReaderCheckpointTest.java | 64 - .../sdk/io/kinesis/KinesisReaderTest.java | 184 --- .../io/kinesis/KinesisRecordCoderTest.java | 46 - .../sdk/io/kinesis/KinesisServiceMock.java | 84 -- .../sdk/io/kinesis/KinesisTestOptions.java | 87 -- .../kinesis/RateLimitPolicyFactoryTest.java | 144 -- .../beam/sdk/io/kinesis/RecordFilterTest.java | 62 - .../sdk/io/kinesis/ShardCheckpointTest.java | 162 --- .../sdk/io/kinesis/ShardReadersPoolTest.java | 355 ----- .../io/kinesis/ShardRecordsIteratorTest.java | 186 --- .../kinesis/SimplifiedKinesisClientTest.java | 614 -------- .../sdk/io/kinesis/WatermarkPolicyTest.java | 166 --- .../sdk/io/kinesis/serde/AwsModuleTest.java | 172 --- .../serde/AwsSerializableUtilsTest.java | 174 --- settings.gradle.kts | 2 - 122 files changed, 19630 deletions(-) delete mode 100644 sdks/java/io/amazon-web-services/build.gradle delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/AwsCoders.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoder.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderProviderRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AwsClientsProvider.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/BasicDynamoDBProvider.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIO.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsOptions.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsPipelineOptionsRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3FileSystemSchemeRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemConfiguration.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemSchemeRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ReadableSeekableByteChannel.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ResourceId.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannel.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/AwsClientsProvider.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/BasicSnsProvider.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/PublishResultCoders.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsCoderProviderRegistrar.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsIO.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsCheckpointMark.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsConfiguration.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsIO.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoder.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReader.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSource.java delete mode 100644 sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/package-info.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/ITEnvironment.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/coders/AwsCodersTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOIT.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOReadTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/StaticAwsClientsProvider.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsHttpClientConfigurationTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/MatchResultMatcher.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemIT.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3ResourceIdTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannelTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/PublishResultCodersTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOIT.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/EmbeddedSqsServer.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOIT.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoderTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReaderTest.java delete mode 100644 sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSourceTest.java delete mode 100644 sdks/java/io/kinesis/build.gradle delete mode 100644 sdks/java/io/kinesis/expansion-service/build.gradle delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/AWSClientsProvider.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/BasicKinesisProvider.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientThrottledException.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisPartitioner.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisShardClosedException.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisTransformRegistrar.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicy.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactory.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardReadersPool.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkParameters.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicy.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyFactory.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/package-info.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsModule.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtils.java delete mode 100644 sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/package-info.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/BasicKinesisClientProviderTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOIT.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOReadTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOWriteTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockWriteTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisProducerMock.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisServiceMock.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactoryTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardReadersPoolTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsModuleTest.java delete mode 100644 sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtilsTest.java diff --git a/sdks/java/io/amazon-web-services/build.gradle b/sdks/java/io/amazon-web-services/build.gradle deleted file mode 100644 index b9ed51fbbf77..000000000000 --- a/sdks/java/io/amazon-web-services/build.gradle +++ /dev/null @@ -1,74 +0,0 @@ -import groovy.json.JsonOutput - -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -plugins { - id 'org.apache.beam.module' -} - -applyJavaNature( - automaticModuleName: 'org.apache.beam.sdk.io.aws', -) - -provideIntegrationTestingDependencies() -enableJavaPerformanceTesting() - -description = "Apache Beam :: SDKs :: Java :: IO :: Amazon Web Services" -ext.summary = "IO library to read and write Amazon Web Services services from Beam." - -dependencies { - implementation library.java.vendored_guava_32_1_2_jre - implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation library.java.aws_java_sdk_cloudwatch - implementation library.java.aws_java_sdk_core - implementation library.java.aws_java_sdk_dynamodb - implementation library.java.aws_java_sdk_s3 - implementation library.java.aws_java_sdk_sns - implementation library.java.aws_java_sdk_sqs - implementation library.java.aws_java_sdk_sts - implementation library.java.jackson_core - implementation library.java.jackson_annotations - implementation library.java.jackson_databind - implementation library.java.slf4j_api - implementation library.java.joda_time - implementation library.java.http_core - runtimeOnly library.java.commons_codec - runtimeOnly "org.apache.httpcomponents:httpclient:4.5.12" - testImplementation project(path: ":sdks:java:core", configuration: "shadowTest") - testImplementation project(path: ":sdks:java:extensions:avro", configuration: "testRuntimeMigration") - testImplementation project(path: ":sdks:java:io:common") - testImplementation "io.findify:s3mock_2.12:0.2.6" - testImplementation library.java.commons_lang3 - testImplementation library.java.hamcrest - testImplementation library.java.mockito_core - testImplementation library.java.junit - testImplementation library.java.testcontainers_localstack - testImplementation "org.assertj:assertj-core:3.11.1" - testImplementation 'org.elasticmq:elasticmq-rest-sqs_2.12:0.15.6' - testRuntimeOnly library.java.slf4j_jdk14 - testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") -} - -test { - systemProperty "beamTestPipelineOptions", JsonOutput.toJson([ - '--awsRegion=us-west-2', - '--awsCredentialsProvider={"@type" : "AWSStaticCredentialsProvider", "awsAccessKeyId" : "key_id_value","awsSecretKey" : "secret_value"}' - ]) - maxParallelForks 4 -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/AwsCoders.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/AwsCoders.java deleted file mode 100644 index 501bfc015860..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/AwsCoders.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.coders; - -import com.amazonaws.ResponseMetadata; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.http.SdkHttpMetadata; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Map; -import java.util.Optional; -import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.CustomCoder; -import org.apache.beam.sdk.coders.MapCoder; -import org.apache.beam.sdk.coders.NullableCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarIntCoder; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; - -/** {@link Coder}s for common AWS SDK objects. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public final class AwsCoders { - - private AwsCoders() {} - - /** - * Returns a new coder for ResponseMetadata. - * - * @return the ResponseMetadata coder - */ - public static Coder responseMetadata() { - return ResponseMetadataCoder.of(); - } - - /** - * Returns a new coder for SdkHttpMetadata. - * - * @return the SdkHttpMetadata coder - */ - public static Coder sdkHttpMetadata() { - return new SdkHttpMetadataCoder(true); - } - - /** - * Returns a new coder for SdkHttpMetadata that does not serialize the response headers. - * - * @return the SdkHttpMetadata coder - */ - public static Coder sdkHttpMetadataWithoutHeaders() { - return new SdkHttpMetadataCoder(false); - } - - private static class ResponseMetadataCoder extends AtomicCoder { - - private static final Coder> METADATA_ENCODER = - NullableCoder.of(MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())); - private static final ResponseMetadataCoder INSTANCE = new ResponseMetadataCoder(); - - private ResponseMetadataCoder() {} - - public static ResponseMetadataCoder of() { - return INSTANCE; - } - - @Override - public void encode(ResponseMetadata value, OutputStream outStream) - throws CoderException, IOException { - METADATA_ENCODER.encode( - ImmutableMap.of(ResponseMetadata.AWS_REQUEST_ID, value.getRequestId()), outStream); - } - - @Override - public ResponseMetadata decode(InputStream inStream) throws CoderException, IOException { - return new ResponseMetadata(METADATA_ENCODER.decode(inStream)); - } - } - - private static class SdkHttpMetadataCoder extends CustomCoder { - - private static final Coder STATUS_CODE_CODER = VarIntCoder.of(); - private static final Coder> HEADERS_ENCODER = - NullableCoder.of(MapCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())); - - private final boolean includeHeaders; - - protected SdkHttpMetadataCoder(boolean includeHeaders) { - this.includeHeaders = includeHeaders; - } - - @Override - public void encode(SdkHttpMetadata value, OutputStream outStream) - throws CoderException, IOException { - STATUS_CODE_CODER.encode(value.getHttpStatusCode(), outStream); - if (includeHeaders) { - HEADERS_ENCODER.encode(value.getHttpHeaders(), outStream); - } - } - - @Override - public SdkHttpMetadata decode(InputStream inStream) throws CoderException, IOException { - final int httpStatusCode = STATUS_CODE_CODER.decode(inStream); - HttpResponse httpResponse = new HttpResponse(null, null); - httpResponse.setStatusCode(httpStatusCode); - if (includeHeaders) { - Optional.ofNullable(HEADERS_ENCODER.decode(inStream)) - .ifPresent( - headers -> - headers.keySet().forEach(k -> httpResponse.addHeader(k, headers.get(k)))); - } - return SdkHttpMetadata.from(httpResponse); - } - - @Override - public void verifyDeterministic() throws NonDeterministicException { - STATUS_CODE_CODER.verifyDeterministic(); - if (includeHeaders) { - HEADERS_ENCODER.verifyDeterministic(); - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/package-info.java deleted file mode 100644 index 1b76a71ae647..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/coders/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines common coders for Amazon Web Services. */ -package org.apache.beam.sdk.io.aws.coders; diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoder.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoder.java deleted file mode 100644 index 4bdf8b51d3b2..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoder.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.util.List; -import java.util.stream.Collectors; -import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.BooleanCoder; -import org.apache.beam.sdk.coders.ByteArrayCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.coders.MapCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; - -/** A {@link Coder} that serializes and deserializes the {@link AttributeValue} objects. */ -public class AttributeValueCoder extends AtomicCoder { - - /** Data type of each value type in AttributeValue object. */ - private enum AttributeValueType { - s, // for String - n, // for Number - b, // for Byte - sS, // for List of String - nS, // for List of Number - bS, // for List of Byte - m, // for Map of String and AttributeValue - l, // for list of AttributeValue - bOOL, // for Boolean - nULLValue, // for null - } - - private static final AttributeValueCoder INSTANCE = new AttributeValueCoder(); - - private static final ListCoder LIST_STRING_CODER = ListCoder.of(StringUtf8Coder.of()); - private static final ListCoder LIST_BYTE_CODER = ListCoder.of(ByteArrayCoder.of()); - - private static final ListCoder LIST_ATTRIBUTE_CODER = - ListCoder.of(AttributeValueCoder.of()); - private static final MapCoder MAP_ATTRIBUTE_CODER = - MapCoder.of(StringUtf8Coder.of(), AttributeValueCoder.of()); - - private AttributeValueCoder() {} - - public static AttributeValueCoder of() { - return INSTANCE; - } - - @Override - public void encode(AttributeValue value, OutputStream outStream) throws IOException { - - if (value.getS() != null) { - StringUtf8Coder.of().encode(AttributeValueType.s.toString(), outStream); - StringUtf8Coder.of().encode(value.getS(), outStream); - } else if (value.getN() != null) { - StringUtf8Coder.of().encode(AttributeValueType.n.toString(), outStream); - StringUtf8Coder.of().encode(value.getN(), outStream); - } else if (value.getBOOL() != null) { - StringUtf8Coder.of().encode(AttributeValueType.bOOL.toString(), outStream); - BooleanCoder.of().encode(value.getBOOL(), outStream); - } else if (value.getB() != null) { - StringUtf8Coder.of().encode(AttributeValueType.b.toString(), outStream); - ByteArrayCoder.of().encode(convertToByteArray(value.getB()), outStream); - } else if (value.getSS() != null) { - StringUtf8Coder.of().encode(AttributeValueType.sS.toString(), outStream); - LIST_STRING_CODER.encode(value.getSS(), outStream); - } else if (value.getNS() != null) { - StringUtf8Coder.of().encode(AttributeValueType.nS.toString(), outStream); - LIST_STRING_CODER.encode(value.getNS(), outStream); - } else if (value.getBS() != null) { - StringUtf8Coder.of().encode(AttributeValueType.bS.toString(), outStream); - LIST_BYTE_CODER.encode(convertToListByteArray(value.getBS()), outStream); - } else if (value.getL() != null) { - StringUtf8Coder.of().encode(AttributeValueType.l.toString(), outStream); - LIST_ATTRIBUTE_CODER.encode(value.getL(), outStream); - } else if (value.getM() != null) { - StringUtf8Coder.of().encode(AttributeValueType.m.toString(), outStream); - MAP_ATTRIBUTE_CODER.encode(value.getM(), outStream); - } else if (value.getNULL() != null) { - StringUtf8Coder.of().encode(AttributeValueType.nULLValue.toString(), outStream); - BooleanCoder.of().encode(value.getNULL(), outStream); - } else { - throw new CoderException("Unknown Type"); - } - } - - @Override - public AttributeValue decode(InputStream inStream) throws IOException { - AttributeValue attrValue = new AttributeValue(); - - String type = StringUtf8Coder.of().decode(inStream); - AttributeValueType attrType = AttributeValueType.valueOf(type); - - switch (attrType) { - case s: - attrValue.setS(StringUtf8Coder.of().decode(inStream)); - break; - case n: - attrValue.setN(StringUtf8Coder.of().decode(inStream)); - break; - case bOOL: - attrValue.setBOOL(BooleanCoder.of().decode(inStream)); - break; - case b: - attrValue.setB(ByteBuffer.wrap(ByteArrayCoder.of().decode(inStream))); - break; - case sS: - attrValue.setSS(LIST_STRING_CODER.decode(inStream)); - break; - case nS: - attrValue.setNS(LIST_STRING_CODER.decode(inStream)); - break; - case bS: - attrValue.setBS(convertToListByteBuffer(LIST_BYTE_CODER.decode(inStream))); - break; - case l: - attrValue.setL(LIST_ATTRIBUTE_CODER.decode(inStream)); - break; - case m: - attrValue.setM(MAP_ATTRIBUTE_CODER.decode(inStream)); - break; - case nULLValue: - attrValue.setNULL(BooleanCoder.of().decode(inStream)); - break; - default: - throw new CoderException("Unknown Type"); - } - - return attrValue; - } - - private List convertToListByteArray(List listByteBuffer) { - return listByteBuffer.stream().map(this::convertToByteArray).collect(Collectors.toList()); - } - - private byte[] convertToByteArray(ByteBuffer buffer) { - byte[] bytes = new byte[buffer.remaining()]; - buffer.get(bytes); - buffer.position(buffer.position() - bytes.length); - return bytes; - } - - private List convertToListByteBuffer(List listByteArr) { - return listByteArr.stream().map(ByteBuffer::wrap).collect(Collectors.toList()); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderProviderRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderProviderRegistrar.java deleted file mode 100644 index 5a187e734d66..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderProviderRegistrar.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import com.google.auto.service.AutoService; -import java.util.List; -import org.apache.beam.sdk.coders.CoderProvider; -import org.apache.beam.sdk.coders.CoderProviderRegistrar; -import org.apache.beam.sdk.coders.CoderProviders; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** A {@link CoderProviderRegistrar} for standard types used with {@link DynamoDBIO}. */ -@AutoService(CoderProviderRegistrar.class) -public class AttributeValueCoderProviderRegistrar implements CoderProviderRegistrar { - @Override - public List getCoderProviders() { - return ImmutableList.of( - CoderProviders.forCoder(TypeDescriptor.of(AttributeValue.class), AttributeValueCoder.of())); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AwsClientsProvider.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AwsClientsProvider.java deleted file mode 100644 index f2d13b144e8d..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/AwsClientsProvider.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import java.io.Serializable; - -/** - * Provides instances of AWS clients. - * - *

Please note, that any instance of {@link AwsClientsProvider} must be {@link Serializable} to - * ensure it can be sent to worker machines. - */ -public interface AwsClientsProvider extends Serializable { - - /** @deprecated DynamoDBIO doesn't require a CloudWatch client */ - @Deprecated - @SuppressWarnings("return") - default AmazonCloudWatch getCloudWatchClient() { - return null; - } - - AmazonDynamoDB createDynamoDB(); -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/BasicDynamoDBProvider.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/BasicDynamoDBProvider.java deleted file mode 100644 index b4ee1be74abe..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/BasicDynamoDBProvider.java +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Basic implementation of {@link AwsClientsProvider} used by default in {@link DynamoDBIO}. */ -public class BasicDynamoDBProvider implements AwsClientsProvider { - private final String accessKey; - private final String secretKey; - private final Regions region; - private final @Nullable String serviceEndpoint; - - BasicDynamoDBProvider( - String accessKey, String secretKey, Regions region, @Nullable String serviceEndpoint) { - checkArgument(accessKey != null, "accessKey can not be null"); - checkArgument(secretKey != null, "secretKey can not be null"); - checkArgument(region != null, "region can not be null"); - this.accessKey = accessKey; - this.secretKey = secretKey; - this.region = region; - this.serviceEndpoint = serviceEndpoint; - } - - private AWSCredentialsProvider getCredentialsProvider() { - return new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)); - } - - @Override - public AmazonDynamoDB createDynamoDB() { - AmazonDynamoDBClientBuilder clientBuilder = - AmazonDynamoDBClientBuilder.standard().withCredentials(getCredentialsProvider()); - - if (serviceEndpoint == null) { - clientBuilder.withRegion(region); - } else { - clientBuilder.withEndpointConfiguration( - new EndpointConfiguration(serviceEndpoint, region.getName())); - } - - return clientBuilder.build(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIO.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIO.java deleted file mode 100644 index e2c04c58b45d..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIO.java +++ /dev/null @@ -1,630 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.mapping; -import static java.util.stream.Collectors.toList; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.model.AmazonDynamoDBException; -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest; -import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult; -import com.amazonaws.services.dynamodbv2.model.ScanRequest; -import com.amazonaws.services.dynamodbv2.model.ScanResult; -import com.amazonaws.services.dynamodbv2.model.WriteRequest; -import com.google.auto.value.AutoValue; -import java.io.IOException; -import java.io.Serializable; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.function.Predicate; -import java.util.stream.Collectors; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.ListCoder; -import org.apache.beam.sdk.coders.MapCoder; -import org.apache.beam.sdk.coders.SerializableCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.Reshuffle; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.apache.http.HttpStatus; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link PTransform}s to read/write from/to Amazon - * DynamoDB. - * - *

Writing to DynamoDB

- * - *

Example usage: - * - *

{@code
- * PCollection data = ...;
- * data.apply(
- *           DynamoDBIO.write()
- *               .withWriteRequestMapperFn(
- *                   (SerializableFunction>)
- *                       //Transforming your T data into KV
- *                       t -> KV.of(tableName, writeRequest))
- *               .withRetryConfiguration(
- *                    DynamoDBIO.RetryConfiguration.create(5, Duration.standardMinutes(1)))
- *               .withAwsClientsProvider(new BasicDynamoDbProvider(accessKey, secretKey, region));
- * }
- * - *

As a client, you need to provide at least the following things: - * - *

    - *
  • Retry configuration - *
  • Specify AwsClientsProvider. You can pass on the default one BasicDynamoDbProvider - *
  • Mapper function with a table name to map or transform your object into KV - *
- * - * Note: AWS does not allow writing duplicate keys within a single batch operation. If - * primary keys possibly repeat in your stream (i.e. an upsert stream), you may encounter a - * `ValidationError`. To address this you have to provide the key names corresponding to your - * primary key using {@link Write#withDeduplicateKeys(List)}. Based on these keys only the last - * observed element is kept. Nevertheless, if no deduplication keys are provided, identical elements - * are still deduplicated. - * - *

Reading from DynamoDB

- * - *

Example usage: - * - *

{@code
- * PCollection>> output =
- *     pipeline.apply(
- *             DynamoDBIO.>>read()
- *                 .withAwsClientsProvider(new BasicDynamoDBProvider(accessKey, secretKey, region))
- *                 .withScanRequestFn(
- *                     (SerializableFunction)
- *                         input -> new ScanRequest(tableName).withTotalSegments(1))
- *                 .items());
- * }
- * - *

As a client, you need to provide at least the following things: - * - *

    - *
  • Specify AwsClientsProvider. You can pass on the default one BasicDynamoDBProvider - *
  • ScanRequestFn, which you build a ScanRequest object with at least table name and total - * number of segment. Note This number should base on the number of your workers - *
- * - * @deprecated Module beam-sdks-java-io-amazon-web-services is deprecated and will be - * eventually removed. Please migrate to {@link org.apache.beam.sdk.io.aws2.dynamodb.DynamoDBIO} - * in module beam-sdks-java-io-amazon-web-services2. - */ -@Deprecated -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public final class DynamoDBIO { - public static Read read() { - return new AutoValue_DynamoDBIO_Read.Builder().build(); - } - - public static Write write() { - return new AutoValue_DynamoDBIO_Write.Builder() - .setDeduplicateKeys(new ArrayList<>()) - .build(); - } - - /** Read data from DynamoDB and return ScanResult. */ - @AutoValue - public abstract static class Read extends PTransform> { - - abstract @Nullable AwsClientsProvider getAwsClientsProvider(); - - abstract @Nullable SerializableFunction getScanRequestFn(); - - abstract @Nullable Integer getSegmentId(); - - abstract @Nullable SerializableFunction getScanResultMapperFn(); - - abstract @Nullable Coder getCoder(); - - abstract Builder toBuilder(); - - @AutoValue.Builder - abstract static class Builder { - - abstract Builder setAwsClientsProvider(AwsClientsProvider awsClientsProvider); - - abstract Builder setScanRequestFn(SerializableFunction fn); - - abstract Builder setSegmentId(Integer segmentId); - - abstract Builder setScanResultMapperFn( - SerializableFunction scanResultMapperFn); - - abstract Builder setCoder(Coder coder); - - abstract Read build(); - } - - public Read withAwsClientsProvider(AwsClientsProvider awsClientsProvider) { - return toBuilder().setAwsClientsProvider(awsClientsProvider).build(); - } - - public Read withAwsClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region, String serviceEndpoint) { - return withAwsClientsProvider( - new BasicDynamoDBProvider(awsAccessKey, awsSecretKey, region, serviceEndpoint)); - } - - public Read withAwsClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region) { - return withAwsClientsProvider(awsAccessKey, awsSecretKey, region, null); - } - - /** - * Can't pass ScanRequest object directly from client since this object is not full - * serializable. - */ - public Read withScanRequestFn(SerializableFunction fn) { - return toBuilder().setScanRequestFn(fn).build(); - } - - private Read withSegmentId(Integer segmentId) { - checkArgument(segmentId != null, "segmentId can not be null"); - return toBuilder().setSegmentId(segmentId).build(); - } - - public Read withScanResultMapperFn(SerializableFunction scanResultMapperFn) { - checkArgument(scanResultMapperFn != null, "scanResultMapper can not be null"); - return toBuilder().setScanResultMapperFn(scanResultMapperFn).build(); - } - - public Read>> items() { - // safe cast as both mapper and coder are updated accordingly - Read>> self = (Read>>) this; - return self.withScanResultMapperFn(new DynamoDBIO.Read.ItemsMapper()) - .withCoder(ListCoder.of(MapCoder.of(StringUtf8Coder.of(), AttributeValueCoder.of()))); - } - - public Read withCoder(Coder coder) { - checkArgument(coder != null, "coder can not be null"); - return toBuilder().setCoder(coder).build(); - } - - @Override - public PCollection expand(PBegin input) { - LoggerFactory.getLogger(DynamoDBIO.class) - .warn( - "You are using a deprecated IO for DynamoDB. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - checkArgument((getScanRequestFn() != null), "withScanRequestFn() is required"); - checkArgument((getAwsClientsProvider() != null), "withAwsClientsProvider() is required"); - ScanRequest scanRequest = getScanRequestFn().apply(null); - checkArgument( - (scanRequest.getTotalSegments() != null && scanRequest.getTotalSegments() > 0), - "TotalSegments is required with withScanRequestFn() and greater zero"); - - PCollection> splits = - input.apply("Create", Create.of(this)).apply("Split", ParDo.of(new SplitFn<>())); - splits.setCoder(SerializableCoder.of(new TypeDescriptor>() {})); - - PCollection output = - splits - .apply("Reshuffle", Reshuffle.viaRandomKey()) - .apply("Read", ParDo.of(new ReadFn<>())); - output.setCoder(getCoder()); - return output; - } - - /** A {@link DoFn} to split {@link Read} elements by segment id. */ - private static class SplitFn extends DoFn, Read> { - @ProcessElement - public void processElement(@Element Read spec, OutputReceiver> out) { - ScanRequest scanRequest = spec.getScanRequestFn().apply(null); - for (int i = 0; i < scanRequest.getTotalSegments(); i++) { - out.output(spec.withSegmentId(i)); - } - } - } - - /** A {@link DoFn} executing the ScanRequest to read from DynamoDB. */ - private static class ReadFn extends DoFn, T> { - @ProcessElement - public void processElement(@Element Read spec, OutputReceiver out) { - AmazonDynamoDB client = spec.getAwsClientsProvider().createDynamoDB(); - Map lastEvaluatedKey = null; - - do { - ScanRequest scanRequest = spec.getScanRequestFn().apply(null); - scanRequest.setSegment(spec.getSegmentId()); - if (lastEvaluatedKey != null) { - scanRequest.withExclusiveStartKey(lastEvaluatedKey); - } - - ScanResult scanResult = client.scan(scanRequest); - out.output(spec.getScanResultMapperFn().apply(scanResult)); - lastEvaluatedKey = scanResult.getLastEvaluatedKey(); - } while (lastEvaluatedKey != null); // iterate until all records are fetched - } - } - - static final class ItemsMapper - implements SerializableFunction>> { - @Override - public List> apply(@Nullable ScanResult scanResult) { - if (scanResult == null) { - return Collections.emptyList(); - } - return scanResult.getItems(); - } - } - } - - /** - * A POJO encapsulating a configuration for retry behavior when issuing requests to DynamoDB. A - * retry will be attempted until the maxAttempts or maxDuration is exceeded, whichever comes - * first, for any of the following exceptions: - * - *
    - *
  • {@link IOException} - *
- */ - @AutoValue - public abstract static class RetryConfiguration implements Serializable { - private static final Duration DEFAULT_INITIAL_DURATION = Duration.standardSeconds(5); - - @VisibleForTesting - static final RetryPredicate DEFAULT_RETRY_PREDICATE = new DefaultRetryPredicate(); - - abstract int getMaxAttempts(); - - abstract Duration getMaxDuration(); - - abstract Duration getInitialDuration(); - - abstract DynamoDBIO.RetryConfiguration.RetryPredicate getRetryPredicate(); - - abstract DynamoDBIO.RetryConfiguration.Builder builder(); - - public static DynamoDBIO.RetryConfiguration create(int maxAttempts, Duration maxDuration) { - return create(maxAttempts, maxDuration, DEFAULT_INITIAL_DURATION); - } - - static DynamoDBIO.RetryConfiguration create( - int maxAttempts, Duration maxDuration, Duration initialDuration) { - checkArgument(maxAttempts > 0, "maxAttempts should be greater than 0"); - checkArgument( - maxDuration != null && maxDuration.isLongerThan(Duration.ZERO), - "maxDuration should be greater than 0"); - checkArgument( - initialDuration != null && initialDuration.isLongerThan(Duration.ZERO), - "initialDuration should be greater than 0"); - - return new AutoValue_DynamoDBIO_RetryConfiguration.Builder() - .setMaxAttempts(maxAttempts) - .setMaxDuration(maxDuration) - .setInitialDuration(initialDuration) - .setRetryPredicate(DEFAULT_RETRY_PREDICATE) - .build(); - } - - @AutoValue.Builder - abstract static class Builder { - abstract DynamoDBIO.RetryConfiguration.Builder setMaxAttempts(int maxAttempts); - - abstract DynamoDBIO.RetryConfiguration.Builder setMaxDuration(Duration maxDuration); - - abstract DynamoDBIO.RetryConfiguration.Builder setInitialDuration(Duration initialDuration); - - abstract DynamoDBIO.RetryConfiguration.Builder setRetryPredicate( - RetryPredicate retryPredicate); - - abstract DynamoDBIO.RetryConfiguration build(); - } - - /** - * An interface used to control if we retry the BatchWriteItemRequest call when a {@link - * Throwable} occurs. If {@link RetryPredicate#test(Object)} returns true, {@link Write} tries - * to resend the requests to the DynamoDB server if the {@link RetryConfiguration} permits it. - */ - @FunctionalInterface - interface RetryPredicate extends Predicate, Serializable {} - - private static class DefaultRetryPredicate implements RetryPredicate { - private static final ImmutableSet ELIGIBLE_CODES = - ImmutableSet.of(HttpStatus.SC_SERVICE_UNAVAILABLE); - - @Override - public boolean test(Throwable throwable) { - return (throwable instanceof IOException - || (throwable instanceof AmazonDynamoDBException) - || (throwable instanceof AmazonDynamoDBException - && ELIGIBLE_CODES.contains(((AmazonDynamoDBException) throwable).getStatusCode()))); - } - } - } - - /** Write a PCollection data into DynamoDB. */ - @AutoValue - public abstract static class Write extends PTransform, PCollection> { - - abstract @Nullable AwsClientsProvider getAwsClientsProvider(); - - abstract @Nullable RetryConfiguration getRetryConfiguration(); - - abstract @Nullable SerializableFunction> getWriteItemMapperFn(); - - abstract List getDeduplicateKeys(); - - abstract Builder builder(); - - @AutoValue.Builder - abstract static class Builder { - - abstract Builder setAwsClientsProvider(AwsClientsProvider awsClientsProvider); - - abstract Builder setRetryConfiguration(RetryConfiguration retryConfiguration); - - abstract Builder setWriteItemMapperFn( - SerializableFunction> writeItemMapperFn); - - abstract Builder setDeduplicateKeys(List deduplicateKeys); - - abstract Write build(); - } - - public Write withAwsClientsProvider(AwsClientsProvider awsClientsProvider) { - return builder().setAwsClientsProvider(awsClientsProvider).build(); - } - - public Write withAwsClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region, String serviceEndpoint) { - return withAwsClientsProvider( - new BasicDynamoDBProvider(awsAccessKey, awsSecretKey, region, serviceEndpoint)); - } - - public Write withAwsClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region) { - return withAwsClientsProvider(awsAccessKey, awsSecretKey, region, null); - } - - /** - * Provides configuration to retry a failed request to publish a set of records to DynamoDB. - * Users should consider that retrying might compound the underlying problem which caused the - * initial failure. Users should also be aware that once retrying is exhausted the error is - * surfaced to the runner which may then opt to retry the current partition in entirety - * or abort if the max number of retries of the runner is completed. Retrying uses an - * exponential backoff algorithm, with minimum backoff of 5 seconds and then surfacing the error - * once the maximum number of retries or maximum configuration duration is exceeded. - * - *

Example use: - * - *

{@code
-     * DynamoDBIO.write()
-     *   .withRetryConfiguration(DynamoDBIO.RetryConfiguration.create(5, Duration.standardMinutes(1))
-     *   ...
-     * }
- * - * @param retryConfiguration the rules which govern the retry behavior - * @return the {@link DynamoDBIO.Write} with retrying configured - */ - public Write withRetryConfiguration(RetryConfiguration retryConfiguration) { - checkArgument(retryConfiguration != null, "retryConfiguration is required"); - return builder().setRetryConfiguration(retryConfiguration).build(); - } - - public Write withWriteRequestMapperFn( - SerializableFunction> writeItemMapperFn) { - return builder().setWriteItemMapperFn(writeItemMapperFn).build(); - } - - public Write withDeduplicateKeys(List deduplicateKeys) { - return builder().setDeduplicateKeys(deduplicateKeys).build(); - } - - @Override - public PCollection expand(PCollection input) { - LoggerFactory.getLogger(DynamoDBIO.class) - .warn( - "You are using a deprecated IO for DynamoDB. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - return input.apply(ParDo.of(new WriteFn<>(this))); - } - - static class WriteFn extends DoFn { - @VisibleForTesting - static final String RETRY_ERROR_LOG = "Error writing items to DynamoDB [attempts:{}]: {}"; - - private static final String RESUME_ERROR_LOG = - "Error writing remaining unprocessed items to DynamoDB: {}"; - - private static final String ERROR_NO_RETRY = - "Error writing to DynamoDB. No attempt made to retry"; - private static final String ERROR_RETRIES_EXCEEDED = - "Error writing to DynamoDB after %d attempt(s). No more attempts allowed"; - private static final String ERROR_UNPROCESSED_ITEMS = - "Error writing to DynamoDB. Unprocessed items remaining"; - - private transient FluentBackoff resumeBackoff; // resume from partial failures (unlimited) - private transient FluentBackoff retryBackoff; // retry erroneous calls (default: none) - - private static final Logger LOG = LoggerFactory.getLogger(WriteFn.class); - private static final Counter DYNAMO_DB_WRITE_FAILURES = - Metrics.counter(WriteFn.class, "DynamoDB_Write_Failures"); - - private static final int BATCH_SIZE = 25; - private transient AmazonDynamoDB client; - private final DynamoDBIO.Write spec; - private Map>, KV> batch; - - WriteFn(DynamoDBIO.Write spec) { - this.spec = spec; - } - - @Setup - public void setup() { - client = spec.getAwsClientsProvider().createDynamoDB(); - resumeBackoff = FluentBackoff.DEFAULT; // resume from partial failures (unlimited) - retryBackoff = FluentBackoff.DEFAULT.withMaxRetries(0); // retry on errors (default: none) - - RetryConfiguration retryConfig = spec.getRetryConfiguration(); - if (retryConfig != null) { - resumeBackoff = resumeBackoff.withInitialBackoff(retryConfig.getInitialDuration()); - retryBackoff = - retryBackoff - .withMaxRetries(retryConfig.getMaxAttempts() - 1) - .withInitialBackoff(retryConfig.getInitialDuration()) - .withMaxCumulativeBackoff(retryConfig.getMaxDuration()); - } - } - - @StartBundle - public void startBundle(StartBundleContext context) { - batch = new HashMap<>(); - } - - @ProcessElement - public void processElement(ProcessContext context) throws Exception { - final KV writeRequest = - spec.getWriteItemMapperFn().apply(context.element()); - batch.put( - KV.of(writeRequest.getKey(), extractDeduplicateKeyValues(writeRequest.getValue())), - writeRequest); - if (batch.size() >= BATCH_SIZE) { - flushBatch(); - } - } - - private Map extractDeduplicateKeyValues(WriteRequest request) { - List deduplicationKeys = spec.getDeduplicateKeys(); - Map attributes = Collections.emptyMap(); - - if (request.getPutRequest() != null) { - attributes = request.getPutRequest().getItem(); - } else if (request.getDeleteRequest() != null) { - attributes = request.getDeleteRequest().getKey(); - } - - if (attributes.isEmpty() || deduplicationKeys.isEmpty()) { - return attributes; - } - - return attributes.entrySet().stream() - .filter(entry -> deduplicationKeys.contains(entry.getKey())) - .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); - } - - @FinishBundle - public void finishBundle(FinishBundleContext context) throws Exception { - flushBatch(); - } - - private void flushBatch() throws IOException, InterruptedException { - if (batch.isEmpty()) { - return; - } - try { - // Group values KV by tableName - // Note: The original order of arrival is lost reading the map entries. - Map> writesPerTable = - batch.values().stream() - .collect(groupingBy(KV::getKey, mapping(KV::getValue, toList()))); - - // Backoff used to resume from partial failures - BackOff resume = resumeBackoff.backoff(); - do { - BatchWriteItemRequest batchRequest = new BatchWriteItemRequest(writesPerTable); - // If unprocessed items remain, we have to resume the operation (with backoff) - writesPerTable = writeWithRetries(batchRequest).getUnprocessedItems(); - } while (!writesPerTable.isEmpty() && BackOffUtils.next(Sleeper.DEFAULT, resume)); - - if (!writesPerTable.isEmpty()) { - DYNAMO_DB_WRITE_FAILURES.inc(); - LOG.error(RESUME_ERROR_LOG, writesPerTable); - throw new IOException(ERROR_UNPROCESSED_ITEMS); - } - } finally { - batch.clear(); - } - } - - /** - * Write batch of items to DynamoDB and potentially retry in case of exceptions. Though, in - * case of a partial failure, unprocessed items remain but the request succeeds. This has to - * be handled by the caller. - */ - private BatchWriteItemResult writeWithRetries(BatchWriteItemRequest request) - throws IOException, InterruptedException { - BackOff backoff = retryBackoff.backoff(); - Exception lastThrown; - - int attempt = 0; - do { - attempt++; - try { - return client.batchWriteItem(request); - } catch (Exception ex) { - lastThrown = ex; - } - } while (canRetry(lastThrown) && BackOffUtils.next(Sleeper.DEFAULT, backoff)); - - DYNAMO_DB_WRITE_FAILURES.inc(); - LOG.warn(RETRY_ERROR_LOG, attempt, request.getRequestItems()); - throw new IOException( - canRetry(lastThrown) ? String.format(ERROR_RETRIES_EXCEEDED, attempt) : ERROR_NO_RETRY, - lastThrown); - } - - private boolean canRetry(Exception ex) { - return spec.getRetryConfiguration() != null - && spec.getRetryConfiguration().getRetryPredicate().test(ex); - } - - @Teardown - public void tearDown() { - if (client != null) { - client.shutdown(); - client = null; - } - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/package-info.java deleted file mode 100644 index 0a7ea559fb9b..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/dynamodb/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines IO connectors for Amazon Web Services DynamoDB. */ -package org.apache.beam.sdk.io.aws.dynamodb; diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java deleted file mode 100644 index 326758f1d1bb..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsModule.java +++ /dev/null @@ -1,390 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.PropertiesFileCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.auth.SystemPropertiesCredentialsProvider; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import com.fasterxml.jackson.databind.jsontype.TypeDeserializer; -import com.fasterxml.jackson.databind.jsontype.TypeSerializer; -import com.fasterxml.jackson.databind.module.SimpleModule; -import com.google.auto.service.AutoService; -import java.io.IOException; -import java.util.Map; -import org.apache.beam.repackaged.core.org.apache.commons.lang3.reflect.FieldUtils; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; - -/** - * A Jackson {@link Module} that registers a {@link JsonSerializer} and {@link JsonDeserializer} for - * {@link AWSCredentialsProvider} and some subclasses. The serialized form is a JSON map. - * - *

It also adds serializers for S3 encryption objects {@link SSECustomerKey} and {@link - * SSEAwsKeyManagementParams}. - */ -@AutoService(Module.class) -public class AwsModule extends SimpleModule { - - private static final String AWS_ACCESS_KEY_ID = "awsAccessKeyId"; - private static final String AWS_SECRET_KEY = "awsSecretKey"; - private static final String SESSION_TOKEN = "sessionToken"; - private static final String CREDENTIALS_FILE_PATH = "credentialsFilePath"; - public static final String CLIENT_EXECUTION_TIMEOUT = "clientExecutionTimeout"; - public static final String CONNECTION_MAX_IDLE_TIME = "connectionMaxIdleTime"; - public static final String CONNECTION_TIMEOUT = "connectionTimeout"; - public static final String CONNECTION_TIME_TO_LIVE = "connectionTimeToLive"; - public static final String MAX_CONNECTIONS = "maxConnections"; - public static final String REQUEST_TIMEOUT = "requestTimeout"; - public static final String SOCKET_TIMEOUT = "socketTimeout"; - public static final String PROXY_HOST = "proxyHost"; - public static final String PROXY_PORT = "proxyPort"; - public static final String PROXY_USERNAME = "proxyUsername"; - public static final String PROXY_PASSWORD = "proxyPassword"; - private static final String ROLE_ARN = "roleArn"; - private static final String ROLE_SESSION_NAME = "roleSessionName"; - - @SuppressWarnings({"nullness"}) - public AwsModule() { - super("AwsModule"); - setMixInAnnotation(AWSCredentialsProvider.class, AWSCredentialsProviderMixin.class); - setMixInAnnotation(SSECustomerKey.class, SSECustomerKeyMixin.class); - setMixInAnnotation(SSEAwsKeyManagementParams.class, SSEAwsKeyManagementParamsMixin.class); - setMixInAnnotation(ClientConfiguration.class, AwsHttpClientConfigurationMixin.class); - } - - /** A mixin to add Jackson annotations to {@link AWSCredentialsProvider}. */ - @JsonDeserialize(using = AWSCredentialsProviderDeserializer.class) - @JsonSerialize(using = AWSCredentialsProviderSerializer.class) - @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY) - private static class AWSCredentialsProviderMixin {} - - private static class AWSCredentialsProviderDeserializer - extends JsonDeserializer { - - @Override - public AWSCredentialsProvider deserialize(JsonParser jsonParser, DeserializationContext context) - throws IOException { - return context.readValue(jsonParser, AWSCredentialsProvider.class); - } - - @Override - public AWSCredentialsProvider deserializeWithType( - JsonParser jsonParser, DeserializationContext context, TypeDeserializer typeDeserializer) - throws IOException { - Map asMap = - checkNotNull( - jsonParser.readValueAs(new TypeReference>() {}), - "Serialized AWS credentials provider is null"); - - String typeNameKey = typeDeserializer.getPropertyName(); - String typeName = getNotNull(asMap, typeNameKey, "unknown"); - - if (hasName(AWSStaticCredentialsProvider.class, typeName)) { - boolean isSession = asMap.containsKey(SESSION_TOKEN); - if (isSession) { - return new AWSStaticCredentialsProvider( - new BasicSessionCredentials( - getNotNull(asMap, AWS_ACCESS_KEY_ID, typeName), - getNotNull(asMap, AWS_SECRET_KEY, typeName), - getNotNull(asMap, SESSION_TOKEN, typeName))); - } else { - return new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - getNotNull(asMap, AWS_ACCESS_KEY_ID, typeName), - getNotNull(asMap, AWS_SECRET_KEY, typeName))); - } - } else if (hasName(PropertiesFileCredentialsProvider.class, typeName)) { - return new PropertiesFileCredentialsProvider( - getNotNull(asMap, CREDENTIALS_FILE_PATH, typeName)); - } else if (hasName(ClasspathPropertiesFileCredentialsProvider.class, typeName)) { - return new ClasspathPropertiesFileCredentialsProvider( - getNotNull(asMap, CREDENTIALS_FILE_PATH, typeName)); - } else if (hasName(DefaultAWSCredentialsProviderChain.class, typeName)) { - return new DefaultAWSCredentialsProviderChain(); - } else if (hasName(EnvironmentVariableCredentialsProvider.class, typeName)) { - return new EnvironmentVariableCredentialsProvider(); - } else if (hasName(SystemPropertiesCredentialsProvider.class, typeName)) { - return new SystemPropertiesCredentialsProvider(); - } else if (hasName(ProfileCredentialsProvider.class, typeName)) { - return new ProfileCredentialsProvider(); - } else if (hasName(EC2ContainerCredentialsProviderWrapper.class, typeName)) { - return new EC2ContainerCredentialsProviderWrapper(); - } else if (hasName(STSAssumeRoleSessionCredentialsProvider.class, typeName)) { - return new STSAssumeRoleSessionCredentialsProvider.Builder( - getNotNull(asMap, ROLE_ARN, typeName), - getNotNull(asMap, ROLE_SESSION_NAME, typeName)) - .build(); - } else { - throw new IOException( - String.format("AWS credential provider type '%s' is not supported", typeName)); - } - } - - @SuppressWarnings({"nullness"}) - private String getNotNull(Map map, String key, String typeName) { - return checkNotNull( - map.get(key), "AWS credentials provider type '%s' is missing '%s'", typeName, key); - } - - private boolean hasName(Class clazz, String typeName) { - return clazz.getSimpleName().equals(typeName); - } - } - - private static class AWSCredentialsProviderSerializer - extends JsonSerializer { - // These providers are singletons, so don't require any serialization, other than type. - private static final ImmutableSet SINGLETON_CREDENTIAL_PROVIDERS = - ImmutableSet.of( - DefaultAWSCredentialsProviderChain.class, - EnvironmentVariableCredentialsProvider.class, - SystemPropertiesCredentialsProvider.class, - ProfileCredentialsProvider.class, - EC2ContainerCredentialsProviderWrapper.class); - - @Override - public void serialize( - AWSCredentialsProvider credentialsProvider, - JsonGenerator jsonGenerator, - SerializerProvider serializers) - throws IOException { - serializers.defaultSerializeValue(credentialsProvider, jsonGenerator); - } - - @Override - public void serializeWithType( - AWSCredentialsProvider credentialsProvider, - JsonGenerator jsonGenerator, - SerializerProvider serializers, - TypeSerializer typeSerializer) - throws IOException { - // BEAM-11958 Use deprecated Jackson APIs to be compatible with older versions of jackson - typeSerializer.writeTypePrefixForObject(credentialsProvider, jsonGenerator); - - Class providerClass = credentialsProvider.getClass(); - if (providerClass.equals(AWSStaticCredentialsProvider.class)) { - AWSCredentials credentials = credentialsProvider.getCredentials(); - if (credentials.getClass().equals(BasicSessionCredentials.class)) { - BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials; - jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, sessionCredentials.getAWSAccessKeyId()); - jsonGenerator.writeStringField(AWS_SECRET_KEY, sessionCredentials.getAWSSecretKey()); - jsonGenerator.writeStringField(SESSION_TOKEN, sessionCredentials.getSessionToken()); - } else { - jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, credentials.getAWSAccessKeyId()); - jsonGenerator.writeStringField(AWS_SECRET_KEY, credentials.getAWSSecretKey()); - } - } else if (providerClass.equals(PropertiesFileCredentialsProvider.class)) { - String filePath = (String) readField(credentialsProvider, CREDENTIALS_FILE_PATH); - jsonGenerator.writeStringField(CREDENTIALS_FILE_PATH, filePath); - } else if (providerClass.equals(ClasspathPropertiesFileCredentialsProvider.class)) { - String filePath = (String) readField(credentialsProvider, CREDENTIALS_FILE_PATH); - jsonGenerator.writeStringField(CREDENTIALS_FILE_PATH, filePath); - } else if (providerClass.equals(STSAssumeRoleSessionCredentialsProvider.class)) { - String arn = (String) readField(credentialsProvider, ROLE_ARN); - String sessionName = (String) readField(credentialsProvider, ROLE_SESSION_NAME); - jsonGenerator.writeStringField(ROLE_ARN, arn); - jsonGenerator.writeStringField(ROLE_SESSION_NAME, sessionName); - } else if (!SINGLETON_CREDENTIAL_PROVIDERS.contains(providerClass)) { - throw new IllegalArgumentException( - "Unsupported AWS credentials provider type " + providerClass); - } - // BEAM-11958 Use deprecated Jackson APIs to be compatible with older versions of jackson - typeSerializer.writeTypeSuffixForObject(credentialsProvider, jsonGenerator); - } - - private Object readField(AWSCredentialsProvider provider, String fieldName) throws IOException { - try { - return FieldUtils.readField(provider, fieldName, true); - } catch (IllegalArgumentException | IllegalAccessException e) { - throw new IOException( - String.format( - "Failed to access private field '%s' of AWS credential provider type '%s' with reflection", - fieldName, provider.getClass().getSimpleName()), - e); - } - } - } - - @SuppressWarnings({"nullness"}) - private static String getNotNull(Map map, String key, Class clazz) { - return checkNotNull(map.get(key), "`%s` required in serialized %s", key, clazz.getSimpleName()); - } - - /** A mixin to add Jackson annotations to {@link SSECustomerKey}. */ - @JsonDeserialize(using = SSECustomerKeyDeserializer.class) - private static class SSECustomerKeyMixin {} - - private static class SSECustomerKeyDeserializer extends JsonDeserializer { - @Override - public SSECustomerKey deserialize(JsonParser parser, DeserializationContext context) - throws IOException { - Map asMap = - checkNotNull( - parser.readValueAs(new TypeReference>() {}), - "Serialized SSECustomerKey is null"); - - SSECustomerKey sseCustomerKey = - new SSECustomerKey(getNotNull(asMap, "key", SSECustomerKey.class)); - final String algorithm = asMap.get("algorithm"); - final String md5 = asMap.get("md5"); - if (algorithm != null) { - sseCustomerKey.setAlgorithm(algorithm); - } - if (md5 != null) { - sseCustomerKey.setMd5(md5); - } - return sseCustomerKey; - } - } - - /** A mixin to add Jackson annotations to {@link SSEAwsKeyManagementParams}. */ - @JsonDeserialize(using = SSEAwsKeyManagementParamsDeserializer.class) - private static class SSEAwsKeyManagementParamsMixin {} - - private static class SSEAwsKeyManagementParamsDeserializer - extends JsonDeserializer { - @Override - public SSEAwsKeyManagementParams deserialize(JsonParser parser, DeserializationContext context) - throws IOException { - Map asMap = - checkNotNull( - parser.readValueAs(new TypeReference>() {}), - "Serialized SSEAwsKeyManagementParams is null"); - - return new SSEAwsKeyManagementParams( - getNotNull(asMap, "awsKmsKeyId", SSEAwsKeyManagementParams.class)); - } - } - - /** A mixin to add Jackson annotations to {@link ClientConfiguration}. */ - @JsonSerialize(using = AwsHttpClientConfigurationSerializer.class) - @JsonDeserialize(using = AwsHttpClientConfigurationDeserializer.class) - private static class AwsHttpClientConfigurationMixin {} - - private static class AwsHttpClientConfigurationDeserializer - extends JsonDeserializer { - @Override - public ClientConfiguration deserialize(JsonParser jsonParser, DeserializationContext context) - throws IOException { - Map map = - checkNotNull( - jsonParser.readValueAs(new TypeReference>() {}), - "Serialized ClientConfiguration is null"); - - ClientConfiguration clientConfiguration = new ClientConfiguration(); - - if (map.containsKey(PROXY_HOST)) { - clientConfiguration.setProxyHost((String) map.get(PROXY_HOST)); - } - if (map.containsKey(PROXY_PORT)) { - clientConfiguration.setProxyPort(((Number) map.get(PROXY_PORT)).intValue()); - } - if (map.containsKey(PROXY_USERNAME)) { - clientConfiguration.setProxyUsername((String) map.get(PROXY_USERNAME)); - } - if (map.containsKey(PROXY_PASSWORD)) { - clientConfiguration.setProxyPassword((String) map.get(PROXY_PASSWORD)); - } - if (map.containsKey(CLIENT_EXECUTION_TIMEOUT)) { - clientConfiguration.setClientExecutionTimeout( - ((Number) map.get(CLIENT_EXECUTION_TIMEOUT)).intValue()); - } - if (map.containsKey(CONNECTION_MAX_IDLE_TIME)) { - clientConfiguration.setConnectionMaxIdleMillis( - ((Number) map.get(CONNECTION_MAX_IDLE_TIME)).longValue()); - } - if (map.containsKey(CONNECTION_TIMEOUT)) { - clientConfiguration.setConnectionTimeout(((Number) map.get(CONNECTION_TIMEOUT)).intValue()); - } - if (map.containsKey(CONNECTION_TIME_TO_LIVE)) { - clientConfiguration.setConnectionTTL( - ((Number) map.get(CONNECTION_TIME_TO_LIVE)).longValue()); - } - if (map.containsKey(MAX_CONNECTIONS)) { - clientConfiguration.setMaxConnections(((Number) map.get(MAX_CONNECTIONS)).intValue()); - } - if (map.containsKey(REQUEST_TIMEOUT)) { - clientConfiguration.setRequestTimeout(((Number) map.get(REQUEST_TIMEOUT)).intValue()); - } - if (map.containsKey(SOCKET_TIMEOUT)) { - clientConfiguration.setSocketTimeout(((Number) map.get(SOCKET_TIMEOUT)).intValue()); - } - return clientConfiguration; - } - } - - private static class AwsHttpClientConfigurationSerializer - extends JsonSerializer { - - @Override - public void serialize( - ClientConfiguration clientConfiguration, - JsonGenerator jsonGenerator, - SerializerProvider serializer) - throws IOException { - - jsonGenerator.writeStartObject(); - jsonGenerator.writeObjectField(PROXY_HOST /*string*/, clientConfiguration.getProxyHost()); - jsonGenerator.writeObjectField(PROXY_PORT /*int*/, clientConfiguration.getProxyPort()); - jsonGenerator.writeObjectField( - PROXY_USERNAME /*string*/, clientConfiguration.getProxyUsername()); - jsonGenerator.writeObjectField( - PROXY_PASSWORD /*string*/, clientConfiguration.getProxyPassword()); - jsonGenerator.writeObjectField( - CLIENT_EXECUTION_TIMEOUT /*int*/, clientConfiguration.getClientExecutionTimeout()); - jsonGenerator.writeObjectField( - CONNECTION_MAX_IDLE_TIME /*long*/, clientConfiguration.getConnectionMaxIdleMillis()); - jsonGenerator.writeObjectField( - CONNECTION_TIMEOUT /*int*/, clientConfiguration.getConnectionTimeout()); - jsonGenerator.writeObjectField( - CONNECTION_TIME_TO_LIVE /*long*/, clientConfiguration.getConnectionTTL()); - jsonGenerator.writeObjectField( - MAX_CONNECTIONS /*int*/, clientConfiguration.getMaxConnections()); - jsonGenerator.writeObjectField( - REQUEST_TIMEOUT /*int*/, clientConfiguration.getRequestTimeout()); - jsonGenerator.writeObjectField( - SOCKET_TIMEOUT /*int*/, clientConfiguration.getSocketTimeout()); - jsonGenerator.writeEndObject(); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsOptions.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsOptions.java deleted file mode 100644 index 42e3a5614b09..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsOptions.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.SdkClientException; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.regions.DefaultAwsRegionProviderChain; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Options used to configure Amazon Web Services specific options such as credentials and region. - */ -public interface AwsOptions extends PipelineOptions { - - /** AWS region used by the AWS client. */ - @Description("AWS region used by the AWS client") - @Default.InstanceFactory(AwsRegionFactory.class) - String getAwsRegion(); - - void setAwsRegion(String value); - - /** Attempt to load default region. */ - class AwsRegionFactory implements DefaultValueFactory<@Nullable String> { - @Override - @Nullable - public String create(PipelineOptions options) { - try { - return new DefaultAwsRegionProviderChain().getRegion(); - } catch (SdkClientException e) { - return null; - } - } - } - - /** The AWS service endpoint used by the AWS client. */ - @Description("AWS service endpoint used by the AWS client") - String getAwsServiceEndpoint(); - - void setAwsServiceEndpoint(String value); - - /** - * The credential instance that should be used to authenticate against AWS services. The option - * value must contain a "@type" field and an AWS Credentials Provider class as the field value. - * Refer to {@link DefaultAWSCredentialsProviderChain} Javadoc for usage help. - * - *

For example, to specify the AWS key ID and secret, specify the following: - * {"@type" : "AWSStaticCredentialsProvider", "awsAccessKeyId" : "key_id_value", - * "awsSecretKey" : "secret_value"} - * - */ - @Description( - "The credential instance that should be used to authenticate " - + "against AWS services. The option value must contain \"@type\" field " - + "and an AWS Credentials Provider class name as the field value. " - + "Refer to DefaultAWSCredentialsProviderChain Javadoc for usage help. " - + "For example, to specify the AWS key ID and secret, specify the following: " - + "{\"@type\": \"AWSStaticCredentialsProvider\", " - + "\"awsAccessKeyId\":\"\", \"awsSecretKey\":\"\"}") - @Default.InstanceFactory(AwsUserCredentialsFactory.class) - AWSCredentialsProvider getAwsCredentialsProvider(); - - void setAwsCredentialsProvider(AWSCredentialsProvider value); - - /** Attempts to load AWS credentials. */ - class AwsUserCredentialsFactory implements DefaultValueFactory { - - @Override - public AWSCredentialsProvider create(PipelineOptions options) { - return DefaultAWSCredentialsProviderChain.getInstance(); - } - } - - /** - * The client configuration instance that should be used to configure AWS service clients. Please - * note that the configuration deserialization only allows one to specify proxy settings. Please - * use AwsHttpClientConfiguration's client configuration to set a wider range of options. - * - *

For example, to specify the proxy host, port, username and password, specify the following: - * - * --clientConfiguration={ - * "proxyHost":"hostname", - * "proxyPort":1234, - * "proxyUsername":"username", - * "proxyPassword":"password" - * } - * - * - * @return - */ - @Description( - "The client configuration instance that should be used to configure AWS service " - + "clients. Please note that the configuration deserialization only allows one to specify " - + "proxy settings. For example, to specify the proxy host, port, username and password, " - + "specify the following: --clientConfiguration={\"proxyHost\":\"hostname\",\"proxyPort\":1234," - + "\"proxyUsername\":\"username\",\"proxyPassword\":\"password\"}") - @Default.InstanceFactory(ClientConfigurationFactory.class) - ClientConfiguration getClientConfiguration(); - - void setClientConfiguration(ClientConfiguration clientConfiguration); - - /** Default AWS client configuration. */ - class ClientConfigurationFactory implements DefaultValueFactory { - - @Override - public ClientConfiguration create(PipelineOptions options) { - return new ClientConfiguration(); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsPipelineOptionsRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsPipelineOptionsRegistrar.java deleted file mode 100644 index 3dad9fd611cb..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/AwsPipelineOptionsRegistrar.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import com.google.auto.service.AutoService; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.options.PipelineOptionsRegistrar; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** A registrar containing the default AWS options. */ -@AutoService(PipelineOptionsRegistrar.class) -public class AwsPipelineOptionsRegistrar implements PipelineOptionsRegistrar { - - @Override - public Iterable> getPipelineOptions() { - return ImmutableList.>builder() - .add(AwsOptions.class) - .add(S3Options.class) - .build(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java deleted file mode 100644 index ce6eaa57cd8e..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3ClientBuilderFactory.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import com.amazonaws.services.s3.AmazonS3ClientBuilder; - -/** Construct AmazonS3ClientBuilder from S3 pipeline options. */ -public interface S3ClientBuilderFactory { - AmazonS3ClientBuilder createBuilder(S3Options s3Options); -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java deleted file mode 100644 index e9979b5c99ea..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/S3Options.java +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import org.apache.beam.sdk.io.aws.s3.DefaultS3ClientBuilderFactory; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.DefaultValueFactory; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Options used to configure Amazon Web Services S3. */ -public interface S3Options extends AwsOptions { - - @Description("AWS S3 storage class used for creating S3 objects") - @Default.String("STANDARD") - String getS3StorageClass(); - - void setS3StorageClass(String value); - - @Description( - "Size of S3 upload chunks; max upload object size is this value multiplied by 10000;" - + "default is 64MB, or 5MB in memory-constrained environments. Must be at least 5MB.") - @Default.InstanceFactory(S3UploadBufferSizeBytesFactory.class) - Integer getS3UploadBufferSizeBytes(); - - void setS3UploadBufferSizeBytes(Integer value); - - @Description("Thread pool size, limiting max concurrent S3 operations") - @Default.Integer(50) - int getS3ThreadPoolSize(); - - void setS3ThreadPoolSize(int value); - - @Description("Algorithm for SSE-S3 encryption, e.g. AES256.") - @Nullable - String getSSEAlgorithm(); - - void setSSEAlgorithm(String value); - - @Description( - "SSE key for SSE-C encryption, e.g. a base64 encoded key and the algorithm." - + "To specify on the command-line, represent the value as a JSON object. For example:" - + " --SSECustomerKey={\"key\": \"86glyTlCN...\", \"algorithm\": \"AES256\"}") - @Nullable - SSECustomerKey getSSECustomerKey(); - - void setSSECustomerKey(SSECustomerKey value); - - @Description( - "KMS key id for SSE-KMS encryption, e.g. \"arn:aws:kms:...\"." - + "To specify on the command-line, represent the value as a JSON object. For example:" - + " --SSEAwsKeyManagementParams={\"awsKmsKeyId\": \"arn:aws:kms:...\"}") - @Nullable - SSEAwsKeyManagementParams getSSEAwsKeyManagementParams(); - - void setSSEAwsKeyManagementParams(SSEAwsKeyManagementParams value); - - @Description( - "Set to true to use an S3 Bucket Key for object encryption with server-side " - + "encryption using AWS KMS (SSE-KMS)") - @Default.Boolean(false) - boolean getBucketKeyEnabled(); - - void setBucketKeyEnabled(boolean value); - - @Description( - "Factory class that should be created and used to create a builder of AmazonS3 client." - + "Override the default value if you need a S3 client with custom properties, like path style access, etc.") - @Default.Class(DefaultS3ClientBuilderFactory.class) - Class getS3ClientFactoryClass(); - - void setS3ClientFactoryClass(Class s3ClientFactoryClass); - - /** - * Provide the default s3 upload buffer size in bytes: 64MB if more than 512MB in RAM are - * available and 5MB otherwise. - */ - class S3UploadBufferSizeBytesFactory implements DefaultValueFactory { - public static final int MINIMUM_UPLOAD_BUFFER_SIZE_BYTES = 5_242_880; - - @Override - public Integer create(PipelineOptions options) { - return Runtime.getRuntime().maxMemory() < 536_870_912 - ? MINIMUM_UPLOAD_BUFFER_SIZE_BYTES - : 67_108_864; - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/package-info.java deleted file mode 100644 index fc79c546706a..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/options/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** - * Defines {@link org.apache.beam.sdk.options.PipelineOptions} for configuring pipeline execution - * for Amazon Web Services components. - */ -package org.apache.beam.sdk.io.aws.options; diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java deleted file mode 100644 index fa96d79b63a7..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3ClientBuilderFactory.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import org.apache.beam.sdk.io.aws.options.S3ClientBuilderFactory; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; - -/** - * Construct AmazonS3ClientBuilder with default values of S3 client properties like path style - * access, accelerated mode, etc. - */ -public class DefaultS3ClientBuilderFactory implements S3ClientBuilderFactory { - - @Override - public AmazonS3ClientBuilder createBuilder(S3Options s3Options) { - AmazonS3ClientBuilder builder = - AmazonS3ClientBuilder.standard().withCredentials(s3Options.getAwsCredentialsProvider()); - - if (s3Options.getClientConfiguration() != null) { - builder = builder.withClientConfiguration(s3Options.getClientConfiguration()); - } - - if (!Strings.isNullOrEmpty(s3Options.getAwsServiceEndpoint())) { - builder = - builder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration( - s3Options.getAwsServiceEndpoint(), s3Options.getAwsRegion())); - } else if (!Strings.isNullOrEmpty(s3Options.getAwsRegion())) { - builder = builder.withRegion(s3Options.getAwsRegion()); - } - return builder; - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3FileSystemSchemeRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3FileSystemSchemeRegistrar.java deleted file mode 100644 index 0988309cb0e2..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/DefaultS3FileSystemSchemeRegistrar.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.google.auto.service.AutoService; -import javax.annotation.Nonnull; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** Registers the "s3" uri schema to be handled by {@link S3FileSystem}. */ -@AutoService(S3FileSystemSchemeRegistrar.class) -public class DefaultS3FileSystemSchemeRegistrar implements S3FileSystemSchemeRegistrar { - - @Override - public Iterable fromOptions(@Nonnull PipelineOptions options) { - checkNotNull(options, "Expect the runner have called FileSystems.setDefaultPipelineOptions()."); - return ImmutableList.of( - S3FileSystemConfiguration.fromS3Options(options.as(S3Options.class)).build()); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java deleted file mode 100644 index 75d66c46478a..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystem.java +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.sdk.io.FileSystemUtils.wildcardToRegexp; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.CopyObjectResult; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.model.CopyPartResult; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.google.auto.value.AutoValue; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Date; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.Callable; -import java.util.concurrent.CompletionStage; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.regex.Pattern; -import java.util.stream.Collectors; -import org.apache.beam.sdk.io.FileSystem; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.io.fs.CreateOptions; -import org.apache.beam.sdk.io.fs.MatchResult; -import org.apache.beam.sdk.io.fs.MoveOptions; -import org.apache.beam.sdk.metrics.Lineage; -import org.apache.beam.sdk.util.MoreFutures; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Supplier; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Suppliers; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ArrayListMultimap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ListeningExecutorService; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.MoreExecutors; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.ThreadFactoryBuilder; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link FileSystem} implementation for storage systems that use the S3 protocol. - * - * @see S3FileSystemSchemeRegistrar - * @deprecated Module beam-sdks-java-io-amazon-web-services is deprecated and will be - * eventually removed. Please migrate to module beam-sdks-java-io-amazon-web-services2 - * . - */ -@Deprecated -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class S3FileSystem extends FileSystem { - - private static final Logger LOG = LoggerFactory.getLogger(S3FileSystem.class); - - // Amazon S3 API: You can create a copy of your object up to 5 GB in a single atomic operation - // Ref. https://docs.aws.amazon.com/AmazonS3/latest/dev/CopyingObjectsExamples.html - private static final long MAX_COPY_OBJECT_SIZE_BYTES = 5_368_709_120L; - - // S3 API, delete-objects: "You may specify up to 1000 keys." - private static final int MAX_DELETE_OBJECTS_PER_REQUEST = 1000; - - private static final ImmutableSet NON_READ_SEEK_EFFICIENT_ENCODINGS = - ImmutableSet.of("gzip"); - - // Non-final for testing. - private Supplier amazonS3; - private final S3FileSystemConfiguration config; - private final ListeningExecutorService executorService; - - S3FileSystem(S3FileSystemConfiguration config) { - this.config = checkNotNull(config, "config"); - // The Supplier is to make sure we don't call .build() unless we are actually using S3. - amazonS3 = Suppliers.memoize(config.getS3ClientBuilder()::build); - - checkNotNull(config.getS3StorageClass(), "storageClass"); - checkArgument(config.getS3ThreadPoolSize() > 0, "threadPoolSize"); - executorService = - MoreExecutors.listeningDecorator( - Executors.newFixedThreadPool( - config.getS3ThreadPoolSize(), new ThreadFactoryBuilder().setDaemon(true).build())); - - LOG.warn( - "You are using a deprecated file system for S3. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - } - - S3FileSystem(S3Options options) { - this(S3FileSystemConfiguration.fromS3Options(options).build()); - } - - @Override - protected String getScheme() { - return config.getScheme(); - } - - @VisibleForTesting - void setAmazonS3Client(AmazonS3 amazonS3) { - this.amazonS3 = Suppliers.ofInstance(amazonS3); - } - - @VisibleForTesting - AmazonS3 getAmazonS3Client() { - return this.amazonS3.get(); - } - - @Override - protected List match(List specs) throws IOException { - List paths = - specs.stream().map(S3ResourceId::fromUri).collect(Collectors.toList()); - List globs = new ArrayList<>(); - List nonGlobs = new ArrayList<>(); - List isGlobBooleans = new ArrayList<>(); - - for (S3ResourceId path : paths) { - if (path.isWildcard()) { - globs.add(path); - isGlobBooleans.add(true); - } else { - nonGlobs.add(path); - isGlobBooleans.add(false); - } - } - - Iterator globMatches = matchGlobPaths(globs).iterator(); - Iterator nonGlobMatches = matchNonGlobPaths(nonGlobs).iterator(); - - ImmutableList.Builder matchResults = ImmutableList.builder(); - for (Boolean isGlob : isGlobBooleans) { - if (isGlob) { - checkState( - globMatches.hasNext(), - "Internal error encountered in S3Filesystem: expected more elements in globMatches."); - matchResults.add(globMatches.next()); - } else { - checkState( - nonGlobMatches.hasNext(), - "Internal error encountered in S3Filesystem: expected more elements in nonGlobMatches."); - matchResults.add(nonGlobMatches.next()); - } - } - checkState( - !globMatches.hasNext(), - "Internal error encountered in S3Filesystem: expected no more elements in globMatches."); - checkState( - !nonGlobMatches.hasNext(), - "Internal error encountered in S3Filesystem: expected no more elements in nonGlobMatches."); - - return matchResults.build(); - } - - /** Gets {@link MatchResult} representing all objects that match wildcard-containing paths. */ - @VisibleForTesting - List matchGlobPaths(Collection globPaths) throws IOException { - List> expandTasks = new ArrayList<>(globPaths.size()); - for (final S3ResourceId path : globPaths) { - expandTasks.add(() -> expandGlob(path)); - } - - Map expandedGlobByGlobPath = new HashMap<>(); - List> contentTypeTasks = new ArrayList<>(globPaths.size()); - for (ExpandedGlob expandedGlob : callTasks(expandTasks)) { - expandedGlobByGlobPath.put(expandedGlob.getGlobPath(), expandedGlob); - if (expandedGlob.getExpandedPaths() != null) { - for (final S3ResourceId path : expandedGlob.getExpandedPaths()) { - contentTypeTasks.add(() -> getPathContentEncoding(path)); - } - } - } - - Map exceptionByPath = new HashMap<>(); - for (PathWithEncoding pathWithException : callTasks(contentTypeTasks)) { - exceptionByPath.put(pathWithException.getPath(), pathWithException); - } - - List results = new ArrayList<>(globPaths.size()); - for (S3ResourceId globPath : globPaths) { - ExpandedGlob expandedGlob = expandedGlobByGlobPath.get(globPath); - - if (expandedGlob.getException() != null) { - results.add(MatchResult.create(MatchResult.Status.ERROR, expandedGlob.getException())); - - } else { - List metadatas = new ArrayList<>(); - IOException exception = null; - for (S3ResourceId expandedPath : expandedGlob.getExpandedPaths()) { - PathWithEncoding pathWithEncoding = exceptionByPath.get(expandedPath); - - if (pathWithEncoding.getException() != null) { - exception = pathWithEncoding.getException(); - break; - } else { - // TODO(https://github.com/apache/beam/issues/20755): Support file checksum in this - // method. - metadatas.add( - createBeamMetadata( - pathWithEncoding.getPath(), pathWithEncoding.getContentEncoding(), null)); - } - } - - if (exception != null) { - if (exception instanceof FileNotFoundException) { - results.add(MatchResult.create(MatchResult.Status.NOT_FOUND, exception)); - } else { - results.add(MatchResult.create(MatchResult.Status.ERROR, exception)); - } - } else { - results.add(MatchResult.create(MatchResult.Status.OK, metadatas)); - } - } - } - - return ImmutableList.copyOf(results); - } - - @AutoValue - abstract static class ExpandedGlob { - - abstract S3ResourceId getGlobPath(); - - abstract @Nullable List getExpandedPaths(); - - abstract @Nullable IOException getException(); - - static ExpandedGlob create(S3ResourceId globPath, List expandedPaths) { - checkNotNull(globPath, "globPath"); - checkNotNull(expandedPaths, "expandedPaths"); - return new AutoValue_S3FileSystem_ExpandedGlob(globPath, expandedPaths, null); - } - - static ExpandedGlob create(S3ResourceId globPath, IOException exception) { - checkNotNull(globPath, "globPath"); - checkNotNull(exception, "exception"); - return new AutoValue_S3FileSystem_ExpandedGlob(globPath, null, exception); - } - } - - @AutoValue - abstract static class PathWithEncoding { - - abstract S3ResourceId getPath(); - - abstract @Nullable String getContentEncoding(); - - abstract @Nullable IOException getException(); - - static PathWithEncoding create(S3ResourceId path, String contentEncoding) { - checkNotNull(path, "path"); - checkNotNull(contentEncoding, "contentEncoding"); - return new AutoValue_S3FileSystem_PathWithEncoding(path, contentEncoding, null); - } - - static PathWithEncoding create(S3ResourceId path, IOException exception) { - checkNotNull(path, "path"); - checkNotNull(exception, "exception"); - return new AutoValue_S3FileSystem_PathWithEncoding(path, null, exception); - } - } - - private ExpandedGlob expandGlob(S3ResourceId glob) { - // The S3 API can list objects, filtered by prefix, but not by wildcard. - // Here, we find the longest prefix without wildcard "*", - // then filter the results with a regex. - checkArgument(glob.isWildcard(), "isWildcard"); - String keyPrefix = glob.getKeyNonWildcardPrefix(); - Pattern wildcardRegexp = Pattern.compile(wildcardToRegexp(glob.getKey())); - - LOG.debug( - "expanding bucket {}, prefix {}, against pattern {}", - glob.getBucket(), - keyPrefix, - wildcardRegexp.toString()); - - ImmutableList.Builder expandedPaths = ImmutableList.builder(); - String continuationToken = null; - - do { - ListObjectsV2Request request = - new ListObjectsV2Request() - .withBucketName(glob.getBucket()) - .withPrefix(keyPrefix) - .withContinuationToken(continuationToken); - ListObjectsV2Result result; - try { - result = amazonS3.get().listObjectsV2(request); - } catch (AmazonClientException e) { - return ExpandedGlob.create(glob, new IOException(e)); - } - continuationToken = result.getNextContinuationToken(); - - for (S3ObjectSummary objectSummary : result.getObjectSummaries()) { - // Filter against regex. - if (wildcardRegexp.matcher(objectSummary.getKey()).matches()) { - S3ResourceId expandedPath = - S3ResourceId.fromComponents( - glob.getScheme(), objectSummary.getBucketName(), objectSummary.getKey()) - .withSize(objectSummary.getSize()) - .withLastModified(objectSummary.getLastModified()); - LOG.debug("Expanded S3 object path {}", expandedPath); - expandedPaths.add(expandedPath); - } - } - } while (continuationToken != null); - - return ExpandedGlob.create(glob, expandedPaths.build()); - } - - private PathWithEncoding getPathContentEncoding(S3ResourceId path) { - ObjectMetadata s3Metadata; - try { - s3Metadata = getObjectMetadata(path); - } catch (AmazonClientException e) { - if (e instanceof AmazonS3Exception && ((AmazonS3Exception) e).getStatusCode() == 404) { - return PathWithEncoding.create(path, new FileNotFoundException()); - } - return PathWithEncoding.create(path, new IOException(e)); - } - return PathWithEncoding.create(path, Strings.nullToEmpty(s3Metadata.getContentEncoding())); - } - - private List matchNonGlobPaths(Collection paths) throws IOException { - List> tasks = new ArrayList<>(paths.size()); - for (final S3ResourceId path : paths) { - tasks.add(() -> matchNonGlobPath(path)); - } - - return callTasks(tasks); - } - - private ObjectMetadata getObjectMetadata(S3ResourceId s3ResourceId) throws AmazonClientException { - GetObjectMetadataRequest request = - new GetObjectMetadataRequest(s3ResourceId.getBucket(), s3ResourceId.getKey()); - request.setSSECustomerKey(config.getSSECustomerKey()); - return amazonS3.get().getObjectMetadata(request); - } - - @VisibleForTesting - MatchResult matchNonGlobPath(S3ResourceId path) { - ObjectMetadata s3Metadata; - try { - s3Metadata = getObjectMetadata(path); - } catch (AmazonClientException e) { - if (e instanceof AmazonS3Exception && ((AmazonS3Exception) e).getStatusCode() == 404) { - return MatchResult.create(MatchResult.Status.NOT_FOUND, new FileNotFoundException()); - } - return MatchResult.create(MatchResult.Status.ERROR, new IOException(e)); - } - - return MatchResult.create( - MatchResult.Status.OK, - ImmutableList.of( - createBeamMetadata( - path.withSize(s3Metadata.getContentLength()) - .withLastModified(s3Metadata.getLastModified()), - Strings.nullToEmpty(s3Metadata.getContentEncoding()), - s3Metadata.getETag()))); - } - - private static MatchResult.Metadata createBeamMetadata( - S3ResourceId path, String contentEncoding, String eTag) { - checkArgument(path.getSize().isPresent(), "The resource id should have a size."); - checkNotNull(contentEncoding, "contentEncoding"); - boolean isReadSeekEfficient = !NON_READ_SEEK_EFFICIENT_ENCODINGS.contains(contentEncoding); - - MatchResult.Metadata.Builder ret = - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(isReadSeekEfficient) - .setResourceId(path) - .setSizeBytes(path.getSize().get()) - .setLastModifiedMillis(path.getLastModified().transform(Date::getTime).or(0L)); - if (eTag != null) { - ret.setChecksum(eTag); - } - return ret.build(); - } - - @Override - protected WritableByteChannel create(S3ResourceId resourceId, CreateOptions createOptions) - throws IOException { - return new S3WritableByteChannel(amazonS3.get(), resourceId, createOptions.mimeType(), config); - } - - @Override - protected ReadableByteChannel open(S3ResourceId resourceId) throws IOException { - return new S3ReadableSeekableByteChannel(amazonS3.get(), resourceId, config); - } - - @Override - protected void copy(List sourcePaths, List destinationPaths) - throws IOException { - checkArgument( - sourcePaths.size() == destinationPaths.size(), - "sizes of sourcePaths and destinationPaths do not match"); - - List> tasks = new ArrayList<>(sourcePaths.size()); - - Iterator sourcePathsIterator = sourcePaths.iterator(); - Iterator destinationPathsIterator = destinationPaths.iterator(); - while (sourcePathsIterator.hasNext()) { - final S3ResourceId sourcePath = sourcePathsIterator.next(); - final S3ResourceId destinationPath = destinationPathsIterator.next(); - - tasks.add( - () -> { - copy(sourcePath, destinationPath); - return null; - }); - } - - callTasks(tasks); - } - - @VisibleForTesting - void copy(S3ResourceId sourcePath, S3ResourceId destinationPath) throws IOException { - try { - ObjectMetadata sourceObjectMetadata = getObjectMetadata(sourcePath); - if (sourceObjectMetadata.getContentLength() < MAX_COPY_OBJECT_SIZE_BYTES) { - atomicCopy(sourcePath, destinationPath, sourceObjectMetadata); - } else { - multipartCopy(sourcePath, destinationPath, sourceObjectMetadata); - } - } catch (AmazonClientException e) { - throw new IOException(e); - } - } - - @VisibleForTesting - CopyObjectResult atomicCopy( - S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) - throws AmazonClientException { - CopyObjectRequest copyObjectRequest = - new CopyObjectRequest( - sourcePath.getBucket(), - sourcePath.getKey(), - destinationPath.getBucket(), - destinationPath.getKey()); - copyObjectRequest.setNewObjectMetadata(sourceObjectMetadata); - copyObjectRequest.setStorageClass(config.getS3StorageClass()); - copyObjectRequest.setSourceSSECustomerKey(config.getSSECustomerKey()); - copyObjectRequest.setDestinationSSECustomerKey(config.getSSECustomerKey()); - return amazonS3.get().copyObject(copyObjectRequest); - } - - @VisibleForTesting - CompleteMultipartUploadResult multipartCopy( - S3ResourceId sourcePath, S3ResourceId destinationPath, ObjectMetadata sourceObjectMetadata) - throws AmazonClientException { - InitiateMultipartUploadRequest initiateUploadRequest = - new InitiateMultipartUploadRequest(destinationPath.getBucket(), destinationPath.getKey()) - .withStorageClass(config.getS3StorageClass()) - .withObjectMetadata(sourceObjectMetadata) - .withSSECustomerKey(config.getSSECustomerKey()); - - InitiateMultipartUploadResult initiateUploadResult = - amazonS3.get().initiateMultipartUpload(initiateUploadRequest); - final String uploadId = initiateUploadResult.getUploadId(); - - List eTags = new ArrayList<>(); - - final long objectSize = sourceObjectMetadata.getContentLength(); - // extra validation in case a caller calls directly S3FileSystem.multipartCopy - // without using S3FileSystem.copy in the future - if (objectSize == 0) { - final CopyPartRequest copyPartRequest = - new CopyPartRequest() - .withSourceBucketName(sourcePath.getBucket()) - .withSourceKey(sourcePath.getKey()) - .withDestinationBucketName(destinationPath.getBucket()) - .withDestinationKey(destinationPath.getKey()) - .withUploadId(uploadId) - .withPartNumber(1); - copyPartRequest.setSourceSSECustomerKey(config.getSSECustomerKey()); - copyPartRequest.setDestinationSSECustomerKey(config.getSSECustomerKey()); - - CopyPartResult copyPartResult = amazonS3.get().copyPart(copyPartRequest); - eTags.add(copyPartResult.getPartETag()); - } else { - long bytePosition = 0; - // Amazon parts are 1-indexed, not zero-indexed. - for (int partNumber = 1; bytePosition < objectSize; partNumber++) { - final CopyPartRequest copyPartRequest = - new CopyPartRequest() - .withSourceBucketName(sourcePath.getBucket()) - .withSourceKey(sourcePath.getKey()) - .withDestinationBucketName(destinationPath.getBucket()) - .withDestinationKey(destinationPath.getKey()) - .withUploadId(uploadId) - .withPartNumber(partNumber) - .withFirstByte(bytePosition) - .withLastByte( - Math.min(objectSize - 1, bytePosition + MAX_COPY_OBJECT_SIZE_BYTES - 1)); - copyPartRequest.setSourceSSECustomerKey(config.getSSECustomerKey()); - copyPartRequest.setDestinationSSECustomerKey(config.getSSECustomerKey()); - - CopyPartResult copyPartResult = amazonS3.get().copyPart(copyPartRequest); - eTags.add(copyPartResult.getPartETag()); - - bytePosition += MAX_COPY_OBJECT_SIZE_BYTES; - } - } - - CompleteMultipartUploadRequest completeUploadRequest = - new CompleteMultipartUploadRequest() - .withBucketName(destinationPath.getBucket()) - .withKey(destinationPath.getKey()) - .withUploadId(uploadId) - .withPartETags(eTags); - return amazonS3.get().completeMultipartUpload(completeUploadRequest); - } - - @Override - protected void rename( - List sourceResourceIds, - List destinationResourceIds, - MoveOptions... moveOptions) - throws IOException { - if (moveOptions.length > 0) { - throw new UnsupportedOperationException("Support for move options is not yet implemented."); - } - copy(sourceResourceIds, destinationResourceIds); - delete(sourceResourceIds); - } - - @Override - protected void delete(Collection resourceIds) throws IOException { - List nonDirectoryPaths = - resourceIds.stream() - .filter(s3ResourceId -> !s3ResourceId.isDirectory()) - .collect(Collectors.toList()); - Multimap keysByBucket = ArrayListMultimap.create(); - for (S3ResourceId path : nonDirectoryPaths) { - keysByBucket.put(path.getBucket(), path.getKey()); - } - - List> tasks = new ArrayList<>(); - for (final String bucket : keysByBucket.keySet()) { - for (final List keysPartition : - Iterables.partition(keysByBucket.get(bucket), MAX_DELETE_OBJECTS_PER_REQUEST)) { - tasks.add( - () -> { - delete(bucket, keysPartition); - return null; - }); - } - } - - callTasks(tasks); - } - - private void delete(String bucket, Collection keys) throws IOException { - checkArgument( - keys.size() <= MAX_DELETE_OBJECTS_PER_REQUEST, - "only %s keys can be deleted per request, but got %s", - MAX_DELETE_OBJECTS_PER_REQUEST, - keys.size()); - List deleteKeyVersions = - keys.stream().map(KeyVersion::new).collect(Collectors.toList()); - DeleteObjectsRequest request = - new DeleteObjectsRequest(bucket).withKeys(deleteKeyVersions).withQuiet(true); - try { - amazonS3.get().deleteObjects(request); - } catch (AmazonClientException e) { - throw new IOException(e); - } - } - - @Override - protected S3ResourceId matchNewResource(String singleResourceSpec, boolean isDirectory) { - if (isDirectory) { - if (!singleResourceSpec.endsWith("/")) { - singleResourceSpec += "/"; - } - } else { - checkArgument( - !singleResourceSpec.endsWith("/"), - "Expected a file path, but [%s] ends with '/'. This is unsupported in S3FileSystem.", - singleResourceSpec); - } - return S3ResourceId.fromUri(singleResourceSpec); - } - - @Override - protected void reportLineage(S3ResourceId resourceId, Lineage lineage) { - reportLineage(resourceId, lineage, LineageLevel.FILE); - } - - @Override - protected void reportLineage(S3ResourceId resourceId, Lineage lineage, LineageLevel level) { - ImmutableList.Builder segments = - ImmutableList.builder().add(resourceId.getBucket()); - if (level != LineageLevel.TOP_LEVEL && !resourceId.getKey().isEmpty()) { - segments.add(resourceId.getKey()); - } - lineage.add("s3", segments.build()); - } - - /** - * Invokes tasks in a thread pool, then unwraps the resulting {@link Future Futures}. - * - *

Any task exception is wrapped in {@link IOException}. - */ - private List callTasks(Collection> tasks) throws IOException { - - try { - List> futures = new ArrayList<>(tasks.size()); - for (Callable task : tasks) { - futures.add(MoreFutures.supplyAsync(task::call, executorService)); - } - return MoreFutures.get(MoreFutures.allAsList(futures)); - - } catch (ExecutionException e) { - if (e.getCause() != null) { - if (e.getCause() instanceof IOException) { - throw (IOException) e.getCause(); - } - throw new IOException(e.getCause()); - } - throw new IOException(e); - - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("executor service was interrupted"); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemConfiguration.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemConfiguration.java deleted file mode 100644 index 248f99aa0651..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemConfiguration.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.google.auto.value.AutoValue; -import javax.annotation.Nullable; -import org.apache.beam.sdk.io.aws.options.S3ClientBuilderFactory; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.util.InstanceBuilder; - -/** - * Object used to configure {@link S3FileSystem}. - * - * @see S3Options - * @see S3FileSystemSchemeRegistrar - */ -@AutoValue -public abstract class S3FileSystemConfiguration { - public static final int MINIMUM_UPLOAD_BUFFER_SIZE_BYTES = - S3Options.S3UploadBufferSizeBytesFactory.MINIMUM_UPLOAD_BUFFER_SIZE_BYTES; - - /** The uri scheme used by resources on this filesystem. */ - public abstract String getScheme(); - - /** The AWS S3 storage class used for creating S3 objects. */ - public abstract String getS3StorageClass(); - - /** Size of S3 upload chunks. */ - public abstract int getS3UploadBufferSizeBytes(); - - /** Thread pool size, limiting the max concurrent S3 operations. */ - public abstract int getS3ThreadPoolSize(); - - /** Algorithm for SSE-S3 encryption, e.g. AES256. */ - public abstract @Nullable String getSSEAlgorithm(); - - /** SSE key for SSE-C encryption, e.g. a base64 encoded key and the algorithm. */ - public abstract @Nullable SSECustomerKey getSSECustomerKey(); - - /** KMS key id for SSE-KMS encryption, e.g. "arn:aws:kms:...". */ - public abstract @Nullable SSEAwsKeyManagementParams getSSEAwsKeyManagementParams(); - - /** - * Whether to ose an S3 Bucket Key for object encryption with server-side encryption using AWS KMS - * (SSE-KMS) or not. - */ - public abstract boolean getBucketKeyEnabled(); - - /** Builder used to create the {@code AmazonS3Client}. */ - public abstract AmazonS3ClientBuilder getS3ClientBuilder(); - - /** Creates a new uninitialized {@link Builder}. */ - public static Builder builder() { - return new AutoValue_S3FileSystemConfiguration.Builder(); - } - - /** Creates a new {@link Builder} with values initialized by this instance's properties. */ - public abstract Builder toBuilder(); - - /** - * Creates a new {@link Builder} with values initialized by the properties of {@code s3Options}. - */ - public static Builder fromS3Options(S3Options s3Options) { - return builder() - .setScheme("s3") - .setS3StorageClass(s3Options.getS3StorageClass()) - .setS3UploadBufferSizeBytes(s3Options.getS3UploadBufferSizeBytes()) - .setS3ThreadPoolSize(s3Options.getS3ThreadPoolSize()) - .setSSEAlgorithm(s3Options.getSSEAlgorithm()) - .setSSECustomerKey(s3Options.getSSECustomerKey()) - .setSSEAwsKeyManagementParams(s3Options.getSSEAwsKeyManagementParams()) - .setBucketKeyEnabled(s3Options.getBucketKeyEnabled()) - .setS3ClientBuilder(getBuilder(s3Options)); - } - - /** Creates a new {@link AmazonS3ClientBuilder} as specified by {@code s3Options}. */ - public static AmazonS3ClientBuilder getBuilder(S3Options s3Options) { - return InstanceBuilder.ofType(S3ClientBuilderFactory.class) - .fromClass(s3Options.getS3ClientFactoryClass()) - .build() - .createBuilder(s3Options); - } - - @AutoValue.Builder - public abstract static class Builder { - public abstract Builder setScheme(String value); - - public abstract Builder setS3StorageClass(String value); - - public abstract Builder setS3UploadBufferSizeBytes(int value); - - public abstract Builder setS3ThreadPoolSize(int value); - - public abstract Builder setSSEAlgorithm(@Nullable String value); - - public abstract Builder setSSECustomerKey(@Nullable SSECustomerKey value); - - public abstract Builder setSSEAwsKeyManagementParams(@Nullable SSEAwsKeyManagementParams value); - - public abstract Builder setBucketKeyEnabled(boolean value); - - public abstract Builder setS3ClientBuilder(AmazonS3ClientBuilder value); - - public abstract S3FileSystemConfiguration build(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.java deleted file mode 100644 index af153de42622..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemRegistrar.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.google.auto.service.AutoService; -import java.util.Map; -import java.util.ServiceLoader; -import java.util.stream.Collectors; -import javax.annotation.Nonnull; -import org.apache.beam.sdk.io.FileSystem; -import org.apache.beam.sdk.io.FileSystemRegistrar; -import org.apache.beam.sdk.options.PipelineOptions; -import org.apache.beam.sdk.util.common.ReflectHelpers; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Streams; - -/** - * {@link AutoService} registrar for the {@link S3FileSystem}. - * - *

Creates instances of {@link S3FileSystem} for each scheme registered with a {@link - * S3FileSystemSchemeRegistrar}. - */ -@AutoService(FileSystemRegistrar.class) -public class S3FileSystemRegistrar implements FileSystemRegistrar { - - @Override - public Iterable> fromOptions(@Nonnull PipelineOptions options) { - checkNotNull(options, "Expect the runner have called FileSystems.setDefaultPipelineOptions()."); - Map> fileSystems = - Streams.stream( - ServiceLoader.load( - S3FileSystemSchemeRegistrar.class, ReflectHelpers.findClassLoader())) - .flatMap(r -> Streams.stream(r.fromOptions(options))) - .map(S3FileSystem::new) - // Throws IllegalStateException if any duplicate schemes exist. - .collect(Collectors.toMap(S3FileSystem::getScheme, f -> (FileSystem) f)); - return fileSystems.values(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemSchemeRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemSchemeRegistrar.java deleted file mode 100644 index 191b6f2cd244..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemSchemeRegistrar.java +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import com.google.auto.service.AutoService; -import java.util.ServiceLoader; -import javax.annotation.Nonnull; -import org.apache.beam.sdk.io.FileSystem; -import org.apache.beam.sdk.io.FileSystemRegistrar; -import org.apache.beam.sdk.options.PipelineOptions; - -/** - * A registrar that creates {@link S3FileSystemConfiguration} instances from {@link - * PipelineOptions}. - * - *

Users of storage systems that use the S3 protocol have the ability to register a URI scheme by - * creating a {@link ServiceLoader} entry and a concrete implementation of this interface. - * - *

It is optional but recommended to use one of the many build time tools such as {@link - * AutoService} to generate the necessary META-INF files automatically. - */ -public interface S3FileSystemSchemeRegistrar { - /** - * Create zero or more {@link S3FileSystemConfiguration} instances from the given {@link - * PipelineOptions}. - * - *

Each {@link S3FileSystemConfiguration#getScheme() scheme} is required to be unique among all - * schemes registered by all {@link S3FileSystemSchemeRegistrar}s, as well as among all {@link - * FileSystem}s registered by all {@link FileSystemRegistrar}s. - */ - Iterable fromOptions(@Nonnull PipelineOptions options); -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ReadableSeekableByteChannel.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ReadableSeekableByteChannel.java deleted file mode 100644 index bef1fc340888..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ReadableSeekableByteChannel.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static com.amazonaws.util.IOUtils.drainInputStream; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.GetObjectRequest; -import com.amazonaws.services.s3.model.S3Object; -import com.amazonaws.services.s3.model.S3ObjectInputStream; -import java.io.BufferedInputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.ClosedChannelException; -import java.nio.channels.NonWritableChannelException; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.SeekableByteChannel; - -/** A readable S3 object, as a {@link SeekableByteChannel}. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class S3ReadableSeekableByteChannel implements SeekableByteChannel { - - private final AmazonS3 amazonS3; - private final S3ResourceId path; - private final long contentLength; - private long position = 0; - private boolean open = true; - private S3Object s3Object; - private final S3FileSystemConfiguration config; - private ReadableByteChannel s3ObjectContentChannel; - - S3ReadableSeekableByteChannel( - AmazonS3 amazonS3, S3ResourceId path, S3FileSystemConfiguration config) throws IOException { - this.amazonS3 = checkNotNull(amazonS3, "amazonS3"); - checkNotNull(path, "path"); - this.config = checkNotNull(config, "config"); - - if (path.getSize().isPresent()) { - contentLength = path.getSize().get(); - this.path = path; - - } else { - try { - contentLength = - amazonS3.getObjectMetadata(path.getBucket(), path.getKey()).getContentLength(); - } catch (AmazonClientException e) { - throw new IOException(e); - } - this.path = path.withSize(contentLength); - } - } - - @Override - public int read(ByteBuffer destinationBuffer) throws IOException { - if (!isOpen()) { - throw new ClosedChannelException(); - } - if (!destinationBuffer.hasRemaining()) { - return 0; - } - if (position == contentLength) { - return -1; - } - - if (s3Object == null) { - GetObjectRequest request = new GetObjectRequest(path.getBucket(), path.getKey()); - request.setSSECustomerKey(config.getSSECustomerKey()); - if (position > 0) { - request.setRange(position, contentLength); - } - try { - s3Object = amazonS3.getObject(request); - } catch (AmazonClientException e) { - throw new IOException(e); - } - s3ObjectContentChannel = - Channels.newChannel(new BufferedInputStream(s3Object.getObjectContent(), 1024 * 1024)); - } - - int totalBytesRead = 0; - int bytesRead = 0; - - do { - totalBytesRead += bytesRead; - try { - bytesRead = s3ObjectContentChannel.read(destinationBuffer); - } catch (AmazonClientException e) { - // TODO replace all catch AmazonServiceException with client exception - throw new IOException(e); - } - } while (bytesRead > 0); - - position += totalBytesRead; - return totalBytesRead; - } - - @Override - public long position() throws ClosedChannelException { - if (!isOpen()) { - throw new ClosedChannelException(); - } - return position; - } - - @Override - public SeekableByteChannel position(long newPosition) throws IOException { - if (!isOpen()) { - throw new ClosedChannelException(); - } - checkArgument(newPosition >= 0, "newPosition too low"); - checkArgument(newPosition < contentLength, "new position too high"); - - if (newPosition == position) { - return this; - } - - // The position has changed, so close and destroy the object to induce a re-creation on the next - // call to read() - if (s3Object != null) { - s3Object.close(); - s3Object = null; - } - position = newPosition; - return this; - } - - @Override - public long size() throws ClosedChannelException { - if (!isOpen()) { - throw new ClosedChannelException(); - } - return contentLength; - } - - @Override - public void close() throws IOException { - if (s3Object != null) { - S3ObjectInputStream s3ObjectInputStream = s3Object.getObjectContent(); - drainInputStream(s3ObjectInputStream); - s3Object.close(); - } - open = false; - } - - @Override - public boolean isOpen() { - return open; - } - - @Override - public int write(ByteBuffer src) { - throw new NonWritableChannelException(); - } - - @Override - public SeekableByteChannel truncate(long size) { - throw new NonWritableChannelException(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ResourceId.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ResourceId.java deleted file mode 100644 index 2751f98d7f6e..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3ResourceId.java +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.io.ObjectStreamException; -import java.util.Date; -import java.util.Objects; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import org.apache.beam.sdk.io.fs.ResolveOptions; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Optional; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Strings; -import org.checkerframework.checker.nullness.qual.Nullable; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class S3ResourceId implements ResourceId { - - private static final long serialVersionUID = -8218379666994031337L; - - static final String DEFAULT_SCHEME = "s3"; - - private static final Pattern S3_URI = - Pattern.compile("(?[^:]+)://(?[^/]+)(/(?.*))?"); - - /** Matches a glob containing a wildcard, capturing the portion before the first wildcard. */ - private static final Pattern GLOB_PREFIX = Pattern.compile("(?[^\\[*?]*)[\\[*?].*"); - - private final String bucket; - private final String key; - private final Long size; - private final Date lastModified; - private final String scheme; - - private S3ResourceId( - String scheme, String bucket, String key, @Nullable Long size, @Nullable Date lastModified) { - checkArgument(!Strings.isNullOrEmpty(scheme), "scheme"); - checkArgument(!Strings.isNullOrEmpty(bucket), "bucket"); - checkArgument(!bucket.contains("/"), "bucket must not contain '/': [%s]", bucket); - this.scheme = scheme; - this.bucket = bucket; - this.key = checkNotNull(key, "key"); - this.size = size; - this.lastModified = lastModified; - } - - private Object readResolve() throws ObjectStreamException { - if (scheme == null) { - return new S3ResourceId(DEFAULT_SCHEME, bucket, key, size, lastModified); - } - return this; - } - - static S3ResourceId fromComponents(String scheme, String bucket, String key) { - if (!key.startsWith("/")) { - key = "/" + key; - } - return new S3ResourceId(scheme, bucket, key, null, null); - } - - static S3ResourceId fromUri(String uri) { - Matcher m = S3_URI.matcher(uri); - checkArgument(m.matches(), "Invalid S3 URI: [%s]", uri); - String scheme = m.group("SCHEME"); - String bucket = m.group("BUCKET"); - String key = Strings.nullToEmpty(m.group("KEY")); - if (!key.startsWith("/")) { - key = "/" + key; - } - return fromComponents(scheme, bucket, key); - } - - String getBucket() { - return bucket; - } - - String getKey() { - // Skip leading slash - return key.substring(1); - } - - Optional getSize() { - return Optional.fromNullable(size); - } - - S3ResourceId withSize(long size) { - return new S3ResourceId(scheme, bucket, key, size, lastModified); - } - - Optional getLastModified() { - return Optional.fromNullable(lastModified); - } - - S3ResourceId withLastModified(Date lastModified) { - return new S3ResourceId(scheme, bucket, key, size, lastModified); - } - - @Override - public ResourceId resolve(String other, ResolveOptions resolveOptions) { - checkState(isDirectory(), "Expected this resource to be a directory, but was [%s]", toString()); - - if (resolveOptions == StandardResolveOptions.RESOLVE_DIRECTORY) { - if ("..".equals(other)) { - if ("/".equals(key)) { - return this; - } - int parentStopsAt = key.substring(0, key.length() - 1).lastIndexOf('/'); - return fromComponents(scheme, bucket, key.substring(0, parentStopsAt + 1)); - } - - if ("".equals(other)) { - return this; - } - - if (!other.endsWith("/")) { - other += "/"; - } - if (S3_URI.matcher(other).matches()) { - return resolveFromUri(other); - } - return fromComponents(scheme, bucket, key + other); - } - - if (resolveOptions == StandardResolveOptions.RESOLVE_FILE) { - checkArgument( - !other.endsWith("/"), "Cannot resolve a file with a directory path: [%s]", other); - checkArgument(!"..".equals(other), "Cannot resolve parent as file: [%s]", other); - if (S3_URI.matcher(other).matches()) { - return resolveFromUri(other); - } - return fromComponents(scheme, bucket, key + other); - } - - throw new UnsupportedOperationException( - String.format("Unexpected StandardResolveOptions [%s]", resolveOptions)); - } - - private S3ResourceId resolveFromUri(String uri) { - S3ResourceId id = fromUri(uri); - checkArgument( - id.getScheme().equals(scheme), - "Cannot resolve a URI as a child resource unless its scheme is [%s]; instead it was [%s]", - scheme, - id.getScheme()); - return id; - } - - @Override - public ResourceId getCurrentDirectory() { - if (isDirectory()) { - return this; - } - return fromComponents(scheme, getBucket(), key.substring(0, key.lastIndexOf('/') + 1)); - } - - @Override - public String getScheme() { - return scheme; - } - - @Override - public @Nullable String getFilename() { - if (!isDirectory()) { - return key.substring(key.lastIndexOf('/') + 1); - } - if ("/".equals(key)) { - return null; - } - String keyWithoutTrailingSlash = key.substring(0, key.length() - 1); - return keyWithoutTrailingSlash.substring(keyWithoutTrailingSlash.lastIndexOf('/') + 1); - } - - @Override - public boolean isDirectory() { - return key.endsWith("/"); - } - - boolean isWildcard() { - return GLOB_PREFIX.matcher(getKey()).matches(); - } - - String getKeyNonWildcardPrefix() { - Matcher m = GLOB_PREFIX.matcher(getKey()); - checkArgument(m.matches(), String.format("Glob expression: [%s] is not expandable.", getKey())); - return m.group("PREFIX"); - } - - @Override - public String toString() { - return String.format("%s://%s%s", scheme, bucket, key); - } - - @Override - public boolean equals(@Nullable Object obj) { - if (!(obj instanceof S3ResourceId)) { - return false; - } - S3ResourceId o = (S3ResourceId) obj; - - return scheme.equals(o.scheme) && bucket.equals(o.bucket) && key.equals(o.key); - } - - @Override - public int hashCode() { - return Objects.hash(scheme, bucket, key); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannel.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannel.java deleted file mode 100644 index 3594ca5b0aaa..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannel.java +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.PartETag; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import com.amazonaws.util.Base64; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.ClosedChannelException; -import java.nio.channels.WritableByteChannel; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.ArrayList; -import java.util.List; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; - -/** A writable S3 object, as a {@link WritableByteChannel}. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class S3WritableByteChannel implements WritableByteChannel { - private final AmazonS3 amazonS3; - private final S3FileSystemConfiguration config; - private final S3ResourceId path; - - private final String uploadId; - private final ByteBuffer uploadBuffer; - private final List eTags; - - // AWS S3 parts are 1-indexed, not zero-indexed. - private int partNumber = 1; - private boolean open = true; - private final MessageDigest md5 = md5(); - - S3WritableByteChannel( - AmazonS3 amazonS3, S3ResourceId path, String contentType, S3FileSystemConfiguration config) - throws IOException { - this.amazonS3 = checkNotNull(amazonS3, "amazonS3"); - this.config = checkNotNull(config); - this.path = checkNotNull(path, "path"); - checkArgument( - atMostOne( - config.getSSECustomerKey() != null, - config.getSSEAlgorithm() != null, - config.getSSEAwsKeyManagementParams() != null), - "Either SSECustomerKey (SSE-C) or SSEAlgorithm (SSE-S3)" - + " or SSEAwsKeyManagementParams (SSE-KMS) must not be set at the same time."); - // Amazon S3 API docs: Each part must be at least 5 MB in size, except the last part. - checkArgument( - config.getS3UploadBufferSizeBytes() - >= S3FileSystemConfiguration.MINIMUM_UPLOAD_BUFFER_SIZE_BYTES, - "S3UploadBufferSizeBytes must be at least %s bytes", - S3FileSystemConfiguration.MINIMUM_UPLOAD_BUFFER_SIZE_BYTES); - this.uploadBuffer = ByteBuffer.allocate(config.getS3UploadBufferSizeBytes()); - eTags = new ArrayList<>(); - - ObjectMetadata objectMetadata = new ObjectMetadata(); - objectMetadata.setContentType(contentType); - if (config.getSSEAlgorithm() != null) { - objectMetadata.setSSEAlgorithm(config.getSSEAlgorithm()); - } - InitiateMultipartUploadRequest request = - new InitiateMultipartUploadRequest(path.getBucket(), path.getKey()) - .withStorageClass(config.getS3StorageClass()) - .withObjectMetadata(objectMetadata); - request.setSSECustomerKey(config.getSSECustomerKey()); - request.setSSEAwsKeyManagementParams(config.getSSEAwsKeyManagementParams()); - request.setBucketKeyEnabled(config.getBucketKeyEnabled()); - InitiateMultipartUploadResult result; - try { - result = amazonS3.initiateMultipartUpload(request); - } catch (AmazonClientException e) { - throw new IOException(e); - } - uploadId = result.getUploadId(); - } - - private static MessageDigest md5() { - try { - return MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new IllegalStateException(e); - } - } - - @Override - public int write(ByteBuffer sourceBuffer) throws IOException { - if (!isOpen()) { - throw new ClosedChannelException(); - } - - int totalBytesWritten = 0; - while (sourceBuffer.hasRemaining()) { - int position = sourceBuffer.position(); - int bytesWritten = Math.min(sourceBuffer.remaining(), uploadBuffer.remaining()); - totalBytesWritten += bytesWritten; - - if (sourceBuffer.hasArray()) { - // If the underlying array is accessible, direct access is the most efficient approach. - int start = sourceBuffer.arrayOffset() + position; - uploadBuffer.put(sourceBuffer.array(), start, bytesWritten); - md5.update(sourceBuffer.array(), start, bytesWritten); - } else { - // Otherwise, use a readonly copy with an appropriate mark to read the current range of the - // buffer twice. - ByteBuffer copyBuffer = sourceBuffer.asReadOnlyBuffer(); - copyBuffer.mark().limit(position + bytesWritten); - uploadBuffer.put(copyBuffer); - copyBuffer.reset(); - md5.update(copyBuffer); - } - sourceBuffer.position(position + bytesWritten); // move position forward by the bytes written - - if (!uploadBuffer.hasRemaining() || sourceBuffer.hasRemaining()) { - flush(); - } - } - - return totalBytesWritten; - } - - private void flush() throws IOException { - uploadBuffer.flip(); - ByteArrayInputStream inputStream = - new ByteArrayInputStream(uploadBuffer.array(), 0, uploadBuffer.limit()); - - UploadPartRequest request = - new UploadPartRequest() - .withBucketName(path.getBucket()) - .withKey(path.getKey()) - .withUploadId(uploadId) - .withPartNumber(partNumber++) - .withPartSize(uploadBuffer.limit()) - .withMD5Digest(Base64.encodeAsString(md5.digest())) - .withInputStream(inputStream); - request.setSSECustomerKey(config.getSSECustomerKey()); - - UploadPartResult result; - try { - result = amazonS3.uploadPart(request); - } catch (AmazonClientException e) { - throw new IOException(e); - } - uploadBuffer.clear(); - md5.reset(); - eTags.add(result.getPartETag()); - } - - @Override - public boolean isOpen() { - return open; - } - - @Override - public void close() throws IOException { - open = false; - if (uploadBuffer.remaining() > 0) { - flush(); - } - CompleteMultipartUploadRequest request = - new CompleteMultipartUploadRequest() - .withBucketName(path.getBucket()) - .withKey(path.getKey()) - .withUploadId(uploadId) - .withPartETags(eTags); - try { - amazonS3.completeMultipartUpload(request); - } catch (AmazonClientException e) { - throw new IOException(e); - } - } - - @VisibleForTesting - static boolean atMostOne(boolean... values) { - boolean one = false; - for (boolean value : values) { - if (!one && value) { - one = true; - } else if (value) { - return false; - } - } - return true; - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/package-info.java deleted file mode 100644 index ebbf1d8db5a5..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/s3/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines IO connectors for Amazon Web Services S3. */ -package org.apache.beam.sdk.io.aws.s3; diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/AwsClientsProvider.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/AwsClientsProvider.java deleted file mode 100644 index 6a90c0285f20..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/AwsClientsProvider.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.sns.AmazonSNS; -import java.io.Serializable; - -/** - * Provides instances of AWS clients. - * - *

Please note, that any instance of {@link AwsClientsProvider} must be {@link Serializable} to - * ensure it can be sent to worker machines. - */ -public interface AwsClientsProvider extends Serializable { - - /** @deprecated SnsIO doesn't require a CloudWatch client */ - @Deprecated - @SuppressWarnings("return") - default AmazonCloudWatch getCloudWatchClient() { - return null; - } - - AmazonSNS createSnsPublisher(); -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/BasicSnsProvider.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/BasicSnsProvider.java deleted file mode 100644 index aba3a74ccb2a..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/BasicSnsProvider.java +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.sns.AmazonSNS; -import com.amazonaws.services.sns.AmazonSNSClientBuilder; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Basic implementation of {@link AwsClientsProvider} used by default in {@link SnsIO}. */ -class BasicSnsProvider implements AwsClientsProvider { - - private final String accessKey; - private final String secretKey; - private final Regions region; - private final @Nullable String serviceEndpoint; - - BasicSnsProvider( - String accessKey, String secretKey, Regions region, @Nullable String serviceEndpoint) { - checkArgument(accessKey != null, "accessKey can not be null"); - checkArgument(secretKey != null, "secretKey can not be null"); - checkArgument(region != null, "region can not be null"); - this.accessKey = accessKey; - this.secretKey = secretKey; - this.region = region; - this.serviceEndpoint = serviceEndpoint; - } - - private AWSCredentialsProvider getCredentialsProvider() { - return new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey)); - } - - @Override - public AmazonSNS createSnsPublisher() { - AmazonSNSClientBuilder clientBuilder = - AmazonSNSClientBuilder.standard().withCredentials(getCredentialsProvider()); - if (serviceEndpoint == null) { - clientBuilder.withRegion(region); - } else { - clientBuilder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, region.getName())); - } - return clientBuilder.build(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/PublishResultCoders.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/PublishResultCoders.java deleted file mode 100644 index 6d546204d617..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/PublishResultCoders.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import com.amazonaws.ResponseMetadata; -import com.amazonaws.http.SdkHttpMetadata; -import com.amazonaws.services.sns.model.PublishResult; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.coders.CustomCoder; -import org.apache.beam.sdk.coders.NullableCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.io.aws.coders.AwsCoders; - -/** Coders for SNS {@link PublishResult}. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public final class PublishResultCoders { - - private static final Coder MESSAGE_ID_CODER = StringUtf8Coder.of(); - private static final Coder RESPONSE_METADATA_CODER = - NullableCoder.of(AwsCoders.responseMetadata()); - - private PublishResultCoders() {} - - /** - * Returns a new PublishResult coder which by default serializes only the messageId. - * - * @return the PublishResult coder - */ - public static Coder defaultPublishResult() { - return new PublishResultCoder(null, null); - } - - /** - * Returns a new PublishResult coder which serializes the sdkResponseMetadata and sdkHttpMetadata, - * including the HTTP response headers. - * - * @return the PublishResult coder - */ - public static Coder fullPublishResult() { - return new PublishResultCoder( - RESPONSE_METADATA_CODER, NullableCoder.of(AwsCoders.sdkHttpMetadata())); - } - - /** - * Returns a new PublishResult coder which serializes the sdkResponseMetadata and sdkHttpMetadata, - * but does not include the HTTP response headers. - * - * @return the PublishResult coder - */ - public static Coder fullPublishResultWithoutHeaders() { - return new PublishResultCoder( - RESPONSE_METADATA_CODER, NullableCoder.of(AwsCoders.sdkHttpMetadataWithoutHeaders())); - } - - static class PublishResultCoder extends CustomCoder { - - private final Coder responseMetadataEncoder; - private final Coder sdkHttpMetadataCoder; - - private PublishResultCoder( - Coder responseMetadataEncoder, - Coder sdkHttpMetadataCoder) { - this.responseMetadataEncoder = responseMetadataEncoder; - this.sdkHttpMetadataCoder = sdkHttpMetadataCoder; - } - - @Override - public void encode(PublishResult value, OutputStream outStream) - throws CoderException, IOException { - MESSAGE_ID_CODER.encode(value.getMessageId(), outStream); - if (responseMetadataEncoder != null) { - responseMetadataEncoder.encode(value.getSdkResponseMetadata(), outStream); - } - if (sdkHttpMetadataCoder != null) { - sdkHttpMetadataCoder.encode(value.getSdkHttpMetadata(), outStream); - } - } - - @Override - public PublishResult decode(InputStream inStream) throws CoderException, IOException { - String messageId = MESSAGE_ID_CODER.decode(inStream); - PublishResult publishResult = new PublishResult().withMessageId(messageId); - if (responseMetadataEncoder != null) { - publishResult.setSdkResponseMetadata(responseMetadataEncoder.decode(inStream)); - } - if (sdkHttpMetadataCoder != null) { - publishResult.setSdkHttpMetadata(sdkHttpMetadataCoder.decode(inStream)); - } - return publishResult; - } - - @Override - public void verifyDeterministic() throws NonDeterministicException { - MESSAGE_ID_CODER.verifyDeterministic(); - if (responseMetadataEncoder != null) { - responseMetadataEncoder.verifyDeterministic(); - } - if (sdkHttpMetadataCoder != null) { - sdkHttpMetadataCoder.verifyDeterministic(); - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsCoderProviderRegistrar.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsCoderProviderRegistrar.java deleted file mode 100644 index 315435861419..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsCoderProviderRegistrar.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import com.amazonaws.services.sns.model.PublishResult; -import com.google.auto.service.AutoService; -import java.util.List; -import org.apache.beam.sdk.coders.CoderProvider; -import org.apache.beam.sdk.coders.CoderProviderRegistrar; -import org.apache.beam.sdk.coders.CoderProviders; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** A {@link CoderProviderRegistrar} for standard types used with {@link SnsIO}. */ -@AutoService(CoderProviderRegistrar.class) -public class SnsCoderProviderRegistrar implements CoderProviderRegistrar { - @Override - public List getCoderProviders() { - return ImmutableList.of( - CoderProviders.forCoder( - TypeDescriptor.of(PublishResult.class), PublishResultCoders.defaultPublishResult())); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsIO.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsIO.java deleted file mode 100644 index 291026f82f7e..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/SnsIO.java +++ /dev/null @@ -1,420 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.sns.AmazonSNS; -import com.amazonaws.services.sns.model.GetTopicAttributesResult; -import com.amazonaws.services.sns.model.InternalErrorException; -import com.amazonaws.services.sns.model.PublishRequest; -import com.amazonaws.services.sns.model.PublishResult; -import com.google.auto.value.AutoValue; -import java.io.IOException; -import java.io.Serializable; -import java.util.function.Predicate; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.metrics.Counter; -import org.apache.beam.sdk.metrics.Metrics; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TupleTagList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.apache.http.HttpStatus; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link PTransform}s for writing to SNS. - * - *

Writing to SNS

- * - *

Example usage: - * - *

{@code
- * PCollection data = ...;
- *
- * data.apply(SnsIO.write()
- *     .withTopicName("topicName")
- *     .withRetryConfiguration(
- *        SnsIO.RetryConfiguration.create(
- *          4, org.joda.time.Duration.standardSeconds(10)))
- *     .withAWSClientsProvider(new BasicSnsProvider(accessKey, secretKey, region))
- *     .withResultOutputTag(results));
- * }
- * - *

As a client, you need to provide at least the following things: - * - *

    - *
  • name of the SNS topic you're going to write to - *
  • retry configuration - *
  • need to specify AwsClientsProvider. You can pass on the default one BasicSnsProvider - *
  • an output tag where you can get results. Example in SnsIOTest - *
- * - *

By default, the output PublishResult contains only the messageId, all other fields are null. - * If you need the full ResponseMetadata and SdkHttpMetadata you can call {@link - * Write#withFullPublishResult}. If you need the HTTP status code but not the response headers you - * can call {@link Write#withFullPublishResultWithoutHeaders}. - * - * @deprecated Module beam-sdks-java-io-amazon-web-services is deprecated and will be - * eventually removed. Please migrate to {@link org.apache.beam.sdk.io.aws2.sns.SnsIO} in module - * beam-sdks-java-io-amazon-web-services2. - */ -@Deprecated -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public final class SnsIO { - - // Write data tp SNS - public static Write write() { - return new AutoValue_SnsIO_Write.Builder().build(); - } - - /** - * A POJO encapsulating a configuration for retry behavior when issuing requests to SNS. A retry - * will be attempted until the maxAttempts or maxDuration is exceeded, whichever comes first, for - * any of the following exceptions: - * - *

    - *
  • {@link IOException} - *
- */ - @AutoValue - @AutoValue.CopyAnnotations - public abstract static class RetryConfiguration implements Serializable { - private static final Duration DEFAULT_INITIAL_DURATION = Duration.standardSeconds(5); - - @VisibleForTesting - static final RetryPredicate DEFAULT_RETRY_PREDICATE = new DefaultRetryPredicate(); - - abstract int getMaxAttempts(); - - abstract Duration getMaxDuration(); - - abstract Duration getInitialDuration(); - - abstract RetryPredicate getRetryPredicate(); - - abstract Builder builder(); - - public static RetryConfiguration create(int maxAttempts, Duration maxDuration) { - return create(maxAttempts, maxDuration, DEFAULT_INITIAL_DURATION); - } - - @VisibleForTesting - static RetryConfiguration create( - int maxAttempts, Duration maxDuration, Duration initialDuration) { - checkArgument(maxAttempts > 0, "maxAttempts should be greater than 0"); - checkArgument( - maxDuration != null && maxDuration.isLongerThan(Duration.ZERO), - "maxDuration should be greater than 0"); - checkArgument( - initialDuration != null && initialDuration.isLongerThan(Duration.ZERO), - "initialDuration should be greater than 0"); - return new AutoValue_SnsIO_RetryConfiguration.Builder() - .setMaxAttempts(maxAttempts) - .setMaxDuration(maxDuration) - .setInitialDuration(initialDuration) - .setRetryPredicate(DEFAULT_RETRY_PREDICATE) - .build(); - } - - @AutoValue.Builder - abstract static class Builder { - abstract SnsIO.RetryConfiguration.Builder setMaxAttempts(int maxAttempts); - - abstract SnsIO.RetryConfiguration.Builder setMaxDuration(Duration maxDuration); - - abstract SnsIO.RetryConfiguration.Builder setInitialDuration(Duration initialDuration); - - abstract SnsIO.RetryConfiguration.Builder setRetryPredicate(RetryPredicate retryPredicate); - - abstract SnsIO.RetryConfiguration build(); - } - - /** - * An interface used to control if we retry the SNS Publish call when a {@link Throwable} - * occurs. If {@link RetryPredicate#test(Object)} returns true, {@link Write} tries to resend - * the requests to SNS if the {@link RetryConfiguration} permits it. - */ - @FunctionalInterface - interface RetryPredicate extends Predicate, Serializable {} - - private static class DefaultRetryPredicate implements RetryPredicate { - private static final ImmutableSet ELIGIBLE_CODES = - ImmutableSet.of(HttpStatus.SC_SERVICE_UNAVAILABLE); - - @Override - public boolean test(Throwable throwable) { - return (throwable instanceof IOException - || (throwable instanceof InternalErrorException) - || (throwable instanceof InternalErrorException - && ELIGIBLE_CODES.contains(((InternalErrorException) throwable).getStatusCode()))); - } - } - } - - /** Implementation of {@link #write}. */ - @AutoValue - @AutoValue.CopyAnnotations - public abstract static class Write - extends PTransform, PCollectionTuple> { - - abstract @Nullable String getTopicName(); - - abstract @Nullable AwsClientsProvider getAWSClientsProvider(); - - abstract @Nullable RetryConfiguration getRetryConfiguration(); - - abstract @Nullable TupleTag getResultOutputTag(); - - abstract @Nullable Coder getCoder(); - - abstract Builder builder(); - - @AutoValue.Builder - abstract static class Builder { - - abstract Builder setTopicName(String topicName); - - abstract Builder setAWSClientsProvider(AwsClientsProvider clientProvider); - - abstract Builder setRetryConfiguration(RetryConfiguration retryConfiguration); - - abstract Builder setResultOutputTag(TupleTag results); - - abstract Builder setCoder(Coder coder); - - abstract Write build(); - } - - /** - * Specify the SNS topic which will be used for writing, this name is mandatory. - * - * @param topicName topicName - */ - public Write withTopicName(String topicName) { - return builder().setTopicName(topicName).build(); - } - - /** - * Allows to specify custom {@link AwsClientsProvider}. {@link AwsClientsProvider} creates new - * {@link AmazonSNS} which is later used for writing to a SNS topic. - */ - public Write withAWSClientsProvider(AwsClientsProvider awsClientsProvider) { - return builder().setAWSClientsProvider(awsClientsProvider).build(); - } - - /** - * Specify credential details and region to be used to write to SNS. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AwsClientsProvider)}. - */ - public Write withAWSClientsProvider(String awsAccessKey, String awsSecretKey, Regions region) { - return withAWSClientsProvider(awsAccessKey, awsSecretKey, region, null); - } - - /** - * Specify credential details and region to be used to write to SNS. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AwsClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - */ - public Write withAWSClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region, String serviceEndpoint) { - return withAWSClientsProvider( - new BasicSnsProvider(awsAccessKey, awsSecretKey, region, serviceEndpoint)); - } - - /** - * Provides configuration to retry a failed request to publish a message to SNS. Users should - * consider that retrying might compound the underlying problem which caused the initial - * failure. Users should also be aware that once retrying is exhausted the error is surfaced to - * the runner which may then opt to retry the current partition in entirety or abort if - * the max number of retries of the runner is completed. Retrying uses an exponential backoff - * algorithm, with minimum backoff of 5 seconds and then surfacing the error once the maximum - * number of retries or maximum configuration duration is exceeded. - * - *

Example use: - * - *

{@code
-     * SnsIO.write()
-     *   .withRetryConfiguration(SnsIO.RetryConfiguration.create(5, Duration.standardMinutes(1))
-     *   ...
-     * }
- * - * @param retryConfiguration the rules which govern the retry behavior - * @return the {@link Write} with retrying configured - */ - public Write withRetryConfiguration(RetryConfiguration retryConfiguration) { - checkArgument(retryConfiguration != null, "retryConfiguration is required"); - return builder().setRetryConfiguration(retryConfiguration).build(); - } - - /** Tuple tag to store results. Mandatory field. */ - public Write withResultOutputTag(TupleTag results) { - return builder().setResultOutputTag(results).build(); - } - - /** - * Encode the full {@code PublishResult} object, including sdkResponseMetadata and - * sdkHttpMetadata with the HTTP response headers. - */ - public Write withFullPublishResult() { - return withCoder(PublishResultCoders.fullPublishResult()); - } - - /** - * Encode the full {@code PublishResult} object, including sdkResponseMetadata and - * sdkHttpMetadata but excluding the HTTP response headers. - */ - public Write withFullPublishResultWithoutHeaders() { - return withCoder(PublishResultCoders.fullPublishResultWithoutHeaders()); - } - - /** Encode the {@code PublishResult} with the given coder. */ - public Write withCoder(Coder coder) { - return builder().setCoder(coder).build(); - } - - @Override - public PCollectionTuple expand(PCollection input) { - LoggerFactory.getLogger(SnsIO.class) - .warn( - "You are using a deprecated IO for Sns. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - checkArgument(getTopicName() != null, "withTopicName() is required"); - PCollectionTuple result = - input.apply( - ParDo.of(new SnsWriterFn(this)) - .withOutputTags(getResultOutputTag(), TupleTagList.empty())); - if (getCoder() != null) { - result.get(getResultOutputTag()).setCoder(getCoder()); - } - return result; - } - - static class SnsWriterFn extends DoFn { - @VisibleForTesting - static final String RETRY_ATTEMPT_LOG = "Error writing to SNS. Retry attempt[{}]"; - - private transient FluentBackoff retryBackoff; // defaults to no retries - private static final Logger LOG = LoggerFactory.getLogger(SnsWriterFn.class); - private static final Counter SNS_WRITE_FAILURES = - Metrics.counter(SnsWriterFn.class, "SNS_Write_Failures"); - - private final SnsIO.Write spec; - private transient AmazonSNS producer; - - SnsWriterFn(SnsIO.Write spec) { - this.spec = spec; - } - - @Setup - public void setup() throws Exception { - // Initialize SnsPublisher - producer = spec.getAWSClientsProvider().createSnsPublisher(); - checkArgument( - topicExists(producer, spec.getTopicName()), - "Topic %s does not exist", - spec.getTopicName()); - - retryBackoff = FluentBackoff.DEFAULT.withMaxRetries(0); // default to no retrying - if (spec.getRetryConfiguration() != null) { - retryBackoff = - retryBackoff - .withMaxRetries(spec.getRetryConfiguration().getMaxAttempts() - 1) - .withInitialBackoff(spec.getRetryConfiguration().getInitialDuration()) - .withMaxCumulativeBackoff(spec.getRetryConfiguration().getMaxDuration()); - } - } - - @ProcessElement - public void processElement(ProcessContext context) throws Exception { - PublishRequest request = context.element(); - Sleeper sleeper = Sleeper.DEFAULT; - BackOff backoff = retryBackoff.backoff(); - int attempt = 0; - while (true) { - attempt++; - try { - PublishResult pr = producer.publish(request); - context.output(pr); - break; - } catch (Exception ex) { - // Fail right away if there is no retry configuration - if (spec.getRetryConfiguration() == null - || !spec.getRetryConfiguration().getRetryPredicate().test(ex)) { - SNS_WRITE_FAILURES.inc(); - LOG.info("Unable to publish message {}.", request.getMessage(), ex); - throw new IOException("Error writing to SNS (no attempt made to retry)", ex); - } - - if (!BackOffUtils.next(sleeper, backoff)) { - throw new IOException( - String.format( - "Error writing to SNS after %d attempt(s). No more attempts allowed", - attempt), - ex); - } else { - // Note: this used in test cases to verify behavior - LOG.warn(RETRY_ATTEMPT_LOG, attempt, ex); - } - } - } - } - - @Teardown - public void tearDown() { - if (producer != null) { - producer.shutdown(); - producer = null; - } - } - - @SuppressWarnings({"checkstyle:illegalCatch"}) - private static boolean topicExists(AmazonSNS client, String topicName) { - try { - GetTopicAttributesResult topicAttributesResult = client.getTopicAttributes(topicName); - return topicAttributesResult != null - && topicAttributesResult.getSdkHttpMetadata().getHttpStatusCode() == 200; - } catch (Exception e) { - LOG.warn("Error checking whether topic {} exists.", topicName, e); - throw e; - } - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/package-info.java deleted file mode 100644 index a1895cf4ce6d..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sns/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines IO connectors for Amazon Web Services SNS. */ -package org.apache.beam.sdk.io.aws.sns; diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsCheckpointMark.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsCheckpointMark.java deleted file mode 100644 index b3e23bff5554..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsCheckpointMark.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import java.io.IOException; -import java.io.Serializable; -import java.util.List; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Objects; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.checkerframework.checker.nullness.qual.Nullable; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class SqsCheckpointMark implements UnboundedSource.CheckpointMark, Serializable { - - /** - * If the checkpoint is for persisting: the reader who's snapshotted state we are persisting. If - * the checkpoint is for restoring: {@literal null}. Not persisted in durable checkpoint. CAUTION: - * Between a checkpoint being taken and {@link #finalizeCheckpoint()} being called the 'true' - * active reader may have changed. - */ - private transient @Nullable SqsUnboundedReader reader; - - /** - * If the checkpoint is for persisting: The ids of messages which have been passed downstream - * since the last checkpoint. If the checkpoint is for restoring: {@literal null}. Not persisted - * in durable checkpoint. - */ - private @Nullable List safeToDeleteIds; - - /** - * If the checkpoint is for persisting: The receipt handles of messages which have been received - * from SQS but not yet passed downstream at the time of the snapshot. If the checkpoint is for - * restoring: Same, but recovered from durable storage. - */ - @VisibleForTesting final List notYetReadReceipts; - - public SqsCheckpointMark( - SqsUnboundedReader reader, List messagesToDelete, List notYetReadReceipts) { - this.reader = reader; - this.safeToDeleteIds = ImmutableList.copyOf(messagesToDelete); - this.notYetReadReceipts = ImmutableList.copyOf(notYetReadReceipts); - } - - @Override - public void finalizeCheckpoint() throws IOException { - checkState(reader != null && safeToDeleteIds != null, "Cannot finalize a restored checkpoint"); - // Even if the 'true' active reader has changed since the checkpoint was taken we are - // fine: - // - The underlying SQS topic will not have changed, so the following deletes will still - // go to the right place. - // - We'll delete the ACK ids from the readers in-flight state, but that only affect - // flow control and stats, neither of which are relevant anymore. - try { - reader.delete(safeToDeleteIds); - } finally { - int remainingInFlight = reader.numInFlightCheckpoints.decrementAndGet(); - checkState(remainingInFlight >= 0, "Miscounted in-flight checkpoints"); - reader.maybeCloseClient(); - reader = null; - safeToDeleteIds = null; - } - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - SqsCheckpointMark that = (SqsCheckpointMark) o; - return Objects.equal(safeToDeleteIds, that.safeToDeleteIds); - } - - @Override - public int hashCode() { - return Objects.hashCode(safeToDeleteIds); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsConfiguration.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsConfiguration.java deleted file mode 100644 index 3c798112325e..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsConfiguration.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; -import java.io.Serializable; -import org.apache.beam.sdk.io.aws.options.AwsModule; -import org.apache.beam.sdk.io.aws.options.AwsOptions; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class SqsConfiguration implements Serializable { - - private String awsRegion; - private String awsCredentialsProviderString; - private String awsClientConfigurationString; - - public SqsConfiguration(AwsOptions awsOptions) { - ObjectMapper om = new ObjectMapper(); - om.registerModule(new AwsModule()); - try { - this.awsCredentialsProviderString = - om.writeValueAsString(awsOptions.getAwsCredentialsProvider()); - } catch (JsonProcessingException e) { - this.awsCredentialsProviderString = null; - } - - try { - this.awsClientConfigurationString = - om.writeValueAsString(awsOptions.getClientConfiguration()); - } catch (JsonProcessingException e) { - this.awsClientConfigurationString = null; - } - - this.awsRegion = awsOptions.getAwsRegion(); - } - - public AWSCredentialsProvider getAwsCredentialsProvider() { - ObjectMapper om = new ObjectMapper(); - om.registerModule(new AwsModule()); - try { - return om.readValue(awsCredentialsProviderString, AWSCredentialsProvider.class); - } catch (IOException e) { - return null; - } - } - - public ClientConfiguration getClientConfiguration() { - ObjectMapper om = new ObjectMapper(); - om.registerModule(new AwsModule()); - try { - return om.readValue(awsClientConfigurationString, ClientConfiguration.class); - } catch (IOException e) { - return null; - } - } - - public String getAwsRegion() { - return awsRegion; - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsIO.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsIO.java deleted file mode 100644 index 26ca03c95c33..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsIO.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.SendMessageRequest; -import com.google.auto.value.AutoValue; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.aws.options.AwsOptions; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.slf4j.LoggerFactory; - -/** - * An unbounded source for Amazon Simple Queue Service (SQS). - * - *

Reading from an SQS queue

- * - *

The {@link SqsIO} {@link Read} returns an unbounded {@link PCollection} of {@link - * com.amazonaws.services.sqs.model.Message} containing the received messages. Note: This source - * does not currently advance the watermark when no new messages are received. - * - *

To configure an SQS source, you have to provide the queueUrl to connect to. The following - * example illustrates how to configure the source: - * - *

{@code
- * pipeline.apply(SqsIO.read().withQueueUrl(queueUrl))
- * }
- * - *

Writing to an SQS queue

- * - *

The following example illustrates how to use the sink: - * - *

{@code
- * pipeline
- *   .apply(...) // returns PCollection
- *   .apply(SqsIO.write())
- * }
- * - *

Additional Configuration

- * - *

Additional configuration can be provided via {@link AwsOptions} from command line args or in - * code. For example, if you wanted to provide a secret access key via code: - * - *

{@code
- * PipelineOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).withValidation().create();
- * AwsOptions awsOptions = pipelineOptions.as(AwsOptions.class);
- * BasicAWSCredentials awsCreds = new BasicAWSCredentials("accesskey", "secretkey");
- * awsOptions.setAwsCredentialsProvider(new AWSStaticCredentialsProvider(awsCreds));
- * Pipeline pipeline = Pipeline.create(options);
- * }
- * - *

For more information on the available options see {@link AwsOptions}. - * - * @deprecated Module beam-sdks-java-io-amazon-web-services is deprecated and will be - * eventually removed. Please migrate to {@link org.apache.beam.sdk.io.aws2.sqs.SqsIO} in module - * beam-sdks-java-io-amazon-web-services2. - */ -@Deprecated -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class SqsIO { - - public static Read read() { - return new AutoValue_SqsIO_Read.Builder() - .setCoder(SqsMessageCoder.of()) - .setMaxNumRecords(Long.MAX_VALUE) - .build(); - } - - public static Write write() { - return new AutoValue_SqsIO_Write.Builder().build(); - } - - private SqsIO() {} - - /** - * A {@link PTransform} to read/receive messages from SQS. See {@link SqsIO} for more information - * on usage and configuration. - */ - @AutoValue - public abstract static class Read extends PTransform> { - - abstract Coder coder(); - - abstract @Nullable String queueUrl(); - - abstract long maxNumRecords(); - - abstract @Nullable Duration maxReadTime(); - - abstract Builder toBuilder(); - - @AutoValue.Builder - abstract static class Builder { - abstract Builder setCoder(Coder coder); - - abstract Builder setQueueUrl(String queueUrl); - - abstract Builder setMaxNumRecords(long maxNumRecords); - - abstract Builder setMaxReadTime(Duration maxReadTime); - - abstract Read build(); - } - - /** - * Optionally set a custom {@link Message} output coder if you need to access further (message) - * attributes. - * - *

The default {@link SqsMessageCoder} only supports `SentTimestamp` and - * `requestTimeMsSinceEpoch`. - */ - public Read withCoder(Coder coder) { - return toBuilder().setCoder(coder).build(); - } - - /** - * Define the max number of records received by the {@link Read}. When the max number of records - * is lower than {@code Long.MAX_VALUE}, the {@link Read} will provide a bounded {@link - * PCollection}. - */ - public Read withMaxNumRecords(long maxNumRecords) { - return toBuilder().setMaxNumRecords(maxNumRecords).build(); - } - - /** - * Define the max read time (duration) while the {@link Read} will receive messages. When this - * max read time is not null, the {@link Read} will provide a bounded {@link PCollection}. - */ - public Read withMaxReadTime(Duration maxReadTime) { - return toBuilder().setMaxReadTime(maxReadTime).build(); - } - - /** Define the queueUrl used by the {@link Read} to receive messages from SQS. */ - public Read withQueueUrl(String queueUrl) { - checkArgument(queueUrl != null, "queueUrl can not be null"); - checkArgument(!queueUrl.isEmpty(), "queueUrl can not be empty"); - return toBuilder().setQueueUrl(queueUrl).build(); - } - - @Override - public PCollection expand(PBegin input) { - LoggerFactory.getLogger(SqsIO.class) - .warn( - "You are using a deprecated IO for Sqs. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - org.apache.beam.sdk.io.Read.Unbounded unbounded = - org.apache.beam.sdk.io.Read.from( - new SqsUnboundedSource( - this, - new SqsConfiguration(input.getPipeline().getOptions().as(AwsOptions.class)), - coder())); - - PTransform> transform = unbounded; - - if (maxNumRecords() < Long.MAX_VALUE || maxReadTime() != null) { - transform = unbounded.withMaxReadTime(maxReadTime()).withMaxNumRecords(maxNumRecords()); - } - - return input.getPipeline().apply(transform); - } - } - - /** - * A {@link PTransform} to send messages to SQS. See {@link SqsIO} for more information on usage - * and configuration. - */ - @AutoValue - public abstract static class Write extends PTransform, PDone> { - abstract Builder toBuilder(); - - @AutoValue.Builder - abstract static class Builder { - abstract Write build(); - } - - @Override - public PDone expand(PCollection input) { - LoggerFactory.getLogger(SqsIO.class) - .warn( - "You are using a deprecated IO for Sqs. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - input.apply( - ParDo.of( - new SqsWriteFn( - new SqsConfiguration(input.getPipeline().getOptions().as(AwsOptions.class))))); - return PDone.in(input.getPipeline()); - } - } - - private static class SqsWriteFn extends DoFn { - private final SqsConfiguration sqsConfiguration; - private transient AmazonSQS sqs; - - SqsWriteFn(SqsConfiguration sqsConfiguration) { - this.sqsConfiguration = sqsConfiguration; - } - - @Setup - public void setup() { - sqs = - AmazonSQSClientBuilder.standard() - .withClientConfiguration(sqsConfiguration.getClientConfiguration()) - .withCredentials(sqsConfiguration.getAwsCredentialsProvider()) - .withRegion(sqsConfiguration.getAwsRegion()) - .build(); - } - - @ProcessElement - public void processElement(ProcessContext processContext) throws Exception { - sqs.sendMessage(processContext.element()); - } - - @Teardown - public void teardown() throws Exception { - if (sqs != null) { - sqs.shutdown(); - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoder.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoder.java deleted file mode 100644 index 792642c17609..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoder.java +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static com.amazonaws.services.sqs.model.MessageSystemAttributeName.SentTimestamp; -import static org.apache.beam.sdk.io.aws.sqs.SqsUnboundedReader.REQUEST_TIME; - -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.MessageAttributeValue; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.NullableCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.checkerframework.checker.nullness.qual.NonNull; - -/** - * Deterministic coder for an AWS Sdk SQS message. - * - *

This encoder only keeps the `SentTimestamp` attribute as well as the `requestTimeMsSinceEpoch` - * message attribute, other attributes are dropped. You may provide your own coder in case you need - * to access further attributes. - */ -class SqsMessageCoder extends AtomicCoder { - private static final Coder STRING_CODER = StringUtf8Coder.of(); - private static final NullableCoder OPT_STRING_CODER = - NullableCoder.of(StringUtf8Coder.of()); - - private static final Coder INSTANCE = new SqsMessageCoder(); - - static Coder of() { - return INSTANCE; - } - - private SqsMessageCoder() {} - - @Override - public void encode(Message value, OutputStream out) throws IOException { - STRING_CODER.encode(value.getMessageId(), out); - STRING_CODER.encode(value.getReceiptHandle(), out); - OPT_STRING_CODER.encode(value.getBody(), out); - OPT_STRING_CODER.encode(value.getAttributes().get(SentTimestamp.toString()), out); - MessageAttributeValue reqTime = value.getMessageAttributes().get(REQUEST_TIME); - OPT_STRING_CODER.encode(reqTime != null ? reqTime.getStringValue() : null, out); - } - - @Override - public Message decode(InputStream in) throws IOException { - Message msg = new Message(); - msg.setMessageId(STRING_CODER.decode(in)); - msg.setReceiptHandle(STRING_CODER.decode(in)); - - // SQS library not annotated, but this coder assumes null is allowed (documentation does not - // specify) - @SuppressWarnings("nullness") - @NonNull - String body = OPT_STRING_CODER.decode(in); - msg.setBody(body); - - String sentAt = OPT_STRING_CODER.decode(in); - if (sentAt != null) { - msg.addAttributesEntry(SentTimestamp.toString(), sentAt); - } - - String reqTime = OPT_STRING_CODER.decode(in); - if (reqTime != null) { - msg.addMessageAttributesEntry( - REQUEST_TIME, new MessageAttributeValue().withStringValue(reqTime)); - } - return msg; - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReader.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReader.java deleted file mode 100644 index 1fd5e38f5464..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReader.java +++ /dev/null @@ -1,944 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.stream.Collectors.groupingBy; -import static java.util.stream.Collectors.toMap; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; - -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.BatchResultErrorEntry; -import com.amazonaws.services.sqs.model.ChangeMessageVisibilityBatchRequestEntry; -import com.amazonaws.services.sqs.model.ChangeMessageVisibilityBatchResult; -import com.amazonaws.services.sqs.model.DeleteMessageBatchRequestEntry; -import com.amazonaws.services.sqs.model.DeleteMessageBatchResult; -import com.amazonaws.services.sqs.model.GetQueueAttributesRequest; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.MessageAttributeValue; -import com.amazonaws.services.sqs.model.MessageSystemAttributeName; -import com.amazonaws.services.sqs.model.QueueAttributeName; -import com.amazonaws.services.sqs.model.ReceiveMessageRequest; -import com.amazonaws.services.sqs.model.ReceiveMessageResult; -import java.io.IOException; -import java.util.ArrayDeque; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.NoSuchElementException; -import java.util.Objects; -import java.util.Queue; -import java.util.Set; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.io.UnboundedSource.CheckpointMark; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Max; -import org.apache.beam.sdk.transforms.Min; -import org.apache.beam.sdk.transforms.Sum; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.sdk.util.BucketingFunction; -import org.apache.beam.sdk.util.MovingFunction; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.EvictingQueue; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class SqsUnboundedReader extends UnboundedSource.UnboundedReader { - private static final Logger LOG = LoggerFactory.getLogger(SqsUnboundedReader.class); - - /** Request time attribute in {@link Message#getMessageAttributes()}. */ - static final String REQUEST_TIME = "requestTimeMsSinceEpoch"; - - /** Maximum number of messages to pull from SQS per request. */ - public static final int MAX_NUMBER_OF_MESSAGES = 10; - - /** Maximum times to retry batch SQS operations upon partial success. */ - private static final int BATCH_OPERATION_MAX_RETIRES = 5; - - /** Timeout for round trip from receiving a message to finally deleting it from SQS. */ - private static final Duration PROCESSING_TIMEOUT = Duration.standardMinutes(2); - - /** - * Percentage of visibility timeout by which to extend visibility timeout when they are near - * timeout. - */ - private static final int VISIBILITY_EXTENSION_PCT = 50; - - /** - * Percentage of ack timeout we should use as a safety margin. We'll try to extend visibility - * timeout by this margin before the visibility timeout actually expires. - */ - private static final int VISIBILITY_SAFETY_PCT = 20; - - /** - * For stats only: How close we can get to an visibility deadline before we risk it being already - * considered passed by SQS. - */ - private static final Duration VISIBILITY_TOO_LATE = Duration.standardSeconds(2); - - /** Maximum number of message ids per delete or visibility extension call. */ - private static final int DELETE_BATCH_SIZE = 10; - - /** Maximum number of messages in flight. */ - private static final int MAX_IN_FLIGHT = 20000; - - /** Maximum number of recent messages for calculating average message size. */ - private static final int MAX_AVG_BYTE_MESSAGES = 20; - - /** Period of samples to determine watermark and other stats. */ - private static final Duration SAMPLE_PERIOD = Duration.standardMinutes(1); - - /** Period of updates to determine watermark and other stats. */ - private static final Duration SAMPLE_UPDATE = Duration.standardSeconds(5); - - /** Period for logging stats. */ - private static final Duration LOG_PERIOD = Duration.standardSeconds(30); - - /** Minimum number of unread messages required before considering updating watermark. */ - private static final int MIN_WATERMARK_MESSAGES = 10; - - /** - * Minimum number of SAMPLE_UPDATE periods over which unread messages should be spread before - * considering updating watermark. - */ - private static final int MIN_WATERMARK_SPREAD = 2; - - private static final Combine.BinaryCombineLongFn MIN = Min.ofLongs(); - - private static final Combine.BinaryCombineLongFn MAX = Max.ofLongs(); - - private static final Combine.BinaryCombineLongFn SUM = Sum.ofLongs(); - - /** For access to topic and SQS client. */ - private final SqsUnboundedSource source; - - /** - * The closed state of this {@link SqsUnboundedReader}. If true, the reader has not yet been - * closed, and it will have a non-null value within {@link #SqsUnboundedReader}. - */ - private AtomicBoolean active = new AtomicBoolean(true); - - /** SQS client of this reader instance. */ - private AmazonSQS sqsClient = null; - - /** The current message, or {@literal null} if none. */ - private Message current; - - /** - * Messages we have received from SQS and not yet delivered downstream. We preserve their order. - */ - final Queue messagesNotYetRead; - - /** Message ids of messages we have delivered downstream but not yet deleted. */ - private Set safeToDeleteIds; - - /** - * Visibility timeout, in ms, as set on subscription when we first start reading. Not updated - * thereafter. -1 if not yet determined. - */ - private long visibilityTimeoutMs; - - /** Byte size of undecoded elements in {@link #messagesNotYetRead}. */ - private long notYetReadBytes; - - /** Byte size of recent messages. */ - private EvictingQueue recentMessageBytes; - - /** - * Bucketed map from received time (as system time, ms since epoch) to message timestamps (mssince - * epoch) of all received but not-yet read messages. Used to estimate watermark. - */ - private BucketingFunction minUnreadTimestampMsSinceEpoch; - - /** - * Minimum of timestamps (ms since epoch) of all recently read messages. Used to estimate - * watermark. - */ - private MovingFunction minReadTimestampMsSinceEpoch; - - /** Number of recent empty receives. */ - private MovingFunction numEmptyReceives; - - private static class InFlightState { - /** Receipt handle of message. */ - String receiptHandle; - - /** When request which yielded message was issued. */ - long requestTimeMsSinceEpoch; - - /** - * When SQS will consider this message's visibility timeout to timeout and thus it needs to be - * extended. - */ - long visibilityDeadlineMsSinceEpoch; - - public InFlightState( - String receiptHandle, long requestTimeMsSinceEpoch, long visibilityDeadlineMsSinceEpoch) { - this.receiptHandle = receiptHandle; - this.requestTimeMsSinceEpoch = requestTimeMsSinceEpoch; - this.visibilityDeadlineMsSinceEpoch = visibilityDeadlineMsSinceEpoch; - } - } - - /** - * Map from message ids of messages we have received from SQS but not yet deleted to their in - * flight state. Ordered from earliest to latest visibility deadline. - */ - private final LinkedHashMap inFlight; - - /** - * Batches of successfully deleted message ids which need to be pruned from the above. CAUTION: - * Accessed by both reader and checkpointing threads. - */ - private final Queue> deletedIds; - - /** - * System time (ms since epoch) we last received a message from SQS, or -1 if not yet received any - * messages. - */ - private long lastReceivedMsSinceEpoch; - - /** The last reported watermark (ms since epoch), or beginning of time if none yet reported. */ - private long lastWatermarkMsSinceEpoch; - - /** Stats only: System time (ms since epoch) we last logs stats, or -1 if never. */ - private long lastLogTimestampMsSinceEpoch; - - /** Stats only: Total number of messages received. */ - private long numReceived; - - /** Stats only: Number of messages which have recently been received. */ - private MovingFunction numReceivedRecently; - - /** Stats only: Number of messages which have recently had their deadline extended. */ - private MovingFunction numExtendedDeadlines; - - /** - * Stats only: Number of messages which have recently had their deadline extended even though it - * may be too late to do so. - */ - private MovingFunction numLateDeadlines; - - /** Stats only: Number of messages which have recently been deleted. */ - private MovingFunction numDeleted; - - /** - * Stats only: Number of messages which have recently expired (visibility timeout were extended - * for too long). - */ - private MovingFunction numExpired; - - /** Stats only: Number of messages which have recently been returned to visible on SQS. */ - private MovingFunction numReleased; - - /** Stats only: Number of message bytes which have recently been read by downstream consumer. */ - private MovingFunction numReadBytes; - - /** - * Stats only: Minimum of timestamp (ms since epoch) of all recently received messages. Used to - * estimate timestamp skew. Does not contribute to watermark estimator. - */ - private MovingFunction minReceivedTimestampMsSinceEpoch; - - /** - * Stats only: Maximum of timestamp (ms since epoch) of all recently received messages. Used to - * estimate timestamp skew. - */ - private MovingFunction maxReceivedTimestampMsSinceEpoch; - - /** Stats only: Minimum of recent estimated watermarks (ms since epoch). */ - private MovingFunction minWatermarkMsSinceEpoch; - - /** Stats ony: Maximum of recent estimated watermarks (ms since epoch). */ - private MovingFunction maxWatermarkMsSinceEpoch; - - /** - * Stats only: Number of messages with timestamps strictly behind the estimated watermark at the - * time they are received. These may be considered 'late' by downstream computations. - */ - private MovingFunction numLateMessages; - - /** - * Stats only: Current number of checkpoints in flight. CAUTION: Accessed by both checkpointing - * and reader threads. - */ - AtomicInteger numInFlightCheckpoints; - - /** Stats only: Maximum number of checkpoints in flight at any time. */ - private int maxInFlightCheckpoints; - - private static MovingFunction newFun(Combine.BinaryCombineLongFn function) { - return new MovingFunction( - SAMPLE_PERIOD.getMillis(), - SAMPLE_UPDATE.getMillis(), - MIN_WATERMARK_SPREAD, - MIN_WATERMARK_MESSAGES, - function); - } - - public SqsUnboundedReader(SqsUnboundedSource source, SqsCheckpointMark sqsCheckpointMark) - throws IOException { - this.source = source; - - messagesNotYetRead = new ArrayDeque<>(); - safeToDeleteIds = new HashSet<>(); - inFlight = new LinkedHashMap<>(); - deletedIds = new ConcurrentLinkedQueue<>(); - visibilityTimeoutMs = -1; - notYetReadBytes = 0; - recentMessageBytes = EvictingQueue.create(MAX_AVG_BYTE_MESSAGES); - minUnreadTimestampMsSinceEpoch = - new BucketingFunction( - SAMPLE_UPDATE.getMillis(), MIN_WATERMARK_SPREAD, MIN_WATERMARK_MESSAGES, MIN); - minReadTimestampMsSinceEpoch = newFun(MIN); - numEmptyReceives = newFun(SUM); - lastReceivedMsSinceEpoch = -1; - lastWatermarkMsSinceEpoch = BoundedWindow.TIMESTAMP_MIN_VALUE.getMillis(); - current = null; - lastLogTimestampMsSinceEpoch = -1; - numReceived = 0L; - numReceivedRecently = newFun(SUM); - numExtendedDeadlines = newFun(SUM); - numLateDeadlines = newFun(SUM); - numDeleted = newFun(SUM); - numExpired = newFun(SUM); - numReleased = newFun(SUM); - numReadBytes = newFun(SUM); - minReceivedTimestampMsSinceEpoch = newFun(MIN); - maxReceivedTimestampMsSinceEpoch = newFun(MAX); - minWatermarkMsSinceEpoch = newFun(MIN); - maxWatermarkMsSinceEpoch = newFun(MAX); - numLateMessages = newFun(SUM); - numInFlightCheckpoints = new AtomicInteger(); - maxInFlightCheckpoints = 0; - - if (sqsCheckpointMark != null) { - long nowMsSinceEpoch = now(); - initClient(); - extendBatch(nowMsSinceEpoch, sqsCheckpointMark.notYetReadReceipts, 0); - numReleased.add(nowMsSinceEpoch, sqsCheckpointMark.notYetReadReceipts.size()); - } - } - - @Override - public Instant getWatermark() { - - // NOTE: We'll allow the watermark to go backwards. The underlying runner is responsible - // for aggregating all reported watermarks and ensuring the aggregate is latched. - // If we attempt to latch locally then it is possible a temporary starvation of one reader - // could cause its estimated watermark to fast forward to current system time. Then when - // the reader resumes its watermark would be unable to resume tracking. - // By letting the underlying runner latch we avoid any problems due to localized starvation. - long nowMsSinceEpoch = now(); - long readMin = minReadTimestampMsSinceEpoch.get(nowMsSinceEpoch); - long unreadMin = minUnreadTimestampMsSinceEpoch.get(); - if (readMin == Long.MAX_VALUE - && unreadMin == Long.MAX_VALUE - && numEmptyReceives.get(nowMsSinceEpoch) > 0 - && nowMsSinceEpoch > lastReceivedMsSinceEpoch + SAMPLE_PERIOD.getMillis()) { - // We don't currently have any unread messages pending, we have not had any messages - // read for a while, and we have not received any new messages from SQS for a while. - // Advance watermark to current time. - // TODO: Estimate a timestamp lag. - lastWatermarkMsSinceEpoch = nowMsSinceEpoch; - } else if (minReadTimestampMsSinceEpoch.isSignificant() - || minUnreadTimestampMsSinceEpoch.isSignificant()) { - // Take minimum of the timestamps in all unread messages and recently read messages. - lastWatermarkMsSinceEpoch = Math.min(readMin, unreadMin); - } - // else: We're not confident enough to estimate a new watermark. Stick with the old one. - minWatermarkMsSinceEpoch.add(nowMsSinceEpoch, lastWatermarkMsSinceEpoch); - maxWatermarkMsSinceEpoch.add(nowMsSinceEpoch, lastWatermarkMsSinceEpoch); - return new Instant(lastWatermarkMsSinceEpoch); - } - - @Override - public Message getCurrent() throws NoSuchElementException { - if (current == null) { - throw new NoSuchElementException(); - } - return current; - } - - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - if (current == null) { - throw new NoSuchElementException(); - } - - return getTimestamp(current); - } - - @Override - public byte[] getCurrentRecordId() throws NoSuchElementException { - if (current == null) { - throw new NoSuchElementException(); - } - return current.getMessageId().getBytes(UTF_8); - } - - @Override - public CheckpointMark getCheckpointMark() { - int cur = numInFlightCheckpoints.incrementAndGet(); - maxInFlightCheckpoints = Math.max(maxInFlightCheckpoints, cur); - List snapshotSafeToDeleteIds = Lists.newArrayList(safeToDeleteIds); - List snapshotNotYetReadReceipts = new ArrayList<>(messagesNotYetRead.size()); - for (Message message : messagesNotYetRead) { - snapshotNotYetReadReceipts.add(message.getReceiptHandle()); - } - return new SqsCheckpointMark(this, snapshotSafeToDeleteIds, snapshotNotYetReadReceipts); - } - - @Override - public SqsUnboundedSource getCurrentSource() { - return source; - } - - @Override - public long getTotalBacklogBytes() { - long avgBytes = avgMessageBytes(); - List requestAttributes = - Collections.singletonList(QueueAttributeName.ApproximateNumberOfMessages.toString()); - Map queueAttributes = - sqsClient - .getQueueAttributes(source.getRead().queueUrl(), requestAttributes) - .getAttributes(); - long numMessages = - Long.parseLong( - queueAttributes.get(QueueAttributeName.ApproximateNumberOfMessages.toString())); - - // No messages consumed for estimating average message size - if (avgBytes == -1 && numMessages > 0) { - return BACKLOG_UNKNOWN; - } else { - return numMessages * avgBytes; - } - } - - @Override - public boolean start() throws IOException { - initClient(); - visibilityTimeoutMs = - Integer.parseInt( - sqsClient - .getQueueAttributes( - new GetQueueAttributesRequest(source.getRead().queueUrl()) - .withAttributeNames("VisibilityTimeout")) - .getAttributes() - .get("VisibilityTimeout")) - * 1000L; - return advance(); - } - - private void initClient() { - if (sqsClient == null) { - sqsClient = - AmazonSQSClientBuilder.standard() - .withClientConfiguration(source.getSqsConfiguration().getClientConfiguration()) - .withCredentials(source.getSqsConfiguration().getAwsCredentialsProvider()) - .withRegion(source.getSqsConfiguration().getAwsRegion()) - .build(); - } - } - - @Override - public boolean advance() throws IOException { - // Emit stats. - stats(); - - if (current != null) { - // Current is consumed. It can no longer contribute to holding back the watermark. - minUnreadTimestampMsSinceEpoch.remove(getRequestTimeMsSinceEpoch(current)); - current = null; - } - - // Retire state associated with deleted messages. - retire(); - - // Extend all pressing deadlines. - // Will BLOCK until done. - // If the system is pulling messages only to let them sit in a downstream queue then - // this will have the effect of slowing down the pull rate. - // However, if the system is genuinely taking longer to process each message then - // the work to extend visibility timeout would be better done in the background. - extend(); - - if (messagesNotYetRead.isEmpty()) { - // Pull another batch. - // Will BLOCK until fetch returns, but will not block until a message is available. - pull(); - } - - // Take one message from queue. - current = messagesNotYetRead.poll(); - if (current == null) { - // Try again later. - return false; - } - notYetReadBytes -= current.getBody().getBytes(UTF_8).length; - checkState(notYetReadBytes >= 0); - long nowMsSinceEpoch = now(); - numReadBytes.add(nowMsSinceEpoch, current.getBody().getBytes(UTF_8).length); - recentMessageBytes.add(current.getBody().getBytes(UTF_8).length); - minReadTimestampMsSinceEpoch.add(nowMsSinceEpoch, getCurrentTimestamp().getMillis()); - if (getCurrentTimestamp().getMillis() < lastWatermarkMsSinceEpoch) { - numLateMessages.add(nowMsSinceEpoch, 1L); - } - - // Current message can be considered 'read' and will be persisted by the next - // checkpoint. So it is now safe to delete from SQS. - safeToDeleteIds.add(current.getMessageId()); - - return true; - } - - /** - * {@inheritDoc}. - * - *

Marks this {@link SqsUnboundedReader} as no longer active. The {@link AmazonSQS} continue to - * exist and be active beyond the life of this call if there are any in-flight checkpoints. When - * no in-flight checkpoints remain, the reader will be closed. - */ - @Override - public void close() throws IOException { - active.set(false); - maybeCloseClient(); - } - - /** - * Close this reader's underlying {@link AmazonSQS} if the reader has been closed and there are no - * outstanding checkpoints. - */ - void maybeCloseClient() throws IOException { - if (!active.get() && numInFlightCheckpoints.get() == 0) { - // The reader has been closed and it has no more outstanding checkpoints. The client - // must be closed so it doesn't leak - if (sqsClient != null) { - sqsClient.shutdown(); - } - } - } - - /** delete the provided {@code messageIds} from SQS. */ - void delete(List messageIds) throws IOException { - AtomicInteger counter = new AtomicInteger(); - for (List messageList : - messageIds.stream() - .collect(groupingBy(x -> counter.getAndIncrement() / DELETE_BATCH_SIZE)) - .values()) { - deleteBatch(messageList); - } - } - - /** - * delete the provided {@code messageIds} from SQS, blocking until all of the messages are - * deleted. - * - *

CAUTION: May be invoked from a separate thread. - * - *

CAUTION: Retains {@code messageIds}. - */ - private void deleteBatch(List messageIds) throws IOException { - int retries = 0; - List errorMessages = new ArrayList<>(); - Map pendingReceipts = - IntStream.range(0, messageIds.size()) - .boxed() - .filter(i -> inFlight.containsKey(messageIds.get(i))) - .collect(toMap(Object::toString, i -> inFlight.get(messageIds.get(i)).receiptHandle)); - - while (!pendingReceipts.isEmpty()) { - - if (retries >= BATCH_OPERATION_MAX_RETIRES) { - throw new IOException( - "Failed to delete " - + pendingReceipts.size() - + " messages after " - + retries - + " retries: " - + String.join(", ", errorMessages)); - } - - List entries = - pendingReceipts.entrySet().stream() - .map(r -> new DeleteMessageBatchRequestEntry(r.getKey(), r.getValue())) - .collect(Collectors.toList()); - - DeleteMessageBatchResult result = - sqsClient.deleteMessageBatch(source.getRead().queueUrl(), entries); - - // Retry errors except invalid handles - Set retryErrors = - result.getFailed().stream() - .filter(e -> !e.getCode().equals("ReceiptHandleIsInvalid")) - .collect(Collectors.toSet()); - - pendingReceipts - .keySet() - .retainAll( - retryErrors.stream().map(BatchResultErrorEntry::getId).collect(Collectors.toSet())); - - errorMessages = - retryErrors.stream().map(BatchResultErrorEntry::getMessage).collect(Collectors.toList()); - - retries += 1; - } - deletedIds.add(messageIds); - } - - /** - * Messages which have been deleted (via the checkpoint finalize) are no longer in flight. This is - * only used for flow control and stats. - */ - private void retire() { - long nowMsSinceEpoch = now(); - while (true) { - List ackIds = deletedIds.poll(); - if (ackIds == null) { - return; - } - numDeleted.add(nowMsSinceEpoch, ackIds.size()); - for (String ackId : ackIds) { - inFlight.remove(ackId); - safeToDeleteIds.remove(ackId); - } - } - } - - /** BLOCKING Fetch another batch of messages from SQS. */ - private void pull() { - if (inFlight.size() >= MAX_IN_FLIGHT) { - // Wait for checkpoint to be finalized before pulling anymore. - // There may be lag while checkpoints are persisted and the finalizeCheckpoint method - // is invoked. By limiting the in-flight messages we can ensure we don't end up consuming - // messages faster than we can checkpoint them. - return; - } - - long requestTimeMsSinceEpoch = now(); - long deadlineMsSinceEpoch = requestTimeMsSinceEpoch + visibilityTimeoutMs; - - final ReceiveMessageRequest receiveMessageRequest = - new ReceiveMessageRequest(source.getRead().queueUrl()); - - receiveMessageRequest.setMaxNumberOfMessages(MAX_NUMBER_OF_MESSAGES); - receiveMessageRequest.setAttributeNames( - Arrays.asList(MessageSystemAttributeName.SentTimestamp.toString())); - final ReceiveMessageResult receiveMessageResult = - sqsClient.receiveMessage(receiveMessageRequest); - - final List messages = receiveMessageResult.getMessages(); - - if (messages == null || messages.isEmpty()) { - numEmptyReceives.add(requestTimeMsSinceEpoch, 1L); - return; - } - - lastReceivedMsSinceEpoch = requestTimeMsSinceEpoch; - - // Capture the received messages. - for (Message message : messages) { - // Keep request time as message attribute for later usage - MessageAttributeValue reqTime = - new MessageAttributeValue().withStringValue(Long.toString(requestTimeMsSinceEpoch)); - message.setMessageAttributes(ImmutableMap.of(REQUEST_TIME, reqTime)); - messagesNotYetRead.add(message); - notYetReadBytes += message.getBody().getBytes(UTF_8).length; - inFlight.put( - message.getMessageId(), - new InFlightState( - message.getReceiptHandle(), requestTimeMsSinceEpoch, deadlineMsSinceEpoch)); - numReceived++; - numReceivedRecently.add(requestTimeMsSinceEpoch, 1L); - - long timestampMillis = getTimestamp(message).getMillis(); - minReceivedTimestampMsSinceEpoch.add(requestTimeMsSinceEpoch, timestampMillis); - maxReceivedTimestampMsSinceEpoch.add(requestTimeMsSinceEpoch, timestampMillis); - minUnreadTimestampMsSinceEpoch.add(requestTimeMsSinceEpoch, timestampMillis); - } - } - - /** Return the current time, in ms since epoch. */ - long now() { - return System.currentTimeMillis(); - } - - /** - * BLOCKING Extend deadline for all messages which need it. CAUTION: If extensions can't keep up - * with wallclock then we'll never return. - */ - private void extend() throws IOException { - while (true) { - long nowMsSinceEpoch = now(); - List assumeExpired = new ArrayList<>(); - List toBeExtended = new ArrayList<>(); - List toBeExpired = new ArrayList<>(); - // Messages will be in increasing deadline order. - for (Map.Entry entry : inFlight.entrySet()) { - if (entry.getValue().visibilityDeadlineMsSinceEpoch - - (visibilityTimeoutMs * VISIBILITY_SAFETY_PCT) / 100 - > nowMsSinceEpoch) { - // All remaining messages don't need their visibility timeouts to be extended. - break; - } - - if (entry.getValue().visibilityDeadlineMsSinceEpoch - VISIBILITY_TOO_LATE.getMillis() - < nowMsSinceEpoch) { - // SQS may have already considered this message to have expired. - // If so it will (eventually) be made available on a future pull request. - // If this message ends up being committed then it will be considered a duplicate - // when re-pulled. - assumeExpired.add(entry.getKey()); - continue; - } - - if (entry.getValue().requestTimeMsSinceEpoch + PROCESSING_TIMEOUT.getMillis() - < nowMsSinceEpoch) { - // This message has been in-flight for too long. - // Give up on it, otherwise we risk extending its visibility timeout indefinitely. - toBeExpired.add(entry.getKey()); - continue; - } - - // Extend the visibility timeout for this message. - toBeExtended.add(entry.getKey()); - if (toBeExtended.size() >= DELETE_BATCH_SIZE) { - // Enough for one batch. - break; - } - } - - if (assumeExpired.isEmpty() && toBeExtended.isEmpty() && toBeExpired.isEmpty()) { - // Nothing to be done. - return; - } - - if (!assumeExpired.isEmpty()) { - // If we didn't make the visibility deadline assume expired and no longer in flight. - numLateDeadlines.add(nowMsSinceEpoch, assumeExpired.size()); - for (String messageId : assumeExpired) { - inFlight.remove(messageId); - } - } - - if (!toBeExpired.isEmpty()) { - // Expired messages are no longer considered in flight. - numExpired.add(nowMsSinceEpoch, toBeExpired.size()); - for (String messageId : toBeExpired) { - inFlight.remove(messageId); - } - } - - if (!toBeExtended.isEmpty()) { - // SQS extends visibility timeout from it's notion of current time. - // We'll try to track that on our side, but note the deadlines won't necessarily agree. - long extensionMs = (int) ((visibilityTimeoutMs * VISIBILITY_EXTENSION_PCT) / 100L); - long newDeadlineMsSinceEpoch = nowMsSinceEpoch + extensionMs; - for (String messageId : toBeExtended) { - // Maintain increasing ack deadline order. - String receiptHandle = inFlight.get(messageId).receiptHandle; - InFlightState state = inFlight.remove(messageId); - - inFlight.put( - messageId, - new InFlightState( - receiptHandle, state.requestTimeMsSinceEpoch, newDeadlineMsSinceEpoch)); - } - List receiptHandles = - toBeExtended.stream() - .map(inFlight::get) - .filter(Objects::nonNull) // get rid of null values - .map(m -> m.receiptHandle) - .collect(Collectors.toList()); - // BLOCKs until extended. - extendBatch(nowMsSinceEpoch, receiptHandles, (int) (extensionMs / 1000)); - } - } - } - - /** - * BLOCKING Extend the visibility timeout for messages from SQS with the given {@code - * receiptHandles}. - */ - void extendBatch(long nowMsSinceEpoch, List receiptHandles, int extensionSec) - throws IOException { - int retries = 0; - int numMessages = receiptHandles.size(); - Map pendingReceipts = - IntStream.range(0, receiptHandles.size()) - .boxed() - .collect(toMap(Object::toString, receiptHandles::get)); - List errorMessages = new ArrayList<>(); - - while (!pendingReceipts.isEmpty()) { - - if (retries >= BATCH_OPERATION_MAX_RETIRES) { - throw new IOException( - "Failed to extend visibility timeout for " - + pendingReceipts.size() - + " messages after " - + retries - + " retries: " - + String.join(", ", errorMessages)); - } - - List entries = - pendingReceipts.entrySet().stream() - .map( - r -> - new ChangeMessageVisibilityBatchRequestEntry(r.getKey(), r.getValue()) - .withVisibilityTimeout(extensionSec)) - .collect(Collectors.toList()); - - ChangeMessageVisibilityBatchResult result = - sqsClient.changeMessageVisibilityBatch(source.getRead().queueUrl(), entries); - - // Retry errors except invalid handles - Set retryErrors = - result.getFailed().stream() - .filter(e -> !e.getCode().equals("ReceiptHandleIsInvalid")) - .collect(Collectors.toSet()); - - pendingReceipts - .keySet() - .retainAll( - retryErrors.stream().map(BatchResultErrorEntry::getId).collect(Collectors.toSet())); - - errorMessages = - retryErrors.stream().map(BatchResultErrorEntry::getMessage).collect(Collectors.toList()); - - retries += 1; - } - numExtendedDeadlines.add(nowMsSinceEpoch, numMessages); - } - - /** Log stats if time to do so. */ - private void stats() { - long nowMsSinceEpoch = now(); - if (lastLogTimestampMsSinceEpoch < 0) { - lastLogTimestampMsSinceEpoch = nowMsSinceEpoch; - return; - } - long deltaMs = nowMsSinceEpoch - lastLogTimestampMsSinceEpoch; - if (deltaMs < LOG_PERIOD.getMillis()) { - return; - } - - String messageSkew = "unknown"; - long minTimestamp = minReceivedTimestampMsSinceEpoch.get(nowMsSinceEpoch); - long maxTimestamp = maxReceivedTimestampMsSinceEpoch.get(nowMsSinceEpoch); - if (minTimestamp < Long.MAX_VALUE && maxTimestamp > Long.MIN_VALUE) { - messageSkew = (maxTimestamp - minTimestamp) + "ms"; - } - - String watermarkSkew = "unknown"; - long minWatermark = minWatermarkMsSinceEpoch.get(nowMsSinceEpoch); - long maxWatermark = maxWatermarkMsSinceEpoch.get(nowMsSinceEpoch); - if (minWatermark < Long.MAX_VALUE && maxWatermark > Long.MIN_VALUE) { - watermarkSkew = (maxWatermark - minWatermark) + "ms"; - } - - String oldestInFlight = "no"; - String oldestAckId = Iterables.getFirst(inFlight.keySet(), null); - if (oldestAckId != null) { - oldestInFlight = (nowMsSinceEpoch - inFlight.get(oldestAckId).requestTimeMsSinceEpoch) + "ms"; - } - - LOG.debug( - "SQS {} has " - + "{} received messages, " - + "{} current unread messages, " - + "{} current unread bytes, " - + "{} current in-flight msgs, " - + "{} oldest in-flight, " - + "{} current in-flight checkpoints, " - + "{} max in-flight checkpoints, " - + "{} bytes in backlog, " - + "{}B/s recent read, " - + "{} recent received, " - + "{} recent extended, " - + "{} recent late extended, " - + "{} recent deleted, " - + "{} recent released, " - + "{} recent expired, " - + "{} recent message timestamp skew, " - + "{} recent watermark skew, " - + "{} recent late messages, " - + "{} last reported watermark", - source.getRead().queueUrl(), - numReceived, - messagesNotYetRead.size(), - notYetReadBytes, - inFlight.size(), - oldestInFlight, - numInFlightCheckpoints.get(), - maxInFlightCheckpoints, - getTotalBacklogBytes(), - numReadBytes.get(nowMsSinceEpoch) / (SAMPLE_PERIOD.getMillis() / 1000L), - numReceivedRecently.get(nowMsSinceEpoch), - numExtendedDeadlines.get(nowMsSinceEpoch), - numLateDeadlines.get(nowMsSinceEpoch), - numDeleted.get(nowMsSinceEpoch), - numReleased.get(nowMsSinceEpoch), - numExpired.get(nowMsSinceEpoch), - messageSkew, - watermarkSkew, - numLateMessages.get(nowMsSinceEpoch), - new Instant(lastWatermarkMsSinceEpoch)); - - lastLogTimestampMsSinceEpoch = nowMsSinceEpoch; - } - - /** Return the average byte size of all message read. -1 if no message read yet */ - private long avgMessageBytes() { - if (!recentMessageBytes.isEmpty()) { - return (long) recentMessageBytes.stream().mapToDouble(s -> s).average().getAsDouble(); - } else { - return -1L; - } - } - - /** Extract the timestamp from the given {@code message}. */ - private Instant getTimestamp(final Message message) { - return new Instant( - Long.parseLong( - message.getAttributes().get(MessageSystemAttributeName.SentTimestamp.toString()))); - } - - /** Extract the request timestamp from the given {@code message}. */ - private Long getRequestTimeMsSinceEpoch(final Message message) { - return Long.parseLong(message.getMessageAttributes().get(REQUEST_TIME).getStringValue()); - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSource.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSource.java deleted file mode 100644 index 0ee9b8084179..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSource.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import com.amazonaws.services.sqs.model.Message; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.SerializableCoder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.io.aws.sqs.SqsIO.Read; -import org.apache.beam.sdk.options.PipelineOptions; -import org.checkerframework.checker.nullness.qual.Nullable; - -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class SqsUnboundedSource extends UnboundedSource { - - private final Read read; - private final SqsConfiguration sqsConfiguration; - private final Coder outputCoder; - - public SqsUnboundedSource( - Read read, SqsConfiguration sqsConfiguration, Coder outputCoder) { - this.read = read; - this.sqsConfiguration = sqsConfiguration; - this.outputCoder = outputCoder; - } - - @Override - public List split(int desiredNumSplits, PipelineOptions options) { - List sources = new ArrayList<>(); - for (int i = 0; i < Math.max(1, desiredNumSplits); ++i) { - sources.add(new SqsUnboundedSource(read, sqsConfiguration, outputCoder)); - } - return sources; - } - - @Override - public UnboundedReader createReader( - PipelineOptions options, @Nullable SqsCheckpointMark checkpointMark) { - try { - return new SqsUnboundedReader(this, checkpointMark); - } catch (IOException e) { - throw new RuntimeException("Unable to subscribe to " + read.queueUrl() + ": ", e); - } - } - - @Override - public Coder getCheckpointMarkCoder() { - return SerializableCoder.of(SqsCheckpointMark.class); - } - - @Override - public Coder getOutputCoder() { - return outputCoder; - } - - public Read getRead() { - return read; - } - - SqsConfiguration getSqsConfiguration() { - return sqsConfiguration; - } - - @Override - public boolean requiresDeduping() { - return true; - } -} diff --git a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/package-info.java b/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/package-info.java deleted file mode 100644 index d688641ddff6..000000000000 --- a/sdks/java/io/amazon-web-services/src/main/java/org/apache/beam/sdk/io/aws/sqs/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines IO connectors for Amazon Web Services SQS. */ -package org.apache.beam.sdk.io.aws.sqs; diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/ITEnvironment.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/ITEnvironment.java deleted file mode 100644 index 3415a11bf9f0..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/ITEnvironment.java +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws; - -import static org.apache.beam.sdk.testing.TestPipeline.testingPipelineOptions; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; - -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import org.apache.beam.sdk.io.aws.options.AwsOptions; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.testing.TestPipelineOptions; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.commons.lang3.StringUtils; -import org.junit.rules.ExternalResource; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.containers.localstack.LocalStackContainer.Service; -import org.testcontainers.containers.output.Slf4jLogConsumer; -import org.testcontainers.utility.DockerImageName; - -/** - * JUnit rule providing an integration testing environment for AWS as {@link ExternalResource}. - * - *

This rule is typically used as @ClassRule. It starts a Localstack container with the requested - * AWS service and provides matching {@link AwsOptions}. The usage of localstack can also be - * disabled using {@link ITOptions} pipeline options to run integration tests against AWS, for - * instance: - * - *

{@code
- * ./gradlew :sdks:java:io:amazon-web-services:integrationTest \
- *   --info \
- *   --tests "org.apache.beam.sdk.io.aws.s3.S3FileSystemIT" \
- *   -DintegrationTestPipelineOptions='["--awsRegion=eu-central-1","--useLocalstack=false"]'
- * }
- * - * @param The options type to use for the integration test. - */ -public class ITEnvironment extends ExternalResource { - private static final String LOCALSTACK = "localstack/localstack"; - private static final String LOCALSTACK_VERSION = "0.13.1"; - - public interface ITOptions extends AwsOptions, TestPipelineOptions { - @Description("Number of rows to write and read by the test") - @Default.Integer(1000) - Integer getNumberOfRows(); - - void setNumberOfRows(Integer count); - - @Description("Flag if to use localstack, enabled by default.") - @Default.Boolean(true) - Boolean getUseLocalstack(); - - void setUseLocalstack(Boolean useLocalstack); - - @Description("Localstack log level, e.g. trace, debug, info") - String getLocalstackLogLevel(); - - void setLocalstackLogLevel(String level); - } - - private final OptionsT options; - private final LocalStackContainer localstack; - - public ITEnvironment(Service service, Class optionsClass, String... env) { - this(new Service[] {service}, optionsClass, env); - } - - public ITEnvironment(Service[] services, Class optionsClass, String... env) { - localstack = - new LocalStackContainer(DockerImageName.parse(LOCALSTACK).withTag(LOCALSTACK_VERSION)) - .withServices(services) - .withStartupAttempts(3); - - PipelineOptionsFactory.register(optionsClass); - options = testingPipelineOptions().as(optionsClass); - - localstack.setEnv(ImmutableList.copyOf(env)); - if (options.getLocalstackLogLevel() != null) { - localstack - .withEnv("LS_LOG", options.getLocalstackLogLevel()) - .withLogConsumer( - new Slf4jLogConsumer(LoggerFactory.getLogger(StringUtils.join(services)))); - } - } - - public TestPipeline createTestPipeline() { - return TestPipeline.fromOptions(options); - } - - public , ClientT> ClientT buildClient( - BuilderT builder) { - if (options.getAwsServiceEndpoint() != null) { - builder.withEndpointConfiguration( - new EndpointConfiguration(options.getAwsServiceEndpoint(), options.getAwsRegion())); - } else { - builder.setRegion(options.getAwsRegion()); - } - return builder.withCredentials(options.getAwsCredentialsProvider()).build(); - } - - public OptionsT options() { - return options; - } - - @Override - protected void before() { - if (options.getUseLocalstack()) { - startLocalstack(); - } - } - - @Override - protected void after() { - localstack.stop(); // noop if not started - } - - /** Necessary setup for localstack environment. */ - private void startLocalstack() { - localstack.start(); - options.setAwsServiceEndpoint( - localstack.getEndpointOverride(S3).toString()); // service irrelevant - options.setAwsRegion(localstack.getRegion()); - options.setAwsCredentialsProvider( - new AWSStaticCredentialsProvider( - new BasicAWSCredentials(localstack.getAccessKey(), localstack.getSecretKey()))); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/coders/AwsCodersTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/coders/AwsCodersTest.java deleted file mode 100644 index 1ee20a6fa7ea..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/coders/AwsCodersTest.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.coders; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; - -import com.amazonaws.ResponseMetadata; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.http.SdkHttpMetadata; -import java.util.UUID; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Test; - -/** Tests for AWS coders. */ -public class AwsCodersTest { - - @Test - public void testResponseMetadataDecodeEncodeEquals() throws Exception { - ResponseMetadata value = buildResponseMetadata(); - ResponseMetadata clone = CoderUtils.clone(AwsCoders.responseMetadata(), value); - assertThat(clone.getRequestId(), equalTo(value.getRequestId())); - } - - @Test - public void testSdkHttpMetadataDecodeEncodeEquals() throws Exception { - SdkHttpMetadata value = buildSdkHttpMetadata(); - SdkHttpMetadata clone = CoderUtils.clone(AwsCoders.sdkHttpMetadata(), value); - assertThat(clone.getHttpStatusCode(), equalTo(value.getHttpStatusCode())); - assertThat(clone.getHttpHeaders(), equalTo(value.getHttpHeaders())); - } - - @Test - public void testSdkHttpMetadataWithoutHeadersDecodeEncodeEquals() throws Exception { - SdkHttpMetadata value = buildSdkHttpMetadata(); - SdkHttpMetadata clone = CoderUtils.clone(AwsCoders.sdkHttpMetadataWithoutHeaders(), value); - assertThat(clone.getHttpStatusCode(), equalTo(value.getHttpStatusCode())); - assertThat(clone.getHttpHeaders().isEmpty(), equalTo(true)); - } - - private ResponseMetadata buildResponseMetadata() { - return new ResponseMetadata( - ImmutableMap.of(ResponseMetadata.AWS_REQUEST_ID, UUID.randomUUID().toString())); - } - - private SdkHttpMetadata buildSdkHttpMetadata() { - HttpResponse httpResponse = new HttpResponse(null, null); - httpResponse.setStatusCode(200); - httpResponse.addHeader("Content-Type", "application/json"); - return SdkHttpMetadata.from(httpResponse); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderTest.java deleted file mode 100644 index 489feb7a87c9..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/AttributeValueCoderTest.java +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.Assert; -import org.junit.Test; - -/** Unit test cases for each type of AttributeValue to test encoding and decoding. */ -public class AttributeValueCoderTest { - - @Test - public void shouldPassForStringType() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setS("testing"); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForNumberType() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setN("123"); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForBooleanType() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setBOOL(false); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForByteArray() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setB(ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8))); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForListOfString() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setSS(ImmutableList.of("foo", "bar")); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForOneListOfNumber() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setNS(ImmutableList.of("123", "456")); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForOneListOfByteArray() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setBS( - ImmutableList.of( - ByteBuffer.wrap("mylistbyte1".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("mylistbyte2".getBytes(StandardCharsets.UTF_8)))); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForListType() throws IOException { - AttributeValue expected = new AttributeValue(); - - List listAttr = new ArrayList<>(); - listAttr.add(new AttributeValue("innerMapValue1")); - listAttr.add(new AttributeValue().withN("8976234")); - - expected.setL(listAttr); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForMapType() throws IOException { - AttributeValue expected = new AttributeValue(); - - Map attrMap = new HashMap<>(); - attrMap.put("innerMapAttr1", new AttributeValue("innerMapValue1")); - attrMap.put( - "innerMapAttr2", - new AttributeValue().withB(ByteBuffer.wrap("8976234".getBytes(StandardCharsets.UTF_8)))); - - expected.setM(attrMap); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } - - @Test - public void shouldPassForNullType() throws IOException { - AttributeValue expected = new AttributeValue(); - expected.setNULL(true); - - AttributeValueCoder coder = AttributeValueCoder.of(); - ByteArrayOutputStream output = new ByteArrayOutputStream(); - coder.encode(expected, output); - - ByteArrayInputStream in = new ByteArrayInputStream(output.toByteArray()); - - AttributeValue actual = coder.decode(in); - - Assert.assertEquals(expected, actual); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOIT.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOIT.java deleted file mode 100644 index e3aa62450ce5..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOIT.java +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount; -import static org.apache.beam.sdk.values.TypeDescriptors.strings; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.DYNAMODB; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder; -import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import com.amazonaws.services.dynamodbv2.model.CreateTableRequest; -import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; -import com.amazonaws.services.dynamodbv2.model.KeyType; -import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughput; -import com.amazonaws.services.dynamodbv2.model.PutRequest; -import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType; -import com.amazonaws.services.dynamodbv2.model.ScanRequest; -import com.amazonaws.services.dynamodbv2.model.TableStatus; -import com.amazonaws.services.dynamodbv2.model.WriteRequest; -import java.util.Map; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.aws.ITEnvironment; -import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.TestRow; -import org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn; -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExternalResource; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -@RunWith(JUnit4.class) -/** - * Integration test to write and read from DynamoDB. - * - *

By default this runs against Localstack, but you can use {@link DynamoDBIOIT.ITOptions} to - * configure tests to run against AWS DynamoDB. - * - *

{@code
- * ./gradlew :sdks:java:io:amazon-web-services:integrationTest \
- *   --info \
- *   --tests "org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIOIT" \
- *   -DintegrationTestPipelineOptions='["--awsRegion=eu-central-1","--useLocalstack=false"]'
- * }
- */ -public class DynamoDBIOIT { - public interface ITOptions extends ITEnvironment.ITOptions { - @Description("DynamoDB table name") - @Default.String("beam-dynamodbio-it") - String getDynamoDBTable(); - - void setDynamoDBTable(String value); - - @Description("DynamoDB total segments") - @Default.Integer(2) - Integer getDynamoDBSegments(); - - void setDynamoDBSegments(Integer segments); - - @Description("Create DynamoDB table. Enabled when using localstack") - @Default.Boolean(false) - Boolean getCreateTable(); - - void setCreateTable(Boolean createTable); - } - - private static final String COL_ID = "id"; - private static final String COL_NAME = "name"; - - @ClassRule - public static ITEnvironment env = - new ITEnvironment<>(DYNAMODB, ITOptions.class, "DYNAMODB_ERROR_PROBABILITY=0.1"); - - @Rule public TestPipeline pipelineWrite = env.createTestPipeline(); - @Rule public TestPipeline pipelineRead = env.createTestPipeline(); - @Rule public ExternalResource dbTable = CreateDbTable.optionally(env.options()); - - /** Test which write and then read data from DynamoDB. */ - @Test - public void testWriteThenRead() { - runWrite(); - runRead(); - } - - /** Write test dataset to DynamoDB. */ - private void runWrite() { - int rows = env.options().getNumberOfRows(); - pipelineWrite - .apply("Generate Sequence", GenerateSequence.from(0).to(rows)) - .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn())) - .apply( - "Write to DynamoDB", - DynamoDBIO.write() - .withAwsClientsProvider(clientProvider()) - .withWriteRequestMapperFn(row -> buildWriteRequest(row))); - pipelineWrite.run().waitUntilFinish(); - } - - /** Read test dataset from DynamoDB. */ - private void runRead() { - int rows = env.options().getNumberOfRows(); - PCollection> records = - pipelineRead - .apply( - "Read from DynamoDB", - DynamoDBIO.read() - .withAwsClientsProvider(clientProvider()) - .withScanRequestFn(in -> buildScanRequest()) - .items()) - .apply("Flatten result", Flatten.iterables()); - - PAssert.thatSingleton(records.apply("Count All", Count.globally())).isEqualTo((long) rows); - - PCollection consolidatedHashcode = - records - .apply(MapElements.into(strings()).via(record -> record.get(COL_NAME).getS())) - .apply("Hash records", Combine.globally(new HashingFn()).withoutDefaults()); - - PAssert.that(consolidatedHashcode).containsInAnyOrder(getExpectedHashForRowCount(rows)); - - pipelineRead.run().waitUntilFinish(); - } - - private AwsClientsProvider clientProvider() { - AWSCredentials credentials = env.options().getAwsCredentialsProvider().getCredentials(); - return new BasicDynamoDBProvider( - credentials.getAWSAccessKeyId(), - credentials.getAWSSecretKey(), - Regions.fromName(env.options().getAwsRegion()), - env.options().getAwsServiceEndpoint()); - } - - private static ScanRequest buildScanRequest() { - return new ScanRequest(env.options().getDynamoDBTable()) - .withTotalSegments(env.options().getDynamoDBSegments()); - } - - private static KV buildWriteRequest(TestRow row) { - AttributeValue id = new AttributeValue().withN(row.id().toString()); - AttributeValue name = new AttributeValue().withS(row.name()); - PutRequest req = new PutRequest(ImmutableMap.of(COL_ID, id, COL_NAME, name)); - return KV.of(env.options().getDynamoDBTable(), new WriteRequest().withPutRequest(req)); - } - - static class CreateDbTable extends ExternalResource { - static ExternalResource optionally(ITOptions opts) { - boolean create = opts.getCreateTable() || opts.getUseLocalstack(); - return create ? new CreateDbTable() : new ExternalResource() {}; - } - - private final String name = env.options().getDynamoDBTable(); - private final AmazonDynamoDB client = env.buildClient(AmazonDynamoDBClientBuilder.standard()); - - @Override - protected void before() throws Throwable { - CreateTableRequest request = - new CreateTableRequest() - .withTableName(name) - .withAttributeDefinitions( - attribute(COL_ID, ScalarAttributeType.N), - attribute(COL_NAME, ScalarAttributeType.S)) - .withKeySchema(keyElement(COL_ID, KeyType.HASH), keyElement(COL_NAME, KeyType.RANGE)) - .withProvisionedThroughput(new ProvisionedThroughput(1000L, 1000L)); - String status = client.createTable(request).getTableDescription().getTableStatus(); - int attempts = 10; - for (int i = 0; i <= attempts; ++i) { - if (status.equals(TableStatus.ACTIVE.toString())) { - return; - } - Thread.sleep(1000L); - status = client.describeTable(name).getTable().getTableStatus(); - } - throw new RuntimeException("Unable to initialize table"); - } - - @Override - protected void after() { - client.deleteTable(name); - client.shutdown(); - } - - private AttributeDefinition attribute(String name, ScalarAttributeType type) { - return new AttributeDefinition(name, type); - } - - private KeySchemaElement keyElement(String name, KeyType type) { - return new KeySchemaElement(name, type); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOReadTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOReadTest.java deleted file mode 100644 index 27e2a84076b7..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOReadTest.java +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static java.lang.Math.min; -import static java.util.stream.Collectors.toList; -import static java.util.stream.IntStream.range; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables.getLast; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.transform; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import com.amazonaws.services.dynamodbv2.model.ScanRequest; -import com.amazonaws.services.dynamodbv2.model.ScanResult; -import java.util.Arrays; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.stream.IntStream; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Flatten; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.mockito.ArgumentMatcher; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -@RunWith(MockitoJUnitRunner.class) -public class DynamoDBIOReadTest { - private static final String tableName = "Test"; - - @Rule public final TestPipeline pipeline = TestPipeline.create(); - @Rule public final ExpectedException thrown = ExpectedException.none(); - @Mock public AmazonDynamoDB client; - - @Test - public void testReadOneSegment() { - MockData mockData = new MockData(range(0, 10)); - mockData.mockScan(10, client); // 1 scan iteration - - PCollection>> actual = - pipeline.apply( - DynamoDBIO.>>read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn( - in -> new ScanRequest().withTableName(tableName).withTotalSegments(1)) - .items()); - - PAssert.that(actual.apply(Count.globally())).containsInAnyOrder(1L); - PAssert.that(actual).containsInAnyOrder(mockData.getAllItems()); - - pipeline.run().waitUntilFinish(); - } - - @Test - public void testReadWithCustomLimit() { - final int requestedLimit = 100; - MockData mockData = new MockData(range(0, 10)); - mockData.mockScan(requestedLimit, client); // 1 scan iteration - - pipeline.apply( - DynamoDBIO.>>read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn( - in -> - new ScanRequest() - .withTableName(tableName) - .withTotalSegments(1) - .withLimit(requestedLimit)) - .items()); - - pipeline.run().waitUntilFinish(); - - verify(client).scan(argThat((ScanRequest req) -> requestedLimit == req.getLimit())); - } - - @Test - public void testReadThreeSegments() { - MockData mockData = new MockData(range(0, 10), range(10, 20), range(20, 30)); - mockData.mockScan(10, client); // 1 scan iteration per segment - - PCollection>> actual = - pipeline.apply( - DynamoDBIO.>>read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn( - in -> new ScanRequest().withTableName(tableName).withTotalSegments(3)) - .items()); - - PAssert.that(actual.apply(Count.globally())).containsInAnyOrder(3L); - PAssert.that(actual.apply(Flatten.iterables())).containsInAnyOrder(mockData.getAllItems()); - - pipeline.run().waitUntilFinish(); - } - - @Test - public void testReadWithStartKey() { - MockData mockData = new MockData(range(0, 10), range(20, 32)); - mockData.mockScan(5, client); // 2 + 3 scan iterations - - PCollection>> actual = - pipeline.apply( - DynamoDBIO.>>read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn( - in -> new ScanRequest().withTableName(tableName).withTotalSegments(2)) - .items()); - - PAssert.that(actual.apply(Count.globally())).containsInAnyOrder(5L); - PAssert.that(actual.apply(Flatten.iterables())).containsInAnyOrder(mockData.getAllItems()); - - pipeline.run().waitUntilFinish(); - } - - @Test - public void testReadMissingScanRequestFn() { - pipeline.enableAbandonedNodeEnforcement(false); - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("withScanRequestFn() is required"); - - pipeline.apply(DynamoDBIO.read().withAwsClientsProvider(StaticAwsClientsProvider.of(client))); - } - - @Test - public void testReadMissingAwsClientsProvider() { - pipeline.enableAbandonedNodeEnforcement(false); - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("withAwsClientsProvider() is required"); - - pipeline.apply(DynamoDBIO.read().withScanRequestFn(in -> new ScanRequest())); - } - - @Test - public void testReadMissingTotalSegments() { - pipeline.enableAbandonedNodeEnforcement(false); - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("TotalSegments is required with withScanRequestFn() and greater zero"); - - pipeline.apply( - DynamoDBIO.read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn(in -> new ScanRequest())); - } - - @Test - public void testReadInvalidTotalSegments() { - pipeline.enableAbandonedNodeEnforcement(false); - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("TotalSegments is required with withScanRequestFn() and greater zero"); - - pipeline.apply( - DynamoDBIO.read() - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withScanRequestFn(in -> new ScanRequest().withTotalSegments(0))); - } - - private static class MockData { - private final List> data; - - MockData(IntStream... segments) { - data = Arrays.stream(segments).map(ids -> newArrayList(ids.iterator())).collect(toList()); - } - - List> getAllItems() { - return data.stream().flatMap(ids -> ids.stream()).map(id -> item(id)).collect(toList()); - } - - void mockScan(int sizeLimit, AmazonDynamoDB mock) { - for (int segment = 0; segment < data.size(); segment++) { - List ids = data.get(segment); - - List> items = null; - Map startKey, lastKey; - for (int start = 0; start < ids.size(); start += sizeLimit) { - startKey = items != null ? getLast(items) : null; - items = transform(ids.subList(start, min(ids.size(), start + sizeLimit)), id -> item(id)); - lastKey = start + sizeLimit < ids.size() ? getLast(items) : null; - - when(mock.scan(argThat(matchesScanRequest(segment, startKey)))) - .thenReturn(new ScanResult().withItems(items).withLastEvaluatedKey(lastKey)); - } - } - } - - ArgumentMatcher matchesScanRequest( - Integer segment, Map startKey) { - return req -> - req != null - && segment.equals(req.getSegment()) - && Objects.equals(startKey, req.getExclusiveStartKey()); - } - } - - private static Map item(int id) { - return ImmutableMap.of( - "rangeKey", new AttributeValue().withN(String.valueOf(id)), - "hashKey", new AttributeValue().withS(String.valueOf(id))); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java deleted file mode 100644 index e49276ed4c40..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/DynamoDBIOWriteTest.java +++ /dev/null @@ -1,430 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static java.util.stream.Collectors.toList; -import static java.util.stream.IntStream.range; -import static java.util.stream.IntStream.rangeClosed; -import static org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO.Write.WriteFn.RETRY_ERROR_LOG; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps.filterKeys; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Maps.transformValues; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.inOrder; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import com.amazonaws.services.dynamodbv2.model.AmazonDynamoDBException; -import com.amazonaws.services.dynamodbv2.model.AttributeValue; -import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest; -import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult; -import com.amazonaws.services.dynamodbv2.model.DeleteRequest; -import com.amazonaws.services.dynamodbv2.model.PutRequest; -import com.amazonaws.services.dynamodbv2.model.WriteRequest; -import java.io.IOException; -import java.io.Serializable; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.function.Function; -import java.util.function.Supplier; -import java.util.stream.IntStream; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.PipelineResult; -import org.apache.beam.sdk.coders.DefaultCoder; -import org.apache.beam.sdk.extensions.avro.coders.AvroCoder; -import org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO.RetryConfiguration; -import org.apache.beam.sdk.io.aws.dynamodb.DynamoDBIO.Write.WriteFn; -import org.apache.beam.sdk.testing.ExpectedLogs; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SerializableBiFunction; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.KV; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.joda.time.Duration; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.mockito.ArgumentCaptor; -import org.mockito.ArgumentMatcher; -import org.mockito.InOrder; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; -import org.slf4j.helpers.MessageFormatter; - -@RunWith(MockitoJUnitRunner.class) -public class DynamoDBIOWriteTest { - private static final String tableName = "Test"; - - @Rule public final TestPipeline pipeline = TestPipeline.create(); - @Rule public final ExpectedLogs writeFnLogs = ExpectedLogs.none(WriteFn.class); - @Rule public final ExpectedException thrown = ExpectedException.none(); - - @Mock public AmazonDynamoDB client; - - @Test - public void testWritePutItems() { - List items = Item.range(0, 100); - - Supplier> capturePuts = - captureBatchWrites(client, req -> req.getPutRequest().getItem()); - - PCollection output = - pipeline - .apply(Create.of(items)) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client))); - - PAssert.that(output).empty(); - pipeline.run().waitUntilFinish(); - - assertThat(capturePuts.get()).containsExactlyInAnyOrderElementsOf(items); - } - - @Test - public void testWritePutItemsWithDuplicates() { - List items = Item.range(0, 100); - - Supplier>> captureRequests = - captureBatchWriteRequests(client, req -> req.getPutRequest().getItem()); - - pipeline - .apply(Create.of(items)) - // generate identical duplicates - .apply(ParDo.of(new AddDuplicatesDoFn(3, false))) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client))); - - pipeline.run().waitUntilFinish(); - - List> requests = captureRequests.get(); - for (List reqItems : requests) { - assertThat(reqItems).doesNotHaveDuplicates(); // each request is free of duplicates - } - - assertThat(requests.stream().flatMap(List::stream)).containsAll(items); - } - - @Test - public void testWritePutItemsWithDuplicatesByKey() { - ImmutableList keys = ImmutableList.of("id"); - List items = Item.range(0, 100); - - Supplier>> captureRequests = - captureBatchWriteRequests(client, req -> req.getPutRequest().getItem()); - - pipeline - .apply(Create.of(items)) - // decorate duplicates so they are different - .apply(ParDo.of(new AddDuplicatesDoFn(3, true))) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withDeduplicateKeys(keys)); - - pipeline.run().waitUntilFinish(); - - List> requests = captureRequests.get(); - for (List reqItems : requests) { - List keysOnly = - reqItems.stream() - .map(item -> new Item(filterKeys(item.entries, keys::contains))) - .collect(toList()); - assertThat(keysOnly).doesNotHaveDuplicates(); // each request is free of duplicates - } - - assertThat(requests.stream().flatMap(List::stream)).containsAll(items); - } - - @Test - public void testWriteDeleteItems() { - List items = Item.range(0, 100); - - Supplier> captureDeletes = - captureBatchWrites(client, req -> req.getDeleteRequest().getKey()); - - PCollection output = - pipeline - .apply(Create.of(items)) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(deleteRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client))); - - PAssert.that(output).empty(); - pipeline.run().waitUntilFinish(); - - assertThat(captureDeletes.get()).hasSize(100); - assertThat(captureDeletes.get()).containsExactlyInAnyOrderElementsOf(items); - } - - @Test - public void testWriteDeleteItemsWithDuplicates() { - List items = Item.range(0, 100); - - Supplier>> captureRequests = - captureBatchWriteRequests(client, req -> req.getDeleteRequest().getKey()); - - pipeline - .apply(Create.of(items)) - // generate identical duplicates - .apply(ParDo.of(new AddDuplicatesDoFn(3, false))) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(deleteRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client))); - - pipeline.run().waitUntilFinish(); - - List> requests = captureRequests.get(); - for (List reqItems : requests) { - assertThat(reqItems).doesNotHaveDuplicates(); // each request is free of duplicates - } - - assertThat(requests.stream().flatMap(List::stream)).containsAll(items); - } - - @Test - public void testWritePutItemsWithRetrySuccess() { - when(client.batchWriteItem(any(BatchWriteItemRequest.class))) - .thenThrow( - AmazonDynamoDBException.class, - AmazonDynamoDBException.class, - AmazonDynamoDBException.class) - .thenReturn(new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of())); - - pipeline - .apply(Create.of(Item.of(1))) - .apply( - "write", - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withRetryConfiguration(try4Times)); - - PipelineResult result = pipeline.run(); - result.waitUntilFinish(); - - verify(client, times(4)).batchWriteItem(any(BatchWriteItemRequest.class)); - } - - @Test - public void testWritePutItemsWithPartialSuccess() { - List writes = putRequests(Item.range(0, 10)); - - when(client.batchWriteItem(any(BatchWriteItemRequest.class))) - .thenReturn(partialWriteSuccess(writes.subList(4, 10))) - .thenReturn(partialWriteSuccess(writes.subList(8, 10))) - .thenReturn(new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of())); - - pipeline - .apply(Create.of(10)) // number if items to produce - .apply(ParDo.of(new GenerateItems())) // 10 items in one bundle - .apply( - "write", - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withRetryConfiguration(try4Times)); - - PipelineResult result = pipeline.run(); - result.waitUntilFinish(); - - verify(client, times(3)).batchWriteItem(any(BatchWriteItemRequest.class)); - - InOrder ordered = inOrder(client); - ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes))); - ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(4, 10)))); - ordered.verify(client).batchWriteItem(argThat(matchWritesUnordered(writes.subList(8, 10)))); - } - - @Test - public void testWritePutItemsWithRetryFailure() throws Throwable { - thrown.expect(IOException.class); - thrown.expectMessage("Error writing to DynamoDB"); - thrown.expectMessage("No more attempts allowed"); - - when(client.batchWriteItem(any(BatchWriteItemRequest.class))) - .thenThrow(AmazonDynamoDBException.class); - - pipeline - .apply(Create.of(Item.of(1))) - .apply( - DynamoDBIO.write() - .withWriteRequestMapperFn(putRequestMapper) - .withAwsClientsProvider(StaticAwsClientsProvider.of(client)) - .withRetryConfiguration(try4Times)); - - try { - pipeline.run().waitUntilFinish(); - } catch (final Pipeline.PipelineExecutionException e) { - verify(client, times(4)).batchWriteItem(any(BatchWriteItemRequest.class)); - writeFnLogs.verifyWarn(MessageFormatter.format(RETRY_ERROR_LOG, 4, "").getMessage()); - throw e.getCause(); - } - } - - @DefaultCoder(AvroCoder.class) - static class Item implements Serializable { - Map entries; - - private Item() {} - - private Item(Map entries) { - this.entries = entries; - } - - static Item of(int id) { - return new Item(ImmutableMap.of("id", String.valueOf(id))); - } - - static Item of(Map attributes) { - return new Item(ImmutableMap.copyOf(transformValues(attributes, a -> a.getS()))); - } - - static List range(int startInclusive, int endExclusive) { - return IntStream.range(startInclusive, endExclusive).mapToObj(Item::of).collect(toList()); - } - - Item withEntry(String key, String value) { - return new Item( - ImmutableMap.builder().putAll(entries).put(key, value).build()); - } - - Map attributeMap() { - return new HashMap<>(transformValues(entries, v -> new AttributeValue().withS(v))); - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - return Objects.equals(entries, ((Item) o).entries); - } - - @Override - public int hashCode() { - return Objects.hash(entries); - } - - @Override - public String toString() { - return "Item" + entries; - } - } - - private Supplier>> captureBatchWriteRequests( - AmazonDynamoDB mock, Function> extractor) { - ArgumentCaptor reqCaptor = - ArgumentCaptor.forClass(BatchWriteItemRequest.class); - when(mock.batchWriteItem(reqCaptor.capture())) - .thenReturn(new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of())); - - return () -> - reqCaptor.getAllValues().stream() - .flatMap(req -> req.getRequestItems().values().stream()) - .map(writes -> writes.stream().map(extractor).map(Item::of).collect(toList())) - .collect(toList()); - } - - private Supplier> captureBatchWrites( - AmazonDynamoDB mock, Function> extractor) { - Supplier>> requests = captureBatchWriteRequests(mock, extractor); - return () -> requests.get().stream().flatMap(reqs -> reqs.stream()).collect(toList()); - } - - private static ArgumentMatcher matchWritesUnordered( - List writes) { - return (BatchWriteItemRequest req) -> - req != null - && req.getRequestItems().get(tableName).size() == writes.size() - && req.getRequestItems().get(tableName).containsAll(writes); - } - - private static BatchWriteItemResult partialWriteSuccess(List unprocessed) { - return new BatchWriteItemResult().withUnprocessedItems(ImmutableMap.of(tableName, unprocessed)); - } - - private static List putRequests(List items) { - return items.stream().map(putRequest).collect(toList()); - } - - private static Function putRequest = - item -> new WriteRequest().withPutRequest(new PutRequest().withItem(item.attributeMap())); - - private static Function deleteRequest = - key -> new WriteRequest().withDeleteRequest(new DeleteRequest().withKey(key.attributeMap())); - - private static SerializableFunction> putRequestMapper = - item -> KV.of(tableName, putRequest.apply(item)); - - private static SerializableFunction> deleteRequestMapper = - key -> KV.of(tableName, deleteRequest.apply(key)); - - private static RetryConfiguration try4Times = - RetryConfiguration.create(4, Duration.standardSeconds(1), Duration.millis(1)); - - private static class GenerateItems extends DoFn { - @ProcessElement - public void processElement(ProcessContext ctx) { - range(0, ctx.element()).forEach(i -> ctx.output(Item.of(i))); - } - } - - /** - * A DoFn that adds N duplicates to a bundle. The original is emitted last and is the only item - * kept if deduplicating appropriately. - */ - private static class AddDuplicatesDoFn extends DoFn { - private final int duplicates; - private final SerializableBiFunction decorator; - - AddDuplicatesDoFn(int duplicates, boolean decorate) { - this.duplicates = duplicates; - this.decorator = - decorate ? (item, i) -> item.withEntry("duplicate", i.toString()) : (item, i) -> item; - } - - @ProcessElement - public void processElement(ProcessContext ctx) { - Item original = ctx.element(); - rangeClosed(1, duplicates).forEach(i -> ctx.output(decorator.apply(original, i))); - ctx.output(original); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/StaticAwsClientsProvider.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/StaticAwsClientsProvider.java deleted file mode 100644 index d3f676cf1096..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/dynamodb/StaticAwsClientsProvider.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.dynamodb; - -import static java.util.Collections.synchronizedMap; - -import com.amazonaws.services.dynamodbv2.AmazonDynamoDB; -import java.util.HashMap; -import java.util.Map; - -/** Client provider supporting unserializable clients such as mock instances for unit tests. */ -class StaticAwsClientsProvider implements AwsClientsProvider { - private static final Map clients = synchronizedMap(new HashMap<>()); - - private final int id; - private final transient boolean cleanup; - - private StaticAwsClientsProvider(AmazonDynamoDB client) { - this.id = System.identityHashCode(client); - this.cleanup = true; - } - - static AwsClientsProvider of(AmazonDynamoDB client) { - StaticAwsClientsProvider provider = new StaticAwsClientsProvider(client); - clients.put(provider.id, client); - return provider; - } - - @Override - public AmazonDynamoDB createDynamoDB() { - return clients.get(id); - } - - @Override - protected void finalize() { - if (cleanup) { - clients.remove(id); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsHttpClientConfigurationTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsHttpClientConfigurationTest.java deleted file mode 100644 index f535c4271ac9..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsHttpClientConfigurationTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import static org.junit.Assert.assertEquals; - -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * {@link AwsHttpClientConfigurationTest}. Test to verify that aws http client configuration are - * correctly being set for the respective AWS services. - */ -@RunWith(JUnit4.class) -public class AwsHttpClientConfigurationTest { - - @Test - public void testAwsHttpClientConfigurationValues() { - S3Options s3Options = getOptions(); - assertEquals(5000, s3Options.getClientConfiguration().getSocketTimeout()); - assertEquals(1000, s3Options.getClientConfiguration().getClientExecutionTimeout()); - assertEquals(10, s3Options.getClientConfiguration().getMaxConnections()); - } - - private static S3Options getOptions() { - String[] args = { - "--s3ClientFactoryClass=org.apache.beam.sdk.io.aws.s3.DefaultS3ClientBuilderFactory", - "--clientConfiguration={\"clientExecutionTimeout\":1000," - + "\"maxConnections\":10," - + "\"socketTimeout\":5000}" - }; - return PipelineOptionsFactory.fromArgs(args).as(S3Options.class); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java deleted file mode 100644 index 0099b08b7043..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/options/AwsModuleTest.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.options; - -import static org.apache.beam.repackaged.core.org.apache.commons.lang3.reflect.FieldUtils.readField; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.hasItem; -import static org.junit.Assert.assertEquals; - -import com.amazonaws.ClientConfiguration; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.PropertiesFileCredentialsProvider; -import com.amazonaws.auth.STSAssumeRoleSessionCredentialsProvider; -import com.amazonaws.auth.SystemPropertiesCredentialsProvider; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.List; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.util.common.ReflectHelpers; -import org.apache.beam.sdk.util.construction.PipelineOptionsTranslation; -import org.hamcrest.Matchers; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link AwsModule}. */ -@RunWith(JUnit4.class) -public class AwsModuleTest { - - private final ObjectMapper objectMapper = new ObjectMapper().registerModule(new AwsModule()); - - @Test - public void testObjectMapperIsAbleToFindModule() { - List modules = ObjectMapper.findModules(ReflectHelpers.findClassLoader()); - assertThat(modules, hasItem(Matchers.instanceOf(AwsModule.class))); - } - - @Test - public void testAWSStaticCredentialsProviderSerializationDeserialization() throws Exception { - String awsKeyId = "key-id"; - String awsSecretKey = "secret-key"; - - AWSStaticCredentialsProvider credentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsKeyId, awsSecretKey)); - - String serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals( - credentialsProvider.getCredentials().getAWSAccessKeyId(), - deserializedCredentialsProvider.getCredentials().getAWSAccessKeyId()); - assertEquals( - credentialsProvider.getCredentials().getAWSSecretKey(), - deserializedCredentialsProvider.getCredentials().getAWSSecretKey()); - - String sessionToken = "session-token"; - BasicSessionCredentials sessionCredentials = - new BasicSessionCredentials(awsKeyId, awsSecretKey, sessionToken); - credentialsProvider = new AWSStaticCredentialsProvider(sessionCredentials); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - BasicSessionCredentials deserializedCredentials = - (BasicSessionCredentials) deserializedCredentialsProvider.getCredentials(); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals(deserializedCredentials.getAWSAccessKeyId(), awsKeyId); - assertEquals(deserializedCredentials.getAWSSecretKey(), awsSecretKey); - assertEquals(deserializedCredentials.getSessionToken(), sessionToken); - } - - @Test - public void testPropertiesFileCredentialsProviderSerializationDeserialization() throws Exception { - String credentialsFilePath = "/path/to/file"; - - PropertiesFileCredentialsProvider credentialsProvider = - new PropertiesFileCredentialsProvider(credentialsFilePath); - - String serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals( - credentialsFilePath, - readField(deserializedCredentialsProvider, "credentialsFilePath", true)); - } - - @Test - public void testClasspathPropertiesFileCredentialsProviderSerializationDeserialization() - throws Exception { - String credentialsFilePath = "/path/to/file"; - - ClasspathPropertiesFileCredentialsProvider credentialsProvider = - new ClasspathPropertiesFileCredentialsProvider(credentialsFilePath); - - String serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals( - credentialsFilePath, - readField(deserializedCredentialsProvider, "credentialsFilePath", true)); - } - - @Test - public void testSTSAssumeRoleSessionCredentialsProviderSerializationDeserialization() - throws Exception { - String roleArn = "arn:aws:iam::000111222333:role/TestRole"; - String roleSessionName = "roleSessionName"; - STSAssumeRoleSessionCredentialsProvider credentialsProvider = - new STSAssumeRoleSessionCredentialsProvider.Builder(roleArn, roleSessionName).build(); - String serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals(roleArn, readField(deserializedCredentialsProvider, "roleArn", true)); - assertEquals( - roleSessionName, readField(deserializedCredentialsProvider, "roleSessionName", true)); - } - - @Test - public void testSingletonAWSCredentialsProviderSerializationDeserialization() throws Exception { - AWSCredentialsProvider credentialsProvider; - String serializedCredentialsProvider; - AWSCredentialsProvider deserializedCredentialsProvider; - - credentialsProvider = new DefaultAWSCredentialsProviderChain(); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new EnvironmentVariableCredentialsProvider(); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new SystemPropertiesCredentialsProvider(); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new ProfileCredentialsProvider(); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new EC2ContainerCredentialsProviderWrapper(); - serializedCredentialsProvider = objectMapper.writeValueAsString(credentialsProvider); - deserializedCredentialsProvider = - objectMapper.readValue(serializedCredentialsProvider, AWSCredentialsProvider.class); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - } - - @Test - public void testSSECustomerKeySerializationDeserialization() throws Exception { - final String key = "86glyTlCNZgccSxW8JxMa6ZdjdK3N141glAysPUZ3AA="; - final String md5 = null; - final String algorithm = "AES256"; - - SSECustomerKey value = new SSECustomerKey(key); - - String valueAsJson = objectMapper.writeValueAsString(value); - SSECustomerKey valueDes = objectMapper.readValue(valueAsJson, SSECustomerKey.class); - assertEquals(key, valueDes.getKey()); - assertEquals(algorithm, valueDes.getAlgorithm()); - assertEquals(md5, valueDes.getMd5()); - } - - @Test - public void testSSEAwsKeyManagementParamsSerializationDeserialization() throws Exception { - final String awsKmsKeyId = - "arn:aws:kms:eu-west-1:123456789012:key/dc123456-7890-ABCD-EF01-234567890ABC"; - final String encryption = "aws:kms"; - SSEAwsKeyManagementParams value = new SSEAwsKeyManagementParams(awsKmsKeyId); - - String valueAsJson = objectMapper.writeValueAsString(value); - SSEAwsKeyManagementParams valueDes = - objectMapper.readValue(valueAsJson, SSEAwsKeyManagementParams.class); - assertEquals(awsKmsKeyId, valueDes.getAwsKmsKeyId()); - assertEquals(encryption, valueDes.getEncryption()); - } - - @Test - public void testClientConfigurationSerializationDeserialization() throws Exception { - ClientConfiguration clientConfiguration = new ClientConfiguration(); - clientConfiguration.setProxyHost("localhost"); - clientConfiguration.setProxyPort(1234); - clientConfiguration.setProxyUsername("username"); - clientConfiguration.setProxyPassword("password"); - - final String valueAsJson = objectMapper.writeValueAsString(clientConfiguration); - final ClientConfiguration valueDes = - objectMapper.readValue(valueAsJson, ClientConfiguration.class); - assertEquals("localhost", valueDes.getProxyHost()); - assertEquals(1234, valueDes.getProxyPort()); - assertEquals("username", valueDes.getProxyUsername()); - assertEquals("password", valueDes.getProxyPassword()); - } - - @Test - public void testAwsHttpClientConfigurationSerializationDeserialization() throws Exception { - ClientConfiguration clientConfiguration = new ClientConfiguration(); - clientConfiguration.setConnectionTimeout(100); - clientConfiguration.setConnectionMaxIdleMillis(1000); - clientConfiguration.setSocketTimeout(300); - - final String valueAsJson = objectMapper.writeValueAsString(clientConfiguration); - final ClientConfiguration clientConfigurationDeserialized = - objectMapper.readValue(valueAsJson, ClientConfiguration.class); - assertEquals(100, clientConfigurationDeserialized.getConnectionTimeout()); - assertEquals(1000, clientConfigurationDeserialized.getConnectionMaxIdleMillis()); - assertEquals(300, clientConfigurationDeserialized.getSocketTimeout()); - } - - @Test - public void testAwsHttpClientConfigurationSerializationDeserializationProto() throws Exception { - AwsOptions awsOptions = - PipelineOptionsTranslation.fromProto( - PipelineOptionsTranslation.toProto( - PipelineOptionsFactory.fromArgs( - "--clientConfiguration={ \"connectionTimeout\": 100, \"connectionMaxIdleTime\": 1000, \"socketTimeout\": 300, \"proxyPort\": -1, \"requestTimeout\": 1500 }") - .create())) - .as(AwsOptions.class); - ClientConfiguration clientConfigurationDeserialized = awsOptions.getClientConfiguration(); - - assertEquals(100, clientConfigurationDeserialized.getConnectionTimeout()); - assertEquals(1000, clientConfigurationDeserialized.getConnectionMaxIdleMillis()); - assertEquals(300, clientConfigurationDeserialized.getSocketTimeout()); - assertEquals(-1, clientConfigurationDeserialized.getProxyPort()); - assertEquals(1500, clientConfigurationDeserialized.getRequestTimeout()); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/MatchResultMatcher.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/MatchResultMatcher.java deleted file mode 100644 index e6b127947df0..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/MatchResultMatcher.java +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import java.io.IOException; -import java.util.List; -import org.apache.beam.sdk.io.fs.MatchResult; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.hamcrest.BaseMatcher; -import org.hamcrest.Description; -import org.hamcrest.Matcher; - -/** - * Hamcrest {@link Matcher} to match {@link MatchResult}. Necessary because {@link - * MatchResult#metadata()} throws an exception under normal circumstances. - */ -class MatchResultMatcher extends BaseMatcher { - - private final MatchResult.Status expectedStatus; - private final List expectedMetadata; - private final IOException expectedException; - - private MatchResultMatcher( - MatchResult.Status expectedStatus, - List expectedMetadata, - IOException expectedException) { - this.expectedStatus = checkNotNull(expectedStatus); - checkArgument((expectedMetadata == null) ^ (expectedException == null)); - this.expectedMetadata = expectedMetadata; - this.expectedException = expectedException; - } - - static MatchResultMatcher create(List expectedMetadata) { - return new MatchResultMatcher(MatchResult.Status.OK, expectedMetadata, null); - } - - private static MatchResultMatcher create(MatchResult.Metadata expectedMetadata) { - return create(ImmutableList.of(expectedMetadata)); - } - - static MatchResultMatcher create( - long sizeBytes, long lastModifiedMillis, ResourceId resourceId, boolean isReadSeekEfficient) { - return create( - MatchResult.Metadata.builder() - .setSizeBytes(sizeBytes) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(resourceId) - .setIsReadSeekEfficient(isReadSeekEfficient) - .build()); - } - - static MatchResultMatcher create( - MatchResult.Status expectedStatus, IOException expectedException) { - return new MatchResultMatcher(expectedStatus, null, expectedException); - } - - static MatchResultMatcher create(MatchResult expected) { - MatchResult.Status expectedStatus = expected.status(); - List expectedMetadata = null; - IOException expectedException = null; - try { - expectedMetadata = expected.metadata(); - } catch (IOException e) { - expectedException = e; - } - return new MatchResultMatcher(expectedStatus, expectedMetadata, expectedException); - } - - @Override - public boolean matches(Object actual) { - if (actual == null) { - return false; - } - if (!(actual instanceof MatchResult)) { - return false; - } - MatchResult actualResult = (MatchResult) actual; - if (!expectedStatus.equals(actualResult.status())) { - return false; - } - - List actualMetadata; - try { - actualMetadata = actualResult.metadata(); - } catch (IOException e) { - return expectedException != null && expectedException.toString().equals(e.toString()); - } - return expectedMetadata != null && expectedMetadata.equals(actualMetadata); - } - - @Override - public void describeTo(Description description) { - if (expectedMetadata != null) { - description.appendText(MatchResult.create(expectedStatus, expectedMetadata).toString()); - } else { - description.appendText(MatchResult.create(expectedStatus, expectedException).toString()); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemIT.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemIT.java deleted file mode 100644 index 112ab95463b4..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemIT.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount; -import static org.apache.commons.lang3.StringUtils.isAllLowerCase; -import static org.apache.http.HttpHeaders.CONTENT_LENGTH; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.S3; - -import com.amazonaws.Request; -import com.amazonaws.handlers.RequestHandler2; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import java.util.Map; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.TextIO; -import org.apache.beam.sdk.io.aws.ITEnvironment; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn; -import org.apache.beam.sdk.io.common.TestRow.SelectNameFn; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.joda.time.DateTime; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExternalResource; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** - * Integration test to write and read from a S3 compatible file system. - * - *

By default this runs against Localstack, but you can use {@link S3FileSystemIT.S3ITOptions} to - * configure tests to run against AWS S3. - * - *

{@code
- * ./gradlew :sdks:java:io:amazon-web-services:integrationTest \
- *   --info \
- *   --tests "org.apache.beam.sdk.io.aws.s3.S3FileSystemIT" \
- *   -DintegrationTestPipelineOptions='["--awsRegion=eu-central-1","--useLocalstack=false"]'
- * }
- */ -@RunWith(JUnit4.class) -public class S3FileSystemIT { - public interface S3ITOptions extends ITEnvironment.ITOptions, S3Options {} - - @ClassRule - public static ITEnvironment env = - new ITEnvironment(S3, S3ITOptions.class) { - @Override - protected void before() { - super.before(); - options().setS3ClientFactoryClass(S3ClientFixFix.class); - } - }; - - @Rule public TestPipeline pipelineWrite = env.createTestPipeline(); - @Rule public TestPipeline pipelineRead = env.createTestPipeline(); - @Rule public S3Bucket s3Bucket = new S3Bucket(); - - @Test - public void testWriteThenRead() { - int rows = env.options().getNumberOfRows(); - // Write test dataset to S3. - pipelineWrite - .apply("Generate Sequence", GenerateSequence.from(0).to(rows)) - .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn())) - .apply("Prepare file rows", ParDo.of(new SelectNameFn())) - .apply("Write to S3 file", TextIO.write().to("s3://" + s3Bucket.name + "/test")); - - pipelineWrite.run().waitUntilFinish(); - - // Read test dataset from S3. - PCollection output = - pipelineRead.apply(TextIO.read().from("s3://" + s3Bucket.name + "/test*")); - - PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo((long) rows); - - PAssert.that(output.apply(Combine.globally(new HashingFn()).withoutDefaults())) - .containsInAnyOrder(getExpectedHashForRowCount(rows)); - - pipelineRead.run().waitUntilFinish(); - } - - static class S3Bucket extends ExternalResource { - public final String name = "beam-s3io-it-" + new DateTime().toString("yyyyMMdd-HHmmss"); - - @Override - protected void before() { - AmazonS3 client = env.buildClient(AmazonS3ClientBuilder.standard()); - client.createBucket(name); - client.shutdown(); - } - } - - // Fix duplicated Content-Length header due to case-sensitive handling of header names - // https://github.com/aws/aws-sdk-java/issues/2503 - private static class S3ClientFixFix extends DefaultS3ClientBuilderFactory { - @Override - public AmazonS3ClientBuilder createBuilder(S3Options s3Options) { - return super.createBuilder(s3Options) - .withRequestHandlers( - new RequestHandler2() { - @Override - public void beforeRequest(Request request) { - Map headers = request.getHeaders(); - if (!isAllLowerCase(CONTENT_LENGTH) && headers.containsKey(CONTENT_LENGTH)) { - headers.remove(CONTENT_LENGTH.toLowerCase()); // remove duplicated header - } - } - }); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java deleted file mode 100644 index db749d7080e2..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3FileSystemTest.java +++ /dev/null @@ -1,1248 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.buildMockedS3FileSystem; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3Config; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithCustomEndpointAndPathStyleAccessEnabled; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithSSECustomerKey; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3Options; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithCustomEndpointAndPathStyleAccessEnabled; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithSSECustomerKey; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.toMd5; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.contains; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Matchers.anyObject; -import static org.mockito.Matchers.notNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import akka.http.scaladsl.Http; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.AnonymousAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder.EndpointConfiguration; -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.AmazonS3Exception; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CopyObjectRequest; -import com.amazonaws.services.s3.model.CopyObjectResult; -import com.amazonaws.services.s3.model.CopyPartRequest; -import com.amazonaws.services.s3.model.CopyPartResult; -import com.amazonaws.services.s3.model.DeleteObjectsRequest; -import com.amazonaws.services.s3.model.GetObjectMetadataRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.ListObjectsV2Request; -import com.amazonaws.services.s3.model.ListObjectsV2Result; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.S3ObjectSummary; -import com.amazonaws.services.s3.model.SSECustomerKey; -import io.findify.s3mock.S3Mock; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.ByteBuffer; -import java.nio.channels.ReadableByteChannel; -import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.Date; -import java.util.List; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.io.fs.CreateOptions; -import org.apache.beam.sdk.io.fs.MatchResult; -import org.apache.beam.sdk.metrics.Lineage; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.mockito.ArgumentMatcher; - -/** Test case for {@link S3FileSystem}. */ -@RunWith(JUnit4.class) -public class S3FileSystemTest { - private static S3Mock api; - private static AmazonS3 client; - - @BeforeClass - public static void beforeClass() { - api = new S3Mock.Builder().withInMemoryBackend().build(); - Http.ServerBinding binding = api.start(); - - EndpointConfiguration endpoint = - new EndpointConfiguration( - "http://localhost:" + binding.localAddress().getPort(), "us-west-2"); - client = - AmazonS3ClientBuilder.standard() - .withPathStyleAccessEnabled(true) - .withEndpointConfiguration(endpoint) - .withCredentials(new AWSStaticCredentialsProvider(new AnonymousAWSCredentials())) - .build(); - } - - @AfterClass - public static void afterClass() { - api.stop(); - } - - @Test - public void testGetScheme() { - S3FileSystem s3FileSystem = new S3FileSystem(s3Config("s3")); - assertEquals("s3", s3FileSystem.getScheme()); - - s3FileSystem = new S3FileSystem(s3Config("other")); - assertEquals("other", s3FileSystem.getScheme()); - } - - @Test - public void testGetSchemeWithS3Options() { - S3FileSystem s3FileSystem = new S3FileSystem(s3Options()); - assertEquals("s3", s3FileSystem.getScheme()); - } - - @Test - public void testGetPathStyleAccessEnabled() throws URISyntaxException { - S3FileSystem s3FileSystem = - new S3FileSystem(s3ConfigWithCustomEndpointAndPathStyleAccessEnabled("s3")); - URL s3Url = s3FileSystem.getAmazonS3Client().getUrl("bucket", "file"); - assertEquals("https://s3.custom.dns/bucket/file", s3Url.toURI().toString()); - } - - @Test - public void testGetPathStyleAccessEnabledWithS3Options() throws URISyntaxException { - S3FileSystem s3FileSystem = - new S3FileSystem(s3OptionsWithCustomEndpointAndPathStyleAccessEnabled()); - URL s3Url = s3FileSystem.getAmazonS3Client().getUrl("bucket", "file"); - assertEquals("https://s3.custom.dns/bucket/file", s3Url.toURI().toString()); - } - - @Test - public void testCopy() throws IOException { - testCopy(s3Config("s3")); - testCopy(s3Config("other")); - testCopy(s3ConfigWithSSECustomerKey("s3")); - testCopy(s3ConfigWithSSECustomerKey("other")); - } - - @Test - public void testCopyWithS3Options() throws IOException { - testCopy(s3Options()); - testCopy(s3OptionsWithSSECustomerKey()); - } - - private GetObjectMetadataRequest createObjectMetadataRequest( - S3ResourceId path, SSECustomerKey sseCustomerKey) { - GetObjectMetadataRequest getObjectMetadataRequest = - new GetObjectMetadataRequest(path.getBucket(), path.getKey()); - getObjectMetadataRequest.setSSECustomerKey(sseCustomerKey); - return getObjectMetadataRequest; - } - - private void assertGetObjectMetadata( - S3FileSystem s3FileSystem, - GetObjectMetadataRequest request, - String sseCustomerKeyMd5, - ObjectMetadata objectMetadata) { - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata(argThat(new GetObjectMetadataRequestMatcher(request)))) - .thenReturn(objectMetadata); - assertEquals( - sseCustomerKeyMd5, - s3FileSystem.getAmazonS3Client().getObjectMetadata(request).getSSECustomerKeyMd5()); - } - - private void testCopy(S3FileSystemConfiguration config) throws IOException { - testCopy(buildMockedS3FileSystem(config), config.getSSECustomerKey()); - } - - private void testCopy(S3Options options) throws IOException { - testCopy(buildMockedS3FileSystem(options), options.getSSECustomerKey()); - } - - private void testCopy(S3FileSystem s3FileSystem, SSECustomerKey sseCustomerKey) - throws IOException { - S3ResourceId sourcePath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/from"); - S3ResourceId destinationPath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/to"); - - ObjectMetadata objectMetadata = new ObjectMetadata(); - objectMetadata.setContentLength(0); - String sseCustomerKeyMd5 = toMd5(sseCustomerKey); - if (sseCustomerKeyMd5 != null) { - objectMetadata.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - assertGetObjectMetadata( - s3FileSystem, - createObjectMetadataRequest(sourcePath, sseCustomerKey), - sseCustomerKeyMd5, - objectMetadata); - - s3FileSystem.copy(sourcePath, destinationPath); - - verify(s3FileSystem.getAmazonS3Client(), times(1)).copyObject(any(CopyObjectRequest.class)); - - // we simulate a big object >= 5GB so it takes the multiPart path - objectMetadata.setContentLength(5_368_709_120L); - assertGetObjectMetadata( - s3FileSystem, - createObjectMetadataRequest(sourcePath, sseCustomerKey), - sseCustomerKeyMd5, - objectMetadata); - - try { - s3FileSystem.copy(sourcePath, destinationPath); - } catch (NullPointerException e) { - // ignore failing unmocked path, this is covered by testMultipartCopy test - } - - verify(s3FileSystem.getAmazonS3Client(), never()).copyObject(null); - } - - @Test - public void testAtomicCopy() { - testAtomicCopy(s3Config("s3")); - testAtomicCopy(s3Config("other")); - testAtomicCopy(s3ConfigWithSSECustomerKey("s3")); - testAtomicCopy(s3ConfigWithSSECustomerKey("other")); - } - - @Test - public void testAtomicCopyWithS3Options() { - testAtomicCopy(s3Options()); - testAtomicCopy(s3OptionsWithSSECustomerKey()); - } - - private void testAtomicCopy(S3FileSystemConfiguration config) { - testAtomicCopy(buildMockedS3FileSystem(config), config.getSSECustomerKey()); - } - - private void testAtomicCopy(S3Options options) { - testAtomicCopy(buildMockedS3FileSystem(options), options.getSSECustomerKey()); - } - - private void testAtomicCopy(S3FileSystem s3FileSystem, SSECustomerKey sseCustomerKey) { - S3ResourceId sourcePath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/from"); - S3ResourceId destinationPath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/to"); - - CopyObjectResult copyObjectResult = new CopyObjectResult(); - String sseCustomerKeyMd5 = toMd5(sseCustomerKey); - if (sseCustomerKeyMd5 != null) { - copyObjectResult.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - CopyObjectRequest copyObjectRequest = - new CopyObjectRequest( - sourcePath.getBucket(), - sourcePath.getKey(), - destinationPath.getBucket(), - destinationPath.getKey()); - copyObjectRequest.setSourceSSECustomerKey(sseCustomerKey); - copyObjectRequest.setDestinationSSECustomerKey(sseCustomerKey); - when(s3FileSystem.getAmazonS3Client().copyObject(any(CopyObjectRequest.class))) - .thenReturn(copyObjectResult); - assertEquals( - sseCustomerKeyMd5, - s3FileSystem.getAmazonS3Client().copyObject(copyObjectRequest).getSSECustomerKeyMd5()); - - ObjectMetadata sourceS3ObjectMetadata = new ObjectMetadata(); - s3FileSystem.atomicCopy(sourcePath, destinationPath, sourceS3ObjectMetadata); - - verify(s3FileSystem.getAmazonS3Client(), times(2)).copyObject(any(CopyObjectRequest.class)); - } - - @Test - public void testMultipartCopy() { - testMultipartCopy(s3Config("s3")); - testMultipartCopy(s3Config("other")); - testMultipartCopy(s3ConfigWithSSECustomerKey("s3")); - testMultipartCopy(s3ConfigWithSSECustomerKey("other")); - } - - @Test - public void testMultipartCopyWithS3Options() { - testMultipartCopy(s3Options()); - testMultipartCopy(s3OptionsWithSSECustomerKey()); - } - - private void testMultipartCopy(S3FileSystemConfiguration config) { - testMultipartCopy( - buildMockedS3FileSystem(config), - config.getSSECustomerKey(), - config.getS3UploadBufferSizeBytes()); - } - - private void testMultipartCopy(S3Options options) { - testMultipartCopy( - buildMockedS3FileSystem(options), - options.getSSECustomerKey(), - options.getS3UploadBufferSizeBytes()); - } - - private void testMultipartCopy( - S3FileSystem s3FileSystem, SSECustomerKey sseCustomerKey, long s3UploadBufferSizeBytes) { - S3ResourceId sourcePath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/from"); - S3ResourceId destinationPath = S3ResourceId.fromUri(s3FileSystem.getScheme() + "://bucket/to"); - - InitiateMultipartUploadResult initiateMultipartUploadResult = - new InitiateMultipartUploadResult(); - initiateMultipartUploadResult.setUploadId("upload-id"); - String sseCustomerKeyMd5 = toMd5(sseCustomerKey); - if (sseCustomerKeyMd5 != null) { - initiateMultipartUploadResult.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - when(s3FileSystem - .getAmazonS3Client() - .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class))) - .thenReturn(initiateMultipartUploadResult); - assertEquals( - sseCustomerKeyMd5, - s3FileSystem - .getAmazonS3Client() - .initiateMultipartUpload( - new InitiateMultipartUploadRequest( - destinationPath.getBucket(), destinationPath.getKey())) - .getSSECustomerKeyMd5()); - - ObjectMetadata sourceObjectMetadata = new ObjectMetadata(); - sourceObjectMetadata.setContentLength((long) (s3UploadBufferSizeBytes * 1.5)); - sourceObjectMetadata.setContentEncoding("read-seek-efficient"); - if (sseCustomerKeyMd5 != null) { - sourceObjectMetadata.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - assertGetObjectMetadata( - s3FileSystem, - createObjectMetadataRequest(sourcePath, sseCustomerKey), - sseCustomerKeyMd5, - sourceObjectMetadata); - - CopyPartResult copyPartResult1 = new CopyPartResult(); - copyPartResult1.setETag("etag-1"); - CopyPartResult copyPartResult2 = new CopyPartResult(); - copyPartResult1.setETag("etag-2"); - if (sseCustomerKeyMd5 != null) { - copyPartResult1.setSSECustomerKeyMd5(sseCustomerKeyMd5); - copyPartResult2.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - CopyPartRequest copyPartRequest = new CopyPartRequest(); - copyPartRequest.setSourceSSECustomerKey(sseCustomerKey); - when(s3FileSystem.getAmazonS3Client().copyPart(any(CopyPartRequest.class))) - .thenReturn(copyPartResult1) - .thenReturn(copyPartResult2); - assertEquals( - sseCustomerKeyMd5, - s3FileSystem.getAmazonS3Client().copyPart(copyPartRequest).getSSECustomerKeyMd5()); - - s3FileSystem.multipartCopy(sourcePath, destinationPath, sourceObjectMetadata); - - verify(s3FileSystem.getAmazonS3Client(), times(1)) - .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); - } - - @Test - public void deleteThousandsOfObjectsInMultipleBuckets() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - List buckets = ImmutableList.of("bucket1", "bucket2"); - List keys = new ArrayList<>(); - for (int i = 0; i < 2500; i++) { - keys.add(String.format("key-%d", i)); - } - List paths = new ArrayList<>(); - for (String bucket : buckets) { - for (String key : keys) { - paths.add(S3ResourceId.fromComponents("s3", bucket, key)); - } - } - - s3FileSystem.delete(paths); - - // Should require 6 calls to delete 2500 objects in each of 2 buckets. - verify(s3FileSystem.getAmazonS3Client(), times(6)) - .deleteObjects(any(DeleteObjectsRequest.class)); - } - - @Test - public void deleteThousandsOfObjectsInMultipleBucketsWithS3Options() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - List buckets = ImmutableList.of("bucket1", "bucket2"); - List keys = new ArrayList<>(); - for (int i = 0; i < 2500; i++) { - keys.add(String.format("key-%d", i)); - } - List paths = new ArrayList<>(); - for (String bucket : buckets) { - for (String key : keys) { - paths.add(S3ResourceId.fromComponents("s3", bucket, key)); - } - } - - s3FileSystem.delete(paths); - - // Should require 6 calls to delete 2500 objects in each of 2 buckets. - verify(s3FileSystem.getAmazonS3Client(), times(6)) - .deleteObjects(any(DeleteObjectsRequest.class)); - } - - @Test - public void matchNonGlob() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3")); - - S3ResourceId path = S3ResourceId.fromUri("mys3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setContentEncoding("read-seek-efficient"); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(true) - .build()))); - } - - @Test - public void matchNonGlobWithS3Options() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setContentEncoding("read-seek-efficient"); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(true) - .build()))); - } - - @Test - public void matchNonGlobNotReadSeekEfficient() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - s3ObjectMetadata.setContentEncoding("gzip"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(false) - .build()))); - } - - @Test - public void matchNonGlobNotReadSeekEfficientWithS3Options() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - s3ObjectMetadata.setContentEncoding("gzip"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(false) - .build()))); - } - - @Test - public void matchNonGlobNullContentEncoding() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - s3ObjectMetadata.setContentEncoding(null); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(true) - .build()))); - } - - @Test - public void matchNonGlobNullContentEncodingWithS3Options() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - long lastModifiedMillis = 1540000000000L; - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(lastModifiedMillis)); - s3ObjectMetadata.setContentEncoding(null); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenReturn(s3ObjectMetadata); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setSizeBytes(100) - .setLastModifiedMillis(lastModifiedMillis) - .setResourceId(path) - .setIsReadSeekEfficient(true) - .build()))); - } - - @Test - public void matchNonGlobNotFound() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3")); - - S3ResourceId path = S3ResourceId.fromUri("mys3://testbucket/testdirectory/nonexistentfile"); - AmazonS3Exception exception = new AmazonS3Exception("mock exception"); - exception.setStatusCode(404); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenThrow(exception); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create(MatchResult.Status.NOT_FOUND, new FileNotFoundException())); - } - - @Test - public void matchNonGlobNotFoundWithS3Options() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/nonexistentfile"); - AmazonS3Exception exception = new AmazonS3Exception("mock exception"); - exception.setStatusCode(404); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenThrow(exception); - - MatchResult result = s3FileSystem.matchNonGlobPath(path); - assertThat( - result, - MatchResultMatcher.create(MatchResult.Status.NOT_FOUND, new FileNotFoundException())); - } - - @Test - public void matchNonGlobForbidden() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - AmazonS3Exception exception = new AmazonS3Exception("mock exception"); - exception.setStatusCode(403); - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/keyname"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenThrow(exception); - - assertThat( - s3FileSystem.matchNonGlobPath(path), - MatchResultMatcher.create(MatchResult.Status.ERROR, new IOException(exception))); - } - - @Test - public void matchNonGlobForbiddenWithS3Options() { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - AmazonS3Exception exception = new AmazonS3Exception("mock exception"); - exception.setStatusCode(403); - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/testdirectory/keyname"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(path.getBucket(), path.getKey()))))) - .thenThrow(exception); - - assertThat( - s3FileSystem.matchNonGlobPath(path), - MatchResultMatcher.create(MatchResult.Status.ERROR, new IOException(exception))); - } - - static class ListObjectsV2RequestArgumentMatches - implements ArgumentMatcher { - private final ListObjectsV2Request expected; - - ListObjectsV2RequestArgumentMatches(ListObjectsV2Request expected) { - this.expected = checkNotNull(expected); - } - - @Override - public boolean matches(ListObjectsV2Request argument) { - if (argument instanceof ListObjectsV2Request) { - ListObjectsV2Request actual = (ListObjectsV2Request) argument; - return expected.getBucketName().equals(actual.getBucketName()) - && expected.getPrefix().equals(actual.getPrefix()) - && (expected.getContinuationToken() == null - ? actual.getContinuationToken() == null - : expected.getContinuationToken().equals(actual.getContinuationToken())); - } - return false; - } - } - - @Test - public void matchGlob() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3")); - - S3ResourceId path = S3ResourceId.fromUri("mys3://testbucket/foo/bar*baz"); - - ListObjectsV2Request firstRequest = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken(null); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary firstMatch = new S3ObjectSummary(); - firstMatch.setBucketName(path.getBucket()); - firstMatch.setKey("foo/bar0baz"); - firstMatch.setSize(100); - firstMatch.setLastModified(new Date(1540000000001L)); - - // Expected to not be returned; prefix matches, but substring after wildcard does not - S3ObjectSummary secondMatch = new S3ObjectSummary(); - secondMatch.setBucketName(path.getBucket()); - secondMatch.setKey("foo/bar1qux"); - secondMatch.setSize(200); - secondMatch.setLastModified(new Date(1540000000002L)); - - // Expected first request returns continuation token - ListObjectsV2Result firstResult = new ListObjectsV2Result(); - firstResult.setNextContinuationToken("token"); - firstResult.getObjectSummaries().add(firstMatch); - firstResult.getObjectSummaries().add(secondMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(firstRequest)))) - .thenReturn(firstResult); - - // Expect second request with continuation token - ListObjectsV2Request secondRequest = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken("token"); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary thirdMatch = new S3ObjectSummary(); - thirdMatch.setBucketName(path.getBucket()); - thirdMatch.setKey("foo/bar2baz"); - thirdMatch.setSize(300); - thirdMatch.setLastModified(new Date(1540000000003L)); - - // Expected second request returns third prefix match and no continuation token - ListObjectsV2Result secondResult = new ListObjectsV2Result(); - secondResult.setNextContinuationToken(null); - secondResult.getObjectSummaries().add(thirdMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(secondRequest)))) - .thenReturn(secondResult); - - // Expect object metadata queries for content encoding - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata); - - assertThat( - s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0), - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "mys3", firstMatch.getBucketName(), firstMatch.getKey())) - .setSizeBytes(firstMatch.getSize()) - .setLastModifiedMillis(firstMatch.getLastModified().getTime()) - .build(), - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "mys3", thirdMatch.getBucketName(), thirdMatch.getKey())) - .setSizeBytes(thirdMatch.getSize()) - .setLastModifiedMillis(thirdMatch.getLastModified().getTime()) - .build()))); - } - - @Test - public void matchGlobWithS3Options() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar*baz"); - - ListObjectsV2Request firstRequest = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken(null); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary firstMatch = new S3ObjectSummary(); - firstMatch.setBucketName(path.getBucket()); - firstMatch.setKey("foo/bar0baz"); - firstMatch.setSize(100); - firstMatch.setLastModified(new Date(1540000000001L)); - - // Expected to not be returned; prefix matches, but substring after wildcard does not - S3ObjectSummary secondMatch = new S3ObjectSummary(); - secondMatch.setBucketName(path.getBucket()); - secondMatch.setKey("foo/bar1qux"); - secondMatch.setSize(200); - secondMatch.setLastModified(new Date(1540000000002L)); - - // Expected first request returns continuation token - ListObjectsV2Result firstResult = new ListObjectsV2Result(); - firstResult.setNextContinuationToken("token"); - firstResult.getObjectSummaries().add(firstMatch); - firstResult.getObjectSummaries().add(secondMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(firstRequest)))) - .thenReturn(firstResult); - - // Expect second request with continuation token - ListObjectsV2Request secondRequest = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken("token"); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary thirdMatch = new S3ObjectSummary(); - thirdMatch.setBucketName(path.getBucket()); - thirdMatch.setKey("foo/bar2baz"); - thirdMatch.setSize(300); - thirdMatch.setLastModified(new Date(1540000000003L)); - - // Expected second request returns third prefix match and no continuation token - ListObjectsV2Result secondResult = new ListObjectsV2Result(); - secondResult.setNextContinuationToken(null); - secondResult.getObjectSummaries().add(thirdMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(secondRequest)))) - .thenReturn(secondResult); - - // Expect object metadata queries for content encoding - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata); - - assertThat( - s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0), - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "s3", firstMatch.getBucketName(), firstMatch.getKey())) - .setSizeBytes(firstMatch.getSize()) - .setLastModifiedMillis(firstMatch.getLastModified().getTime()) - .build(), - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "s3", thirdMatch.getBucketName(), thirdMatch.getKey())) - .setSizeBytes(thirdMatch.getSize()) - .setLastModifiedMillis(thirdMatch.getLastModified().getTime()) - .build()))); - } - - @Test - public void matchGlobWithSlashes() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar\\baz*"); - - ListObjectsV2Request request = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken(null); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary firstMatch = new S3ObjectSummary(); - firstMatch.setBucketName(path.getBucket()); - firstMatch.setKey("foo/bar\\baz0"); - firstMatch.setSize(100); - firstMatch.setLastModified(new Date(1540000000001L)); - - // Expected to not be returned; prefix matches, but substring after wildcard does not - S3ObjectSummary secondMatch = new S3ObjectSummary(); - secondMatch.setBucketName(path.getBucket()); - secondMatch.setKey("foo/bar/baz1"); - secondMatch.setSize(200); - secondMatch.setLastModified(new Date(1540000000002L)); - - // Expected first request returns continuation token - ListObjectsV2Result result = new ListObjectsV2Result(); - result.getObjectSummaries().add(firstMatch); - result.getObjectSummaries().add(secondMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(request)))) - .thenReturn(result); - - // Expect object metadata queries for content encoding - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata); - - assertThat( - s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0), - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "s3", firstMatch.getBucketName(), firstMatch.getKey())) - .setSizeBytes(firstMatch.getSize()) - .setLastModifiedMillis(firstMatch.getLastModified().getTime()) - .build()))); - } - - @Test - public void matchGlobWithSlashesWithS3Options() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar\\baz*"); - - ListObjectsV2Request request = - new ListObjectsV2Request() - .withBucketName(path.getBucket()) - .withPrefix(path.getKeyNonWildcardPrefix()) - .withContinuationToken(null); - - // Expected to be returned; prefix and wildcard/regex match - S3ObjectSummary firstMatch = new S3ObjectSummary(); - firstMatch.setBucketName(path.getBucket()); - firstMatch.setKey("foo/bar\\baz0"); - firstMatch.setSize(100); - firstMatch.setLastModified(new Date(1540000000001L)); - - // Expected to not be returned; prefix matches, but substring after wildcard does not - S3ObjectSummary secondMatch = new S3ObjectSummary(); - secondMatch.setBucketName(path.getBucket()); - secondMatch.setKey("foo/bar/baz1"); - secondMatch.setSize(200); - secondMatch.setLastModified(new Date(1540000000002L)); - - // Expected first request returns continuation token - ListObjectsV2Result result = new ListObjectsV2Result(); - result.getObjectSummaries().add(firstMatch); - result.getObjectSummaries().add(secondMatch); - when(s3FileSystem - .getAmazonS3Client() - .listObjectsV2(argThat(new ListObjectsV2RequestArgumentMatches(request)))) - .thenReturn(result); - - // Expect object metadata queries for content encoding - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem.getAmazonS3Client().getObjectMetadata(anyObject())).thenReturn(metadata); - - assertThat( - s3FileSystem.matchGlobPaths(ImmutableList.of(path)).get(0), - MatchResultMatcher.create( - ImmutableList.of( - MatchResult.Metadata.builder() - .setIsReadSeekEfficient(true) - .setResourceId( - S3ResourceId.fromComponents( - "s3", firstMatch.getBucketName(), firstMatch.getKey())) - .setSizeBytes(firstMatch.getSize()) - .setLastModifiedMillis(firstMatch.getLastModified().getTime()) - .build()))); - } - - @Test - public void matchVariousInvokeThreadPool() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3")); - - AmazonS3Exception notFoundException = new AmazonS3Exception("mock exception"); - notFoundException.setStatusCode(404); - S3ResourceId pathNotExist = - S3ResourceId.fromUri("s3://testbucket/testdirectory/nonexistentfile"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest( - pathNotExist.getBucket(), pathNotExist.getKey()))))) - .thenThrow(notFoundException); - - AmazonS3Exception forbiddenException = new AmazonS3Exception("mock exception"); - forbiddenException.setStatusCode(403); - S3ResourceId pathForbidden = - S3ResourceId.fromUri("s3://testbucket/testdirectory/forbiddenfile"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest( - pathForbidden.getBucket(), pathForbidden.getKey()))))) - .thenThrow(forbiddenException); - - S3ResourceId pathExist = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(1540000000000L)); - s3ObjectMetadata.setContentEncoding("not-gzip"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(pathExist.getBucket(), pathExist.getKey()))))) - .thenReturn(s3ObjectMetadata); - - S3ResourceId pathGlob = S3ResourceId.fromUri("s3://testbucket/path/part*"); - - S3ObjectSummary foundListObject = new S3ObjectSummary(); - foundListObject.setBucketName(pathGlob.getBucket()); - foundListObject.setKey("path/part-0"); - foundListObject.setSize(200); - foundListObject.setLastModified(new Date(1541000000000L)); - - ListObjectsV2Result listObjectsResult = new ListObjectsV2Result(); - listObjectsResult.setNextContinuationToken(null); - listObjectsResult.getObjectSummaries().add(foundListObject); - when(s3FileSystem.getAmazonS3Client().listObjectsV2(notNull(ListObjectsV2Request.class))) - .thenReturn(listObjectsResult); - - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(pathGlob.getBucket(), "path/part-0"))))) - .thenReturn(metadata); - - assertThat( - s3FileSystem.match( - ImmutableList.of( - pathNotExist.toString(), - pathForbidden.toString(), - pathExist.toString(), - pathGlob.toString())), - contains( - MatchResultMatcher.create(MatchResult.Status.NOT_FOUND, new FileNotFoundException()), - MatchResultMatcher.create( - MatchResult.Status.ERROR, new IOException(forbiddenException)), - MatchResultMatcher.create(100, 1540000000000L, pathExist, true), - MatchResultMatcher.create( - 200, - 1541000000000L, - S3ResourceId.fromComponents("s3", pathGlob.getBucket(), foundListObject.getKey()), - true))); - } - - @Test - public void matchVariousInvokeThreadPoolWithS3Options() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options()); - - AmazonS3Exception notFoundException = new AmazonS3Exception("mock exception"); - notFoundException.setStatusCode(404); - S3ResourceId pathNotExist = - S3ResourceId.fromUri("s3://testbucket/testdirectory/nonexistentfile"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest( - pathNotExist.getBucket(), pathNotExist.getKey()))))) - .thenThrow(notFoundException); - - AmazonS3Exception forbiddenException = new AmazonS3Exception("mock exception"); - forbiddenException.setStatusCode(403); - S3ResourceId pathForbidden = - S3ResourceId.fromUri("s3://testbucket/testdirectory/forbiddenfile"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest( - pathForbidden.getBucket(), pathForbidden.getKey()))))) - .thenThrow(forbiddenException); - - S3ResourceId pathExist = S3ResourceId.fromUri("s3://testbucket/testdirectory/filethatexists"); - ObjectMetadata s3ObjectMetadata = new ObjectMetadata(); - s3ObjectMetadata.setContentLength(100); - s3ObjectMetadata.setLastModified(new Date(1540000000000L)); - s3ObjectMetadata.setContentEncoding("not-gzip"); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(pathExist.getBucket(), pathExist.getKey()))))) - .thenReturn(s3ObjectMetadata); - - S3ResourceId pathGlob = S3ResourceId.fromUri("s3://testbucket/path/part*"); - - S3ObjectSummary foundListObject = new S3ObjectSummary(); - foundListObject.setBucketName(pathGlob.getBucket()); - foundListObject.setKey("path/part-0"); - foundListObject.setSize(200); - foundListObject.setLastModified(new Date(1541000000000L)); - - ListObjectsV2Result listObjectsResult = new ListObjectsV2Result(); - listObjectsResult.setNextContinuationToken(null); - listObjectsResult.getObjectSummaries().add(foundListObject); - when(s3FileSystem.getAmazonS3Client().listObjectsV2(notNull(ListObjectsV2Request.class))) - .thenReturn(listObjectsResult); - - ObjectMetadata metadata = new ObjectMetadata(); - metadata.setContentEncoding(""); - when(s3FileSystem - .getAmazonS3Client() - .getObjectMetadata( - argThat( - new GetObjectMetadataRequestMatcher( - new GetObjectMetadataRequest(pathGlob.getBucket(), "path/part-0"))))) - .thenReturn(metadata); - - assertThat( - s3FileSystem.match( - ImmutableList.of( - pathNotExist.toString(), - pathForbidden.toString(), - pathExist.toString(), - pathGlob.toString())), - contains( - MatchResultMatcher.create(MatchResult.Status.NOT_FOUND, new FileNotFoundException()), - MatchResultMatcher.create( - MatchResult.Status.ERROR, new IOException(forbiddenException)), - MatchResultMatcher.create(100, 1540000000000L, pathExist, true), - MatchResultMatcher.create( - 200, - 1541000000000L, - S3ResourceId.fromComponents("s3", pathGlob.getBucket(), foundListObject.getKey()), - true))); - } - - @Test - public void testWriteAndRead() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("s3"), client); - - client.createBucket("testbucket"); - - byte[] writtenArray = new byte[] {0}; - ByteBuffer bb = ByteBuffer.allocate(writtenArray.length); - bb.put(writtenArray); - - // First create an object and write data to it - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar.txt"); - WritableByteChannel writableByteChannel = - s3FileSystem.create( - path, - CreateOptions.StandardCreateOptions.builder().setMimeType("application/text").build()); - writableByteChannel.write(bb); - writableByteChannel.close(); - - // Now read the same object - ByteBuffer bb2 = ByteBuffer.allocate(writtenArray.length); - ReadableByteChannel open = s3FileSystem.open(path); - open.read(bb2); - - // And compare the content with the one that was written - byte[] readArray = bb2.array(); - assertArrayEquals(readArray, writtenArray); - open.close(); - } - - @Test - public void testWriteAndReadWithS3Options() throws IOException { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Options(), client); - - client.createBucket("testbucket"); - - byte[] writtenArray = new byte[] {0}; - ByteBuffer bb = ByteBuffer.allocate(writtenArray.length); - bb.put(writtenArray); - - // First create an object and write data to it - S3ResourceId path = S3ResourceId.fromUri("s3://testbucket/foo/bar.txt"); - WritableByteChannel writableByteChannel = - s3FileSystem.create( - path, - CreateOptions.StandardCreateOptions.builder().setMimeType("application/text").build()); - writableByteChannel.write(bb); - writableByteChannel.close(); - - // Now read the same object - ByteBuffer bb2 = ByteBuffer.allocate(writtenArray.length); - ReadableByteChannel open = s3FileSystem.open(path); - open.read(bb2); - - // And compare the content with the one that was written - byte[] readArray = bb2.array(); - assertArrayEquals(readArray, writtenArray); - open.close(); - } - - @Test - public void testReportLineageOnBucket() { - verifyLineage("s3://testbucket", ImmutableList.of("testbucket")); - verifyLineage("s3://testbucket/", ImmutableList.of("testbucket")); - verifyLineage("s3://testbucket/foo/bar.txt", ImmutableList.of("testbucket", "foo/bar.txt")); - } - - private void verifyLineage(String uri, List expected) { - S3FileSystem s3FileSystem = buildMockedS3FileSystem(s3Config("mys3"), client); - S3ResourceId path = S3ResourceId.fromUri(uri); - Lineage mockLineage = mock(Lineage.class); - s3FileSystem.reportLineage(path, mockLineage); - verify(mockLineage, times(1)).add("s3", expected); - } - - /** A mockito argument matcher to implement equality on GetObjectMetadataRequest. */ - private static class GetObjectMetadataRequestMatcher - implements ArgumentMatcher { - private final GetObjectMetadataRequest expected; - - GetObjectMetadataRequestMatcher(GetObjectMetadataRequest expected) { - this.expected = expected; - } - - @Override - public boolean matches(GetObjectMetadataRequest obj) { - if (!(obj instanceof GetObjectMetadataRequest)) { - return false; - } - GetObjectMetadataRequest actual = (GetObjectMetadataRequest) obj; - return actual.getBucketName().equals(expected.getBucketName()) - && actual.getKey().equals(expected.getKey()); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3ResourceIdTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3ResourceIdTest.java deleted file mode 100644 index dd759cb63dbd..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3ResourceIdTest.java +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_DIRECTORY; -import static org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions.RESOLVE_FILE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.ObjectInputStream; -import java.io.ObjectOutputStream; -import java.util.Arrays; -import java.util.Base64; -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import org.apache.beam.sdk.io.FileSystems; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions; -import org.apache.beam.sdk.io.fs.ResourceId; -import org.apache.beam.sdk.io.fs.ResourceIdTester; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link S3ResourceId}. */ -@RunWith(JUnit4.class) -public class S3ResourceIdTest { - - @Rule public ExpectedException thrown = ExpectedException.none(); - - static final class TestCase { - - final String baseUri; - final String relativePath; - final StandardResolveOptions resolveOptions; - final String expectedResult; - - TestCase( - String baseUri, - String relativePath, - StandardResolveOptions resolveOptions, - String expectedResult) { - this.baseUri = baseUri; - this.relativePath = relativePath; - this.resolveOptions = resolveOptions; - this.expectedResult = expectedResult; - } - } - - // Each test case is an expected URL, then the components used to build it. - // Empty components result in a double slash. - private static final List PATH_TEST_CASES = - Arrays.asList( - new TestCase("s3://bucket/", "", RESOLVE_DIRECTORY, "s3://bucket/"), - new TestCase("s3://bucket", "", RESOLVE_DIRECTORY, "s3://bucket/"), - new TestCase("s3://bucket", "path/to/dir", RESOLVE_DIRECTORY, "s3://bucket/path/to/dir/"), - new TestCase("s3://bucket", "path/to/object", RESOLVE_FILE, "s3://bucket/path/to/object"), - new TestCase( - "s3://bucket/path/to/dir/", "..", RESOLVE_DIRECTORY, "s3://bucket/path/to/")); - - private S3ResourceId deserializeFromB64(String base64) throws Exception { - ByteArrayInputStream b = new ByteArrayInputStream(Base64.getDecoder().decode(base64)); - try (ObjectInputStream s = new ObjectInputStream(b)) { - return (S3ResourceId) s.readObject(); - } - } - - private String serializeToB64(S3ResourceId r) throws Exception { - ByteArrayOutputStream b = new ByteArrayOutputStream(); - try (ObjectOutputStream s = new ObjectOutputStream(b)) { - s.writeObject(r); - } - return Base64.getEncoder().encodeToString(b.toByteArray()); - } - - @Test - public void testSerialization() throws Exception { - String r1Serialized = - "rO0ABXNyACpvcmcuYXBhY2hlLmJlYW0uc2RrLmlvLmF3cy5zMy5TM1Jlc291cmNlSWSN8nM8V4cVFwIABEwABmJ1Y2tldHQAEkxqYXZhL2xhbmcvU3RyaW5nO0wAA2tleXEAfgABTAAMbGFzdE1vZGlmaWVkdAAQTGphdmEvdXRpbC9EYXRlO0wABHNpemV0ABBMamF2YS9sYW5nL0xvbmc7eHB0AAZidWNrZXR0AAYvYS9iL2NwcA=="; - String r2Serialized = - "rO0ABXNyACpvcmcuYXBhY2hlLmJlYW0uc2RrLmlvLmF3cy5zMy5TM1Jlc291cmNlSWSN8nM8V4cVFwIABEwABmJ1Y2tldHQAEkxqYXZhL2xhbmcvU3RyaW5nO0wAA2tleXEAfgABTAAMbGFzdE1vZGlmaWVkdAAQTGphdmEvdXRpbC9EYXRlO0wABHNpemV0ABBMamF2YS9sYW5nL0xvbmc7eHB0AAxvdGhlci1idWNrZXR0AAYveC95L3pwc3IADmphdmEubGFuZy5Mb25nO4vkkMyPI98CAAFKAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAAAAAAB7"; - String r3Serialized = - "rO0ABXNyACpvcmcuYXBhY2hlLmJlYW0uc2RrLmlvLmF3cy5zMy5TM1Jlc291cmNlSWSN8nM8V4cVFwIABEwABmJ1Y2tldHQAEkxqYXZhL2xhbmcvU3RyaW5nO0wAA2tleXEAfgABTAAMbGFzdE1vZGlmaWVkdAAQTGphdmEvdXRpbC9EYXRlO0wABHNpemV0ABBMamF2YS9sYW5nL0xvbmc7eHB0AAx0aGlyZC1idWNrZXR0AAkvZm9vL2Jhci9zcgAOamF2YS51dGlsLkRhdGVoaoEBS1l0GQMAAHhwdwgAADgCgmXOAHhw"; - String r4Serialized = - "rO0ABXNyACpvcmcuYXBhY2hlLmJlYW0uc2RrLmlvLmF3cy5zMy5TM1Jlc291cmNlSWSN8nM8V4cVFwIABEwABmJ1Y2tldHQAEkxqYXZhL2xhbmcvU3RyaW5nO0wAA2tleXEAfgABTAAMbGFzdE1vZGlmaWVkdAAQTGphdmEvdXRpbC9EYXRlO0wABHNpemV0ABBMamF2YS9sYW5nL0xvbmc7eHB0AApiYXotYnVja2V0dAAGL2EvYi9jc3IADmphdmEudXRpbC5EYXRlaGqBAUtZdBkDAAB4cHcIAAA33gSV5gB4c3IADmphdmEubGFuZy5Mb25nO4vkkMyPI98CAAFKAAV2YWx1ZXhyABBqYXZhLmxhbmcuTnVtYmVyhqyVHQuU4IsCAAB4cAAAAAAAAAAq"; - - S3ResourceId r1 = S3ResourceId.fromComponents("s3", "bucket", "a/b/c"); - S3ResourceId r2 = S3ResourceId.fromComponents("s3", "other-bucket", "x/y/z").withSize(123); - S3ResourceId r3 = - S3ResourceId.fromComponents("s3", "third-bucket", "foo/bar/") - .withLastModified(new Date(121, Calendar.JULY, 3)); - S3ResourceId r4 = - S3ResourceId.fromComponents("s3", "baz-bucket", "a/b/c") - .withSize(42) - .withLastModified(new Date(116, Calendar.JULY, 15)); - S3ResourceId r5 = S3ResourceId.fromComponents("other-scheme", "bucket", "a/b/c"); - S3ResourceId r6 = - S3ResourceId.fromComponents("other-scheme", "baz-bucket", "foo/bar/") - .withSize(42) - .withLastModified(new Date(116, Calendar.JULY, 5)); - - // S3ResourceIds serialized by previous versions should still deserialize. - assertEquals(r1, deserializeFromB64(r1Serialized)); - assertEquals(r2, deserializeFromB64(r2Serialized)); - assertEquals(r3, deserializeFromB64(r3Serialized)); - assertEquals(r4, deserializeFromB64(r4Serialized)); - - // Current resource IDs should round-trip properly through serialization. - assertEquals(r1, deserializeFromB64(serializeToB64(r1))); - assertEquals(r2, deserializeFromB64(serializeToB64(r2))); - assertEquals(r3, deserializeFromB64(serializeToB64(r3))); - assertEquals(r4, deserializeFromB64(serializeToB64(r4))); - assertEquals(r5, deserializeFromB64(serializeToB64(r5))); - assertEquals(r6, deserializeFromB64(serializeToB64(r6))); - } - - @Test - public void testResolve() { - for (TestCase testCase : PATH_TEST_CASES) { - ResourceId resourceId = S3ResourceId.fromUri(testCase.baseUri); - ResourceId resolved = resourceId.resolve(testCase.relativePath, testCase.resolveOptions); - assertEquals(testCase.expectedResult, resolved.toString()); - } - - // Tests for common s3 paths. - assertEquals( - S3ResourceId.fromUri("s3://bucket/tmp/aa"), - S3ResourceId.fromUri("s3://bucket/tmp/").resolve("aa", RESOLVE_FILE)); - assertEquals( - S3ResourceId.fromUri("s3://bucket/tmp/aa/bb/cc/"), - S3ResourceId.fromUri("s3://bucket/tmp/") - .resolve("aa", RESOLVE_DIRECTORY) - .resolve("bb", RESOLVE_DIRECTORY) - .resolve("cc", RESOLVE_DIRECTORY)); - - // Tests absolute path. - assertEquals( - S3ResourceId.fromUri("s3://bucket/tmp/aa"), - S3ResourceId.fromUri("s3://bucket/tmp/bb/").resolve("s3://bucket/tmp/aa", RESOLVE_FILE)); - - // Tests bucket with no ending '/'. - assertEquals( - S3ResourceId.fromUri("s3://my-bucket/tmp"), - S3ResourceId.fromUri("s3://my-bucket").resolve("tmp", RESOLVE_FILE)); - - // Tests path with unicode - assertEquals( - S3ResourceId.fromUri("s3://bucket/输出 目录/输出 文件01.txt"), - S3ResourceId.fromUri("s3://bucket/输出 目录/").resolve("输出 文件01.txt", RESOLVE_FILE)); - } - - @Test - public void testResolveInvalidInputs() { - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Cannot resolve a file with a directory path: [tmp/]"); - S3ResourceId.fromUri("s3://my_bucket/").resolve("tmp/", RESOLVE_FILE); - } - - @Test - public void testResolveInvalidNotDirectory() { - ResourceId tmpDir = S3ResourceId.fromUri("s3://my_bucket/").resolve("tmp dir", RESOLVE_FILE); - - thrown.expect(IllegalStateException.class); - thrown.expectMessage( - "Expected this resource to be a directory, but was [s3://my_bucket/tmp dir]"); - tmpDir.resolve("aa", RESOLVE_FILE); - } - - @Test - public void testS3ResolveWithFileBase() { - ResourceId resourceId = S3ResourceId.fromUri("s3://bucket/path/to/file"); - thrown.expect(IllegalStateException.class); - resourceId.resolve("child-path", RESOLVE_DIRECTORY); // resource is not a directory - } - - @Test - public void testResolveParentToFile() { - ResourceId resourceId = S3ResourceId.fromUri("s3://bucket/path/to/dir/"); - thrown.expect(IllegalArgumentException.class); - resourceId.resolve("..", RESOLVE_FILE); // '..' only resolves as dir, not as file - } - - @Test - public void testGetCurrentDirectory() { - // Tests s3 paths. - assertEquals( - S3ResourceId.fromUri("s3://my_bucket/tmp dir/"), - S3ResourceId.fromUri("s3://my_bucket/tmp dir/").getCurrentDirectory()); - - // Tests path with unicode. - assertEquals( - S3ResourceId.fromUri("s3://my_bucket/输出 目录/"), - S3ResourceId.fromUri("s3://my_bucket/输出 目录/文件01.txt").getCurrentDirectory()); - - // Tests bucket with no ending '/'. - assertEquals( - S3ResourceId.fromUri("s3://my_bucket/"), - S3ResourceId.fromUri("s3://my_bucket").getCurrentDirectory()); - assertEquals( - S3ResourceId.fromUri("s3://my_bucket/"), - S3ResourceId.fromUri("s3://my_bucket/not-directory").getCurrentDirectory()); - } - - @Test - public void testIsDirectory() { - assertTrue(S3ResourceId.fromUri("s3://my_bucket/tmp dir/").isDirectory()); - assertTrue(S3ResourceId.fromUri("s3://my_bucket/").isDirectory()); - assertTrue(S3ResourceId.fromUri("s3://my_bucket").isDirectory()); - assertFalse(S3ResourceId.fromUri("s3://my_bucket/file").isDirectory()); - } - - @Test - public void testInvalidPathNoBucket() { - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Invalid S3 URI: [s3://]"); - S3ResourceId.fromUri("s3://"); - } - - @Test - public void testInvalidPathNoBucketAndSlash() { - thrown.expect(IllegalArgumentException.class); - thrown.expectMessage("Invalid S3 URI: [s3:///]"); - S3ResourceId.fromUri("s3:///"); - } - - @Test - public void testGetScheme() { - // Tests s3 paths. - assertEquals("s3", S3ResourceId.fromUri("s3://my_bucket/tmp dir/").getScheme()); - - // Tests bucket with no ending '/'. - assertEquals("s3", S3ResourceId.fromUri("s3://my_bucket").getScheme()); - } - - @Test - public void testGetFilename() { - assertNull(S3ResourceId.fromUri("s3://my_bucket/").getFilename()); - assertEquals("abc", S3ResourceId.fromUri("s3://my_bucket/abc").getFilename()); - assertEquals("abc", S3ResourceId.fromUri("s3://my_bucket/abc/").getFilename()); - assertEquals("def", S3ResourceId.fromUri("s3://my_bucket/abc/def").getFilename()); - assertEquals("def", S3ResourceId.fromUri("s3://my_bucket/abc/def/").getFilename()); - assertEquals("xyz.txt", S3ResourceId.fromUri("s3://my_bucket/abc/xyz.txt").getFilename()); - } - - @Test - public void testParentRelationship() { - S3ResourceId path = S3ResourceId.fromUri("s3://bucket/dir/subdir/object"); - assertEquals("bucket", path.getBucket()); - assertEquals("dir/subdir/object", path.getKey()); - - // s3://bucket/dir/ - path = S3ResourceId.fromUri("s3://bucket/dir/subdir/"); - S3ResourceId parent = (S3ResourceId) path.resolve("..", RESOLVE_DIRECTORY); - assertEquals("bucket", parent.getBucket()); - assertEquals("dir/", parent.getKey()); - assertNotEquals(path, parent); - assertTrue(path.getKey().startsWith(parent.getKey())); - assertFalse(parent.getKey().startsWith(path.getKey())); - - // s3://bucket/ - S3ResourceId grandParent = (S3ResourceId) parent.resolve("..", RESOLVE_DIRECTORY); - assertEquals("bucket", grandParent.getBucket()); - assertEquals("", grandParent.getKey()); - } - - @Test - public void testBucketParsing() { - S3ResourceId path = S3ResourceId.fromUri("s3://bucket"); - S3ResourceId path2 = S3ResourceId.fromUri("s3://bucket/"); - - assertEquals(path, path2); - assertEquals(path.toString(), path2.toString()); - } - - @Test - public void testS3ResourceIdToString() { - String filename = "s3://some-bucket/some/file.txt"; - S3ResourceId path = S3ResourceId.fromUri(filename); - assertEquals(filename, path.toString()); - - filename = "s3://some-bucket/some/"; - path = S3ResourceId.fromUri(filename); - assertEquals(filename, path.toString()); - - filename = "s3://some-bucket/"; - path = S3ResourceId.fromUri(filename); - assertEquals(filename, path.toString()); - } - - @Test - public void testEquals() { - S3ResourceId a = S3ResourceId.fromComponents("s3", "bucket", "a/b/c"); - S3ResourceId b = S3ResourceId.fromComponents("s3", "bucket", "a/b/c"); - assertEquals(a, b); - - b = S3ResourceId.fromComponents("s3", a.getBucket(), "a/b/c/"); - assertNotEquals(a, b); - - b = S3ResourceId.fromComponents("s3", a.getBucket(), "x/y/z"); - assertNotEquals(a, b); - - b = S3ResourceId.fromComponents("s3", "other-bucket", a.getKey()); - assertNotEquals(a, b); - assertNotEquals(b, a); - - b = S3ResourceId.fromComponents("other", "bucket", "a/b/c"); - assertNotEquals(a, b); - assertNotEquals(b, a); - } - - @Test - public void testInvalidBucket() { - thrown.expect(IllegalArgumentException.class); - S3ResourceId.fromComponents("s3", "invalid/", ""); - } - - @Test - public void testResourceIdTester() { - S3Options options = PipelineOptionsFactory.create().as(S3Options.class); - options.setAwsRegion("us-west-1"); - FileSystems.setDefaultPipelineOptions(options); - ResourceIdTester.runResourceIdBattery(S3ResourceId.fromUri("s3://bucket/foo/")); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java deleted file mode 100644 index 3df2f10f9c82..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3TestUtils.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.AmazonS3ClientBuilder; -import com.amazonaws.services.s3.model.ObjectMetadata; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.SSECustomerKey; -import com.amazonaws.util.Base64; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.commons.codec.digest.DigestUtils; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.mockito.Mockito; - -/** Utils to test S3 filesystem. */ -class S3TestUtils { - private static S3FileSystemConfiguration.Builder configBuilder(String scheme) { - S3Options options = PipelineOptionsFactory.as(S3Options.class); - options.setAwsRegion("us-west-1"); - options.setS3UploadBufferSizeBytes(5_242_880); - return S3FileSystemConfiguration.fromS3Options(options).setScheme(scheme); - } - - static S3FileSystemConfiguration s3Config(String scheme) { - return configBuilder(scheme).build(); - } - - static S3Options s3Options() { - S3Options options = PipelineOptionsFactory.as(S3Options.class); - options.setAwsRegion("us-west-1"); - options.setS3UploadBufferSizeBytes(5_242_880); - return options; - } - - static S3Options s3OptionsWithCustomEndpointAndPathStyleAccessEnabled() { - S3Options options = PipelineOptionsFactory.as(S3Options.class); - options.setAwsServiceEndpoint("https://s3.custom.dns"); - options.setAwsRegion("no-matter"); - options.setS3UploadBufferSizeBytes(5_242_880); - options.setS3ClientFactoryClass(PathStyleAccessS3ClientBuilderFactory.class); - return options; - } - - static S3FileSystemConfiguration s3ConfigWithCustomEndpointAndPathStyleAccessEnabled( - String scheme) { - return S3FileSystemConfiguration.fromS3Options( - s3OptionsWithCustomEndpointAndPathStyleAccessEnabled()) - .setScheme(scheme) - .build(); - } - - static S3FileSystemConfiguration s3ConfigWithSSEAlgorithm(String scheme) { - return configBuilder(scheme) - .setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION) - .build(); - } - - static S3Options s3OptionsWithSSEAlgorithm() { - S3Options options = s3Options(); - options.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION); - return options; - } - - static S3FileSystemConfiguration s3ConfigWithSSECustomerKey(String scheme) { - return configBuilder(scheme) - .setSSECustomerKey(new SSECustomerKey("86glyTlCNZgccSxW8JxMa6ZdjdK3N141glAysPUZ3AA=")) - .build(); - } - - static S3Options s3OptionsWithSSECustomerKey() { - S3Options options = s3Options(); - options.setSSECustomerKey(new SSECustomerKey("86glyTlCNZgccSxW8JxMa6ZdjdK3N141glAysPUZ3AA=")); - return options; - } - - static S3FileSystemConfiguration s3ConfigWithSSEAwsKeyManagementParams(String scheme) { - String awsKmsKeyId = - "arn:aws:kms:eu-west-1:123456789012:key/dc123456-7890-ABCD-EF01-234567890ABC"; - SSEAwsKeyManagementParams sseAwsKeyManagementParams = - new SSEAwsKeyManagementParams(awsKmsKeyId); - return configBuilder(scheme) - .setSSEAwsKeyManagementParams(sseAwsKeyManagementParams) - .setBucketKeyEnabled(true) - .build(); - } - - static S3Options s3OptionsWithSSEAwsKeyManagementParams() { - S3Options options = s3Options(); - String awsKmsKeyId = - "arn:aws:kms:eu-west-1:123456789012:key/dc123456-7890-ABCD-EF01-234567890ABC"; - SSEAwsKeyManagementParams sseAwsKeyManagementParams = - new SSEAwsKeyManagementParams(awsKmsKeyId); - options.setSSEAwsKeyManagementParams(sseAwsKeyManagementParams); - options.setBucketKeyEnabled(true); - return options; - } - - static S3FileSystemConfiguration s3ConfigWithMultipleSSEOptions(String scheme) { - return s3ConfigWithSSEAwsKeyManagementParams(scheme) - .toBuilder() - .setSSECustomerKey(new SSECustomerKey("86glyTlCNZgccSxW8JxMa6ZdjdK3N141glAysPUZ3AA=")) - .build(); - } - - static S3Options s3OptionsWithMultipleSSEOptions() { - S3Options options = s3OptionsWithSSEAwsKeyManagementParams(); - options.setSSECustomerKey(new SSECustomerKey("86glyTlCNZgccSxW8JxMa6ZdjdK3N141glAysPUZ3AA=")); - return options; - } - - static S3FileSystem buildMockedS3FileSystem(S3FileSystemConfiguration config) { - return buildMockedS3FileSystem(config, Mockito.mock(AmazonS3.class)); - } - - static S3FileSystem buildMockedS3FileSystem(S3Options options) { - return buildMockedS3FileSystem(options, Mockito.mock(AmazonS3.class)); - } - - static S3FileSystem buildMockedS3FileSystem(S3FileSystemConfiguration config, AmazonS3 client) { - S3FileSystem s3FileSystem = new S3FileSystem(config); - s3FileSystem.setAmazonS3Client(client); - return s3FileSystem; - } - - static S3FileSystem buildMockedS3FileSystem(S3Options options, AmazonS3 client) { - S3FileSystem s3FileSystem = new S3FileSystem(options); - s3FileSystem.setAmazonS3Client(client); - return s3FileSystem; - } - - static @Nullable String toMd5(SSECustomerKey key) { - if (key != null && key.getKey() != null) { - return Base64.encodeAsString(DigestUtils.md5(Base64.decode(key.getKey()))); - } - return null; - } - - static @Nullable String getSSECustomerKeyMd5(S3FileSystemConfiguration config) { - return toMd5(config.getSSECustomerKey()); - } - - static @Nullable String getSSECustomerKeyMd5(S3Options options) { - return toMd5(options.getSSECustomerKey()); - } - - private static class PathStyleAccessS3ClientBuilderFactory extends DefaultS3ClientBuilderFactory { - @Override - public AmazonS3ClientBuilder createBuilder(S3Options s3Options) { - return super.createBuilder(s3Options).withPathStyleAccessEnabled(true); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannelTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannelTest.java deleted file mode 100644 index cb577d860322..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/s3/S3WritableByteChannelTest.java +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.s3; - -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3Config; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithMultipleSSEOptions; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithSSEAlgorithm; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithSSEAwsKeyManagementParams; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3ConfigWithSSECustomerKey; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3Options; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithMultipleSSEOptions; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithSSEAlgorithm; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithSSEAwsKeyManagementParams; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.s3OptionsWithSSECustomerKey; -import static org.apache.beam.sdk.io.aws.s3.S3TestUtils.toMd5; -import static org.apache.beam.sdk.io.aws.s3.S3WritableByteChannel.atMostOne; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.Matchers.notNull; -import static org.mockito.Mockito.RETURNS_SMART_NULLS; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.withSettings; - -import com.amazonaws.services.s3.AmazonS3; -import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest; -import com.amazonaws.services.s3.model.CompleteMultipartUploadResult; -import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest; -import com.amazonaws.services.s3.model.InitiateMultipartUploadResult; -import com.amazonaws.services.s3.model.SSEAwsKeyManagementParams; -import com.amazonaws.services.s3.model.UploadPartRequest; -import com.amazonaws.services.s3.model.UploadPartResult; -import java.io.IOException; -import java.nio.ByteBuffer; -import org.apache.beam.sdk.io.aws.options.S3Options; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link S3WritableByteChannel}. */ -@RunWith(JUnit4.class) -public class S3WritableByteChannelTest { - @Rule public ExpectedException expected = ExpectedException.none(); - - @Test - public void write() throws IOException { - writeFromConfig(s3Config("s3"), false); - writeFromConfig(s3Config("s3"), true); - writeFromConfig(s3ConfigWithSSEAlgorithm("s3"), false); - writeFromConfig(s3ConfigWithSSECustomerKey("s3"), false); - writeFromConfig(s3ConfigWithSSEAwsKeyManagementParams("s3"), false); - expected.expect(IllegalArgumentException.class); - writeFromConfig(s3ConfigWithMultipleSSEOptions("s3"), false); - } - - @Test - public void writeWithS3Options() throws IOException { - writeFromOptions(s3Options(), false); - writeFromOptions(s3Options(), true); - writeFromOptions(s3OptionsWithSSEAlgorithm(), false); - writeFromOptions(s3OptionsWithSSECustomerKey(), false); - writeFromOptions(s3OptionsWithSSEAwsKeyManagementParams(), false); - expected.expect(IllegalArgumentException.class); - writeFromOptions(s3OptionsWithMultipleSSEOptions(), false); - } - - @FunctionalInterface - public interface Supplier { - S3WritableByteChannel get() throws IOException; - } - - private void writeFromOptions(S3Options options, boolean writeReadOnlyBuffer) throws IOException { - AmazonS3 mockAmazonS3 = mock(AmazonS3.class, withSettings().defaultAnswer(RETURNS_SMART_NULLS)); - S3ResourceId path = S3ResourceId.fromUri("s3://bucket/dir/file"); - Supplier channel = - () -> - new S3WritableByteChannel( - mockAmazonS3, - path, - "text/plain", - S3FileSystemConfiguration.fromS3Options(options).build()); - write( - mockAmazonS3, - channel, - path, - options.getSSEAlgorithm(), - toMd5(options.getSSECustomerKey()), - options.getSSEAwsKeyManagementParams(), - options.getS3UploadBufferSizeBytes(), - options.getBucketKeyEnabled(), - writeReadOnlyBuffer); - } - - private void writeFromConfig(S3FileSystemConfiguration config, boolean writeReadOnlyBuffer) - throws IOException { - AmazonS3 mockAmazonS3 = mock(AmazonS3.class, withSettings().defaultAnswer(RETURNS_SMART_NULLS)); - S3ResourceId path = S3ResourceId.fromUri("s3://bucket/dir/file"); - Supplier channel = () -> new S3WritableByteChannel(mockAmazonS3, path, "text/plain", config); - write( - mockAmazonS3, - channel, - path, - config.getSSEAlgorithm(), - toMd5(config.getSSECustomerKey()), - config.getSSEAwsKeyManagementParams(), - config.getS3UploadBufferSizeBytes(), - config.getBucketKeyEnabled(), - writeReadOnlyBuffer); - } - - private void write( - AmazonS3 mockAmazonS3, - Supplier channelSupplier, - S3ResourceId path, - String sseAlgorithm, - String sseCustomerKeyMd5, - SSEAwsKeyManagementParams sseAwsKeyManagementParams, - long s3UploadBufferSizeBytes, - boolean bucketKeyEnabled, - boolean writeReadOnlyBuffer) - throws IOException { - InitiateMultipartUploadResult initiateMultipartUploadResult = - new InitiateMultipartUploadResult(); - initiateMultipartUploadResult.setUploadId("upload-id"); - if (sseAlgorithm != null) { - initiateMultipartUploadResult.setSSEAlgorithm(sseAlgorithm); - } - if (sseCustomerKeyMd5 != null) { - initiateMultipartUploadResult.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - if (sseAwsKeyManagementParams != null) { - sseAlgorithm = "aws:kms"; - initiateMultipartUploadResult.setSSEAlgorithm(sseAlgorithm); - } - initiateMultipartUploadResult.setBucketKeyEnabled(bucketKeyEnabled); - doReturn(initiateMultipartUploadResult) - .when(mockAmazonS3) - .initiateMultipartUpload(any(InitiateMultipartUploadRequest.class)); - - InitiateMultipartUploadResult mockInitiateMultipartUploadResult = - mockAmazonS3.initiateMultipartUpload( - new InitiateMultipartUploadRequest(path.getBucket(), path.getKey())); - assertEquals(sseAlgorithm, mockInitiateMultipartUploadResult.getSSEAlgorithm()); - assertEquals(bucketKeyEnabled, mockInitiateMultipartUploadResult.getBucketKeyEnabled()); - assertEquals(sseCustomerKeyMd5, mockInitiateMultipartUploadResult.getSSECustomerKeyMd5()); - - UploadPartResult result = new UploadPartResult(); - result.setETag("etag"); - if (sseCustomerKeyMd5 != null) { - result.setSSECustomerKeyMd5(sseCustomerKeyMd5); - } - doReturn(result).when(mockAmazonS3).uploadPart(any(UploadPartRequest.class)); - - UploadPartResult mockUploadPartResult = mockAmazonS3.uploadPart(new UploadPartRequest()); - assertEquals(sseCustomerKeyMd5, mockUploadPartResult.getSSECustomerKeyMd5()); - - int contentSize = 34_078_720; - ByteBuffer uploadContent = ByteBuffer.allocate((int) (contentSize * 2.5)); - for (int i = 0; i < contentSize; i++) { - uploadContent.put((byte) 0xff); - } - uploadContent.flip(); - - S3WritableByteChannel channel = channelSupplier.get(); - int uploadedSize = - channel.write(writeReadOnlyBuffer ? uploadContent.asReadOnlyBuffer() : uploadContent); - assertEquals(contentSize, uploadedSize); - - CompleteMultipartUploadResult completeMultipartUploadResult = - new CompleteMultipartUploadResult(); - doReturn(completeMultipartUploadResult) - .when(mockAmazonS3) - .completeMultipartUpload(any(CompleteMultipartUploadRequest.class)); - - channel.close(); - - verify(mockAmazonS3, times(2)) - .initiateMultipartUpload(notNull(InitiateMultipartUploadRequest.class)); - int partQuantity = (int) Math.ceil((double) contentSize / s3UploadBufferSizeBytes) + 1; - verify(mockAmazonS3, times(partQuantity)).uploadPart(notNull(UploadPartRequest.class)); - verify(mockAmazonS3, times(1)) - .completeMultipartUpload(notNull(CompleteMultipartUploadRequest.class)); - verifyNoMoreInteractions(mockAmazonS3); - } - - @Test - public void testAtMostOne() { - assertTrue(atMostOne(true)); - assertTrue(atMostOne(false)); - assertFalse(atMostOne(true, true)); - assertTrue(atMostOne(true, false)); - assertTrue(atMostOne(false, true)); - assertTrue(atMostOne(false, false)); - assertFalse(atMostOne(true, true, true)); - assertFalse(atMostOne(true, true, false)); - assertFalse(atMostOne(true, false, true)); - assertTrue(atMostOne(true, false, false)); - assertFalse(atMostOne(false, true, true)); - assertTrue(atMostOne(false, true, false)); - assertTrue(atMostOne(false, false, true)); - assertTrue(atMostOne(false, false, false)); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/PublishResultCodersTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/PublishResultCodersTest.java deleted file mode 100644 index e8f8643cbbd4..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/PublishResultCodersTest.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; - -import com.amazonaws.ResponseMetadata; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.http.SdkHttpMetadata; -import com.amazonaws.services.sns.model.PublishResult; -import java.util.UUID; -import org.apache.beam.sdk.testing.CoderProperties; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Test; - -/** Tests for PublishResult coders. */ -public class PublishResultCodersTest { - - @Test - public void testDefaultPublishResultDecodeEncodeEquals() throws Exception { - CoderProperties.coderDecodeEncodeEqual( - PublishResultCoders.defaultPublishResult(), - new PublishResult().withMessageId(UUID.randomUUID().toString())); - } - - @Test - public void testFullPublishResultWithoutHeadersDecodeEncodeEquals() throws Exception { - CoderProperties.coderDecodeEncodeEqual( - PublishResultCoders.fullPublishResultWithoutHeaders(), - new PublishResult().withMessageId(UUID.randomUUID().toString())); - - PublishResult value = buildFullPublishResult(); - PublishResult clone = - CoderUtils.clone(PublishResultCoders.fullPublishResultWithoutHeaders(), value); - assertThat( - clone.getSdkResponseMetadata().getRequestId(), - equalTo(value.getSdkResponseMetadata().getRequestId())); - assertThat( - clone.getSdkHttpMetadata().getHttpStatusCode(), - equalTo(value.getSdkHttpMetadata().getHttpStatusCode())); - assertThat(clone.getSdkHttpMetadata().getHttpHeaders().isEmpty(), equalTo(true)); - } - - @Test - public void testFullPublishResultIncludingHeadersDecodeEncodeEquals() throws Exception { - CoderProperties.coderDecodeEncodeEqual( - PublishResultCoders.fullPublishResult(), - new PublishResult().withMessageId(UUID.randomUUID().toString())); - - PublishResult value = buildFullPublishResult(); - PublishResult clone = CoderUtils.clone(PublishResultCoders.fullPublishResult(), value); - assertThat( - clone.getSdkResponseMetadata().getRequestId(), - equalTo(value.getSdkResponseMetadata().getRequestId())); - assertThat( - clone.getSdkHttpMetadata().getHttpStatusCode(), - equalTo(value.getSdkHttpMetadata().getHttpStatusCode())); - assertThat( - clone.getSdkHttpMetadata().getHttpHeaders(), - equalTo(value.getSdkHttpMetadata().getHttpHeaders())); - } - - private PublishResult buildFullPublishResult() { - PublishResult publishResult = new PublishResult().withMessageId(UUID.randomUUID().toString()); - publishResult.setSdkResponseMetadata( - new ResponseMetadata( - ImmutableMap.of(ResponseMetadata.AWS_REQUEST_ID, UUID.randomUUID().toString()))); - HttpResponse httpResponse = new HttpResponse(null, null); - httpResponse.setStatusCode(200); - httpResponse.addHeader("Content-Type", "application/json"); - publishResult.setSdkHttpMetadata(SdkHttpMetadata.from(httpResponse)); - return publishResult; - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOIT.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOIT.java deleted file mode 100644 index c19aada628fa..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOIT.java +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import static org.apache.beam.sdk.io.common.IOITHelper.executeWithRetry; -import static org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount; -import static org.apache.beam.sdk.values.TypeDescriptors.strings; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.SNS; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.SQS; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.sns.AmazonSNS; -import com.amazonaws.services.sns.AmazonSNSClientBuilder; -import com.amazonaws.services.sns.model.PublishRequest; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.Serializable; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.aws.ITEnvironment; -import org.apache.beam.sdk.io.aws.sqs.SqsIO; -import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.TestRow; -import org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExternalResource; -import org.junit.rules.Timeout; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.testcontainers.containers.localstack.LocalStackContainer.Service; - -@RunWith(JUnit4.class) -public class SnsIOIT { - public interface ITOptions extends ITEnvironment.ITOptions {} - - private static final ObjectMapper MAPPER = new ObjectMapper(); - private static final TypeDescriptor publishRequests = - TypeDescriptor.of(PublishRequest.class); - - @ClassRule - public static ITEnvironment env = - new ITEnvironment<>(new Service[] {SQS, SNS}, ITOptions.class, "SQS_PROVIDER=elasticmq"); - - @Rule public Timeout globalTimeout = Timeout.seconds(600); - - @Rule public TestPipeline pipelineWrite = env.createTestPipeline(); - @Rule public TestPipeline pipelineRead = env.createTestPipeline(); - @Rule public AwsResources resources = new AwsResources(); - - @Test - public void testWriteThenRead() { - ITOptions opts = env.options(); - int rows = opts.getNumberOfRows(); - - // Write test dataset to SNS - - pipelineWrite - .apply("Generate Sequence", GenerateSequence.from(0).to(rows)) - .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn())) - .apply("SNS request", MapElements.into(publishRequests).via(resources::publishRequest)) - .apply( - "Write to SNS", - SnsIO.write() - .withTopicName(resources.snsTopic) - .withResultOutputTag(new TupleTag<>()) - .withAWSClientsProvider( - opts.getAwsCredentialsProvider().getCredentials().getAWSAccessKeyId(), - opts.getAwsCredentialsProvider().getCredentials().getAWSSecretKey(), - Regions.fromName(opts.getAwsRegion()), - opts.getAwsServiceEndpoint())); - - // Read test dataset from SQS. - PCollection output = - pipelineRead - .apply( - "Read from SQS", - SqsIO.read().withQueueUrl(resources.sqsQueue).withMaxNumRecords(rows)) - .apply("Extract message", MapElements.into(strings()).via(SnsIOIT::extractMessage)); - - PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo((long) rows); - - PAssert.that(output.apply(Combine.globally(new HashingFn()).withoutDefaults())) - .containsInAnyOrder(getExpectedHashForRowCount(rows)); - - pipelineWrite.run(); - pipelineRead.run(); - } - - private static String extractMessage(Message msg) { - try { - return MAPPER.readTree(msg.getBody()).get("Message").asText(); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - } - - private static class AwsResources extends ExternalResource implements Serializable { - private transient AmazonSQS sqs = env.buildClient(AmazonSQSClientBuilder.standard()); - private transient AmazonSNS sns = env.buildClient(AmazonSNSClientBuilder.standard()); - - private String sqsQueue; - private String snsTopic; - private String sns2Sqs; - - PublishRequest publishRequest(TestRow r) { - return new PublishRequest(snsTopic, r.name()); - } - - @Override - protected void before() throws Throwable { - snsTopic = sns.createTopic("beam-snsio-it").getTopicArn(); - // add SQS subscription so we can read the messages again - sqsQueue = sqs.createQueue("beam-snsio-it").getQueueUrl(); - sns2Sqs = sns.subscribe(snsTopic, "sqs", sqsQueue).getSubscriptionArn(); - } - - @Override - protected void after() { - try { - executeWithRetry(() -> sns.unsubscribe(sns2Sqs)); - executeWithRetry(() -> sns.deleteTopic(snsTopic)); - executeWithRetry(() -> sqs.deleteQueue(sqsQueue)); - } catch (Exception e) { - throw new RuntimeException(e); - } finally { - sns.shutdown(); - sqs.shutdown(); - } - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOTest.java deleted file mode 100644 index f86c0851a01c..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sns/SnsIOTest.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sns; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.Duration.millis; -import static org.joda.time.Duration.standardSeconds; - -import com.amazonaws.http.SdkHttpMetadata; -import com.amazonaws.services.sns.AmazonSNS; -import com.amazonaws.services.sns.model.GetTopicAttributesResult; -import com.amazonaws.services.sns.model.InternalErrorException; -import com.amazonaws.services.sns.model.PublishRequest; -import com.amazonaws.services.sns.model.PublishResult; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.Serializable; -import java.util.HashMap; -import java.util.UUID; -import org.apache.beam.sdk.Pipeline; -import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.CoderException; -import org.apache.beam.sdk.testing.ExpectedLogs; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PCollectionTuple; -import org.apache.beam.sdk.values.TupleTag; -import org.apache.beam.sdk.values.TypeDescriptors; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.mockito.Mockito; -import org.slf4j.helpers.MessageFormatter; - -/** Tests to verify writes to Sns. */ -@RunWith(JUnit4.class) -public class SnsIOTest implements Serializable { - - private static final String topicName = "arn:aws:sns:us-west-2:5880:topic-FMFEHJ47NRFO"; - - @Rule public TestPipeline p = TestPipeline.create(); - - @Rule - public final transient ExpectedLogs snsWriterFnLogs = - ExpectedLogs.none(SnsIO.Write.SnsWriterFn.class); - - private static PublishRequest createSampleMessage(String message) { - return new PublishRequest().withTopicArn(topicName).withMessage(message); - } - - private static class Provider implements AwsClientsProvider { - - private static AmazonSNS publisher; - - public Provider(AmazonSNS pub) { - publisher = pub; - } - - @Override - public AmazonSNS createSnsPublisher() { - return publisher; - } - } - - @Test - public void testDataWritesToSNS() { - final PublishRequest request1 = createSampleMessage("my_first_message"); - final PublishRequest request2 = createSampleMessage("my_second_message"); - - final TupleTag results = new TupleTag<>(); - final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess(); - - final PCollectionTuple snsWrites = - p.apply(Create.of(request1, request2)) - .apply( - SnsIO.write() - .withTopicName(topicName) - .withRetryConfiguration( - SnsIO.RetryConfiguration.create( - 5, org.joda.time.Duration.standardMinutes(1))) - .withAWSClientsProvider(new Provider(amazonSnsSuccess)) - .withResultOutputTag(results)); - - final PCollection publishedResultsSize = snsWrites.get(results).apply(Count.globally()); - PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(2L)); - p.run().waitUntilFinish(); - } - - @Rule public ExpectedException thrown = ExpectedException.none(); - - @Test - public void testRetries() throws Throwable { - thrown.expect(IOException.class); - thrown.expectMessage("Error writing to SNS"); - thrown.expectMessage("No more attempts allowed"); - - final PublishRequest request1 = createSampleMessage("my message that will not be published"); - final TupleTag results = new TupleTag<>(); - final AmazonSNS amazonSnsErrors = getAmazonSnsMockErrors(); - p.apply(Create.of(request1)) - .apply( - SnsIO.write() - .withTopicName(topicName) - .withRetryConfiguration( - SnsIO.RetryConfiguration.create(4, standardSeconds(10), millis(1))) - .withAWSClientsProvider(new Provider(amazonSnsErrors)) - .withResultOutputTag(results)); - - try { - p.run(); - } catch (final Pipeline.PipelineExecutionException e) { - // check 3 retries were initiated by inspecting the log before passing on the exception - snsWriterFnLogs.verifyWarn( - MessageFormatter.format(SnsIO.Write.SnsWriterFn.RETRY_ATTEMPT_LOG, 1).getMessage()); - snsWriterFnLogs.verifyWarn( - MessageFormatter.format(SnsIO.Write.SnsWriterFn.RETRY_ATTEMPT_LOG, 2).getMessage()); - snsWriterFnLogs.verifyWarn( - MessageFormatter.format(SnsIO.Write.SnsWriterFn.RETRY_ATTEMPT_LOG, 3).getMessage()); - throw e.getCause(); - } - } - - @Test - public void testCustomCoder() throws Exception { - final PublishRequest request1 = createSampleMessage("my_first_message"); - - final TupleTag results = new TupleTag<>(); - final AmazonSNS amazonSnsSuccess = getAmazonSnsMockSuccess(); - final MockCoder mockCoder = new MockCoder(); - - final PCollectionTuple snsWrites = - p.apply(Create.of(request1)) - .apply( - SnsIO.write() - .withTopicName(topicName) - .withAWSClientsProvider(new Provider(amazonSnsSuccess)) - .withResultOutputTag(results) - .withCoder(mockCoder)); - - final PCollection publishedResultsSize = - snsWrites - .get(results) - .apply(MapElements.into(TypeDescriptors.strings()).via(result -> result.getMessageId())) - .apply(Count.globally()); - PAssert.that(publishedResultsSize).containsInAnyOrder(ImmutableList.of(1L)); - p.run().waitUntilFinish(); - assertThat(mockCoder.captured).isNotNull(); - } - - // Hand-code mock because Mockito mocks cause NotSerializableException even with - // withSettings().serializable(). - private static class MockCoder extends AtomicCoder { - - private PublishResult captured; - - @Override - public void encode(PublishResult value, OutputStream outStream) - throws CoderException, IOException { - this.captured = value; - PublishResultCoders.defaultPublishResult().encode(value, outStream); - } - - @Override - public PublishResult decode(InputStream inStream) throws CoderException, IOException { - return PublishResultCoders.defaultPublishResult().decode(inStream); - } - }; - - private static AmazonSNS getAmazonSnsMockSuccess() { - final AmazonSNS amazonSNS = Mockito.mock(AmazonSNS.class); - configureAmazonSnsMock(amazonSNS); - - final PublishResult result = Mockito.mock(PublishResult.class); - final SdkHttpMetadata metadata = Mockito.mock(SdkHttpMetadata.class); - Mockito.when(metadata.getHttpHeaders()).thenReturn(new HashMap<>()); - Mockito.when(metadata.getHttpStatusCode()).thenReturn(200); - Mockito.when(result.getSdkHttpMetadata()).thenReturn(metadata); - Mockito.when(result.getMessageId()).thenReturn(UUID.randomUUID().toString()); - Mockito.when(amazonSNS.publish(Mockito.any())).thenReturn(result); - return amazonSNS; - } - - private static AmazonSNS getAmazonSnsMockErrors() { - final AmazonSNS amazonSNS = Mockito.mock(AmazonSNS.class); - configureAmazonSnsMock(amazonSNS); - - Mockito.when(amazonSNS.publish(Mockito.any())) - .thenThrow(new InternalErrorException("Service unavailable")); - return amazonSNS; - } - - private static void configureAmazonSnsMock(AmazonSNS amazonSNS) { - final GetTopicAttributesResult result = Mockito.mock(GetTopicAttributesResult.class); - final SdkHttpMetadata metadata = Mockito.mock(SdkHttpMetadata.class); - Mockito.when(metadata.getHttpHeaders()).thenReturn(new HashMap<>()); - Mockito.when(metadata.getHttpStatusCode()).thenReturn(200); - Mockito.when(result.getSdkHttpMetadata()).thenReturn(metadata); - Mockito.when(amazonSNS.getTopicAttributes(Mockito.anyString())).thenReturn(result); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/EmbeddedSqsServer.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/EmbeddedSqsServer.java deleted file mode 100644 index 543df65bbe11..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/EmbeddedSqsServer.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.CreateQueueResult; -import org.elasticmq.rest.sqs.SQSRestServer; -import org.elasticmq.rest.sqs.SQSRestServerBuilder; -import org.junit.rules.ExternalResource; - -class EmbeddedSqsServer extends ExternalResource { - - private SQSRestServer sqsRestServer; - private AmazonSQS client; - private String queueUrl; - - @Override - protected void before() { - sqsRestServer = SQSRestServerBuilder.withDynamicPort().start(); - int port = sqsRestServer.waitUntilStarted().localAddress().getPort(); - - String endpoint = String.format("http://localhost:%d", port); - String region = "elasticmq"; - String accessKey = "x"; - String secretKey = "x"; - - client = - AmazonSQSClientBuilder.standard() - .withCredentials( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(accessKey, secretKey))) - .withEndpointConfiguration(new AwsClientBuilder.EndpointConfiguration(endpoint, region)) - .build(); - final CreateQueueResult queue = client.createQueue("test"); - queueUrl = queue.getQueueUrl(); - } - - @Override - protected void after() { - sqsRestServer.stopAndWait(); - client.shutdown(); - } - - public AmazonSQS getClient() { - return client; - } - - public String getQueueUrl() { - return queueUrl; - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOIT.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOIT.java deleted file mode 100644 index a44cb29a1abc..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOIT.java +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static org.apache.beam.sdk.io.common.TestRow.getExpectedHashForRowCount; -import static org.apache.beam.sdk.values.TypeDescriptors.strings; -import static org.testcontainers.containers.localstack.LocalStackContainer.Service.SQS; - -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.AmazonSQSClientBuilder; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.SendMessageRequest; -import java.io.Serializable; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.aws.ITEnvironment; -import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.TestRow; -import org.apache.beam.sdk.io.common.TestRow.DeterministicallyConstructTestRowFn; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExternalResource; -import org.junit.rules.Timeout; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -@RunWith(JUnit4.class) -public class SqsIOIT { - public interface SqsITOptions extends ITEnvironment.ITOptions {} - - private static final TypeDescriptor requestType = - TypeDescriptor.of(SendMessageRequest.class); - - @ClassRule - public static ITEnvironment env = - new ITEnvironment<>(SQS, SqsITOptions.class, "SQS_PROVIDER=elasticmq"); - - @Rule public Timeout globalTimeout = Timeout.seconds(600); - - @Rule public TestPipeline pipelineWrite = env.createTestPipeline(); - @Rule public TestPipeline pipelineRead = env.createTestPipeline(); - @Rule public SqsQueue sqsQueue = new SqsQueue(); - - @Test - public void testWriteThenRead() { - int rows = env.options().getNumberOfRows(); - - // Write test dataset to SQS. - pipelineWrite - .apply("Generate Sequence", GenerateSequence.from(0).to(rows)) - .apply("Prepare TestRows", ParDo.of(new DeterministicallyConstructTestRowFn())) - .apply("Prepare SQS message", MapElements.into(requestType).via(sqsQueue::messageRequest)) - .apply("Write to SQS", SqsIO.write()); - - // Read test dataset from SQS. - PCollection output = - pipelineRead - .apply("Read from SQS", SqsIO.read().withQueueUrl(sqsQueue.url).withMaxNumRecords(rows)) - .apply("Extract body", MapElements.into(strings()).via(Message::getBody)); - - PAssert.thatSingleton(output.apply("Count All", Count.globally())).isEqualTo((long) rows); - - PAssert.that(output.apply(Combine.globally(new HashingFn()).withoutDefaults())) - .containsInAnyOrder(getExpectedHashForRowCount(rows)); - - pipelineWrite.run(); - pipelineRead.run(); - } - - private static class SqsQueue extends ExternalResource implements Serializable { - private transient AmazonSQS client = env.buildClient(AmazonSQSClientBuilder.standard()); - private String url; - - SendMessageRequest messageRequest(TestRow r) { - return new SendMessageRequest(url, r.name()); - } - - @Override - protected void before() { - url = client.createQueue("beam-sqsio-it").getQueueUrl(); - } - - @Override - protected void after() { - client.deleteQueue(url); - client.shutdown(); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOTest.java deleted file mode 100644 index 23cc56a9438d..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsIOTest.java +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.ReceiveMessageResult; -import com.amazonaws.services.sqs.model.SendMessageRequest; -import java.util.ArrayList; -import java.util.List; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests on {@link SqsIO}. */ -@RunWith(JUnit4.class) -public class SqsIOTest { - - @Rule public TestPipeline pipeline = TestPipeline.create(); - - @Rule public EmbeddedSqsServer embeddedSqsRestServer = new EmbeddedSqsServer(); - - @Test - public void testWrite() { - final AmazonSQS client = embeddedSqsRestServer.getClient(); - final String queueUrl = embeddedSqsRestServer.getQueueUrl(); - - List messages = new ArrayList<>(); - for (int i = 0; i < 100; i++) { - final SendMessageRequest request = new SendMessageRequest(queueUrl, "This is a test " + i); - messages.add(request); - } - pipeline.apply(Create.of(messages)).apply(SqsIO.write()); - pipeline.run().waitUntilFinish(); - - List received = new ArrayList<>(); - while (received.size() < 100) { - final ReceiveMessageResult receiveMessageResult = client.receiveMessage(queueUrl); - - if (receiveMessageResult.getMessages() != null) { - for (Message message : receiveMessageResult.getMessages()) { - received.add(message.getBody()); - } - } - } - assertEquals(100, received.size()); - for (int i = 0; i < 100; i++) { - assertTrue(received.contains("This is a test " + i)); - } - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoderTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoderTest.java deleted file mode 100644 index 933028306d8b..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsMessageCoderTest.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static com.amazonaws.services.sqs.model.MessageSystemAttributeName.SentTimestamp; -import static org.apache.beam.sdk.io.aws.sqs.SqsUnboundedReader.REQUEST_TIME; -import static org.assertj.core.api.Assertions.assertThat; - -import com.amazonaws.services.sqs.model.Message; -import com.amazonaws.services.sqs.model.MessageAttributeValue; -import java.util.Random; -import org.apache.beam.sdk.util.CoderUtils; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.junit.Test; - -public class SqsMessageCoderTest { - - @Test - public void testMessageDecodeEncodeEquals() throws Exception { - Message message = - new Message() - .withMessageId("messageId") - .withReceiptHandle("receiptHandle") - .withBody("body") - .withAttributes( - ImmutableMap.of(SentTimestamp.name(), Long.toString(new Random().nextLong()))) - .withMessageAttributes( - ImmutableMap.of( - REQUEST_TIME, - new MessageAttributeValue() - .withStringValue(Long.toString(new Random().nextLong())))); - - Message clone = CoderUtils.clone(SqsMessageCoder.of(), message); - assertThat(clone).isEqualTo(message); - } - - @Test - public void testVerifyDeterministic() throws Exception { - SqsMessageCoder.of().verifyDeterministic(); // must not throw - } - - @Test - public void testConsistentWithEquals() { - // some attributes might be omitted - assertThat(SqsMessageCoder.of().consistentWithEquals()).isFalse(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReaderTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReaderTest.java deleted file mode 100644 index a6e986251cb6..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedReaderTest.java +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import static junit.framework.TestCase.assertFalse; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import com.amazonaws.services.sqs.AmazonSQS; -import com.amazonaws.services.sqs.model.Message; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.io.aws.options.AwsOptions; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.util.CoderUtils; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests on {@link SqsUnboundedReader}. */ -@RunWith(JUnit4.class) -public class SqsUnboundedReaderTest { - private static final String DATA = "testData"; - - @Rule public TestPipeline pipeline = TestPipeline.create(); - - @Rule public EmbeddedSqsServer embeddedSqsRestServer = new EmbeddedSqsServer(); - - private SqsUnboundedSource source; - - private void setupOneMessage() { - final AmazonSQS client = embeddedSqsRestServer.getClient(); - final String queueUrl = embeddedSqsRestServer.getQueueUrl(); - client.sendMessage(queueUrl, DATA); - source = - new SqsUnboundedSource( - SqsIO.read().withQueueUrl(queueUrl).withMaxNumRecords(1), - new SqsConfiguration(pipeline.getOptions().as(AwsOptions.class)), - SqsMessageCoder.of()); - } - - private void setupMessages(List messages) { - final AmazonSQS client = embeddedSqsRestServer.getClient(); - final String queueUrl = embeddedSqsRestServer.getQueueUrl(); - for (String message : messages) { - client.sendMessage(queueUrl, message); - } - source = - new SqsUnboundedSource( - SqsIO.read().withQueueUrl(queueUrl).withMaxNumRecords(1), - new SqsConfiguration(pipeline.getOptions().as(AwsOptions.class)), - SqsMessageCoder.of()); - } - - @Test - public void testReadOneMessage() throws IOException { - setupOneMessage(); - UnboundedSource.UnboundedReader reader = - source.createReader(pipeline.getOptions(), null); - // Read one message. - assertTrue(reader.start()); - assertEquals(DATA, reader.getCurrent().getBody()); - assertFalse(reader.advance()); - // ACK the message. - UnboundedSource.CheckpointMark checkpoint = reader.getCheckpointMark(); - checkpoint.finalizeCheckpoint(); - reader.close(); - } - - @Test - public void testTimeoutAckAndRereadOneMessage() throws IOException { - setupOneMessage(); - UnboundedSource.UnboundedReader reader = - source.createReader(pipeline.getOptions(), null); - AmazonSQS sqsClient = embeddedSqsRestServer.getClient(); - assertTrue(reader.start()); - assertEquals(DATA, reader.getCurrent().getBody()); - String receiptHandle = reader.getCurrent().getReceiptHandle(); - // Set the message to timeout. - sqsClient.changeMessageVisibility(source.getRead().queueUrl(), receiptHandle, 0); - // We'll now receive the same message again. - assertTrue(reader.advance()); - assertEquals(DATA, reader.getCurrent().getBody()); - assertFalse(reader.advance()); - // Now ACK the message. - UnboundedSource.CheckpointMark checkpoint = reader.getCheckpointMark(); - checkpoint.finalizeCheckpoint(); - reader.close(); - } - - @Test - public void testMultipleReaders() throws IOException { - List incoming = new ArrayList<>(); - for (int i = 0; i < 2; i++) { - incoming.add(String.format("data_%d", i)); - } - setupMessages(incoming); - UnboundedSource.UnboundedReader reader = - source.createReader(pipeline.getOptions(), null); - // Consume two messages, only read one. - assertTrue(reader.start()); - assertEquals("data_0", reader.getCurrent().getBody()); - - // Grab checkpoint. - SqsCheckpointMark checkpoint = (SqsCheckpointMark) reader.getCheckpointMark(); - checkpoint.finalizeCheckpoint(); - assertEquals(1, checkpoint.notYetReadReceipts.size()); - - // Read second message. - assertTrue(reader.advance()); - assertEquals("data_1", reader.getCurrent().getBody()); - - // Restore from checkpoint. - byte[] checkpointBytes = - CoderUtils.encodeToByteArray(source.getCheckpointMarkCoder(), checkpoint); - checkpoint = CoderUtils.decodeFromByteArray(source.getCheckpointMarkCoder(), checkpointBytes); - assertEquals(1, checkpoint.notYetReadReceipts.size()); - - // Re-read second message. - reader = source.createReader(pipeline.getOptions(), checkpoint); - assertTrue(reader.start()); - assertEquals("data_1", reader.getCurrent().getBody()); - - // We are done. - assertFalse(reader.advance()); - - // ACK final message. - checkpoint = (SqsCheckpointMark) reader.getCheckpointMark(); - checkpoint.finalizeCheckpoint(); - reader.close(); - } - - @Test - public void testReadMany() throws IOException { - - HashSet messages = new HashSet<>(); - List incoming = new ArrayList<>(); - for (int i = 0; i < 100; i++) { - String content = String.format("data_%d", i); - messages.add(content); - incoming.add(String.format("data_%d", i)); - } - setupMessages(incoming); - - SqsUnboundedReader reader = - (SqsUnboundedReader) source.createReader(pipeline.getOptions(), null); - - for (int i = 0; i < 100; i++) { - if (i == 0) { - assertTrue(reader.start()); - } else { - assertTrue(reader.advance()); - } - String data = reader.getCurrent().getBody(); - boolean messageNum = messages.remove(data); - // No duplicate messages. - assertTrue(messageNum); - } - // We are done. - assertFalse(reader.advance()); - // We saw each message exactly once. - assertTrue(messages.isEmpty()); - reader.close(); - } - - /** Tests that checkpoints finalized after the reader is closed succeed. */ - @Test - public void testCloseWithActiveCheckpoints() throws Exception { - setupOneMessage(); - UnboundedSource.UnboundedReader reader = - source.createReader(pipeline.getOptions(), null); - reader.start(); - UnboundedSource.CheckpointMark checkpoint = reader.getCheckpointMark(); - reader.close(); - checkpoint.finalizeCheckpoint(); - } -} diff --git a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSourceTest.java b/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSourceTest.java deleted file mode 100644 index 58099dc17ee5..000000000000 --- a/sdks/java/io/amazon-web-services/src/test/java/org/apache/beam/sdk/io/aws/sqs/SqsUnboundedSourceTest.java +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.aws.sqs; - -import com.amazonaws.services.sqs.AmazonSQS; -import org.apache.beam.sdk.io.aws.options.AwsOptions; -import org.apache.beam.sdk.testing.CoderProperties; -import org.apache.beam.sdk.testing.TestPipeline; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests on {@link SqsUnboundedSource}. */ -@RunWith(JUnit4.class) -public class SqsUnboundedSourceTest { - - private static final String DATA = "testData"; - - @Rule public TestPipeline pipeline = TestPipeline.create(); - - @Rule public EmbeddedSqsServer embeddedSqsRestServer = new EmbeddedSqsServer(); - - @Test - public void testCheckpointCoderIsSane() { - final AmazonSQS client = embeddedSqsRestServer.getClient(); - final String queueUrl = embeddedSqsRestServer.getQueueUrl(); - client.sendMessage(queueUrl, DATA); - SqsUnboundedSource source = - new SqsUnboundedSource( - SqsIO.read().withQueueUrl(queueUrl).withMaxNumRecords(1), - new SqsConfiguration(pipeline.getOptions().as(AwsOptions.class)), - SqsMessageCoder.of()); - CoderProperties.coderSerializable(source.getCheckpointMarkCoder()); - } -} diff --git a/sdks/java/io/kinesis/build.gradle b/sdks/java/io/kinesis/build.gradle deleted file mode 100644 index 60058f4469ad..000000000000 --- a/sdks/java/io/kinesis/build.gradle +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -plugins { id 'org.apache.beam.module' } -applyJavaNature( automaticModuleName: 'org.apache.beam.sdk.io.kinesis') -provideIntegrationTestingDependencies() -enableJavaPerformanceTesting() - -description = "Apache Beam :: SDKs :: Java :: IO :: Kinesis" -ext.summary = "Library to read Kinesis streams." - -test { - maxParallelForks 4 -} - -dependencies { - implementation project(path: ":sdks:java:core", configuration: "shadow") - implementation library.java.aws_java_sdk_cloudwatch - implementation library.java.aws_java_sdk_core - implementation library.java.aws_java_sdk_kinesis - implementation library.java.commons_lang3 - implementation library.java.guava - implementation library.java.joda_time - implementation library.java.slf4j_api - implementation "com.amazonaws:amazon-kinesis-client:1.14.2" - implementation "com.amazonaws:amazon-kinesis-producer:0.14.1" - implementation "commons-lang:commons-lang:2.6" - implementation library.java.vendored_guava_32_1_2_jre - implementation library.java.jackson_core - implementation library.java.jackson_annotations - implementation library.java.jackson_databind - testImplementation project(path: ":sdks:java:io:common") - testImplementation library.java.junit - testImplementation library.java.mockito_core - testImplementation library.java.guava_testlib - testImplementation library.java.powermock - testImplementation library.java.powermock_mockito - testImplementation library.java.testcontainers_localstack - testImplementation "org.assertj:assertj-core:3.11.1" - testRuntimeOnly library.java.slf4j_jdk14 - testRuntimeOnly project(path: ":runners:direct-java", configuration: "shadow") -} diff --git a/sdks/java/io/kinesis/expansion-service/build.gradle b/sdks/java/io/kinesis/expansion-service/build.gradle deleted file mode 100644 index 3bb7317924d7..000000000000 --- a/sdks/java/io/kinesis/expansion-service/build.gradle +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * License); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -apply plugin: 'org.apache.beam.module' -apply plugin: 'application' -mainClassName = "org.apache.beam.sdk.expansion.service.ExpansionService" - -applyJavaNature( - automaticModuleName: 'org.apache.beam.sdk.io.kinesis.expansion.service', - exportJavadoc: false, - validateShadowJar: false, - shadowClosure: {}, -) - -description = "Apache Beam :: SDKs :: Java :: IO :: Kinesis :: Expansion Service" -ext.summary = "Expansion service serving KinesisIO" - -dependencies { - implementation project(":sdks:java:expansion-service") - permitUnusedDeclared project(":sdks:java:expansion-service") // BEAM-11761 - implementation project(":sdks:java:io:kinesis") - permitUnusedDeclared project(":sdks:java:io:kinesis") // BEAM-11761 - runtimeOnly library.java.slf4j_jdk14 -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/AWSClientsProvider.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/AWSClientsProvider.java deleted file mode 100644 index fa3351ccf778..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/AWSClientsProvider.java +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import java.io.Serializable; - -/** - * Provides instances of AWS clients. - * - *

Please note, that any instance of {@link AWSClientsProvider} must be {@link Serializable} to - * ensure it can be sent to worker machines. - */ -public interface AWSClientsProvider extends Serializable { - AmazonKinesis getKinesisClient(); - - AmazonCloudWatch getCloudWatchClient(); - - IKinesisProducer createKinesisProducer(KinesisProducerConfiguration config); -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/BasicKinesisProvider.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/BasicKinesisProvider.java deleted file mode 100644 index ada59996609e..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/BasicKinesisProvider.java +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder; -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import java.net.URI; -import java.util.Objects; -import org.apache.beam.sdk.io.kinesis.serde.AwsSerializableUtils; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Basic implementation of {@link AWSClientsProvider} used by default in {@link KinesisIO}. */ -class BasicKinesisProvider implements AWSClientsProvider { - private final String awsCredentialsProviderSerialized; - private final Regions region; - private final @Nullable String serviceEndpoint; - private final boolean verifyCertificate; - - BasicKinesisProvider( - AWSCredentialsProvider awsCredentialsProvider, - Regions region, - @Nullable String serviceEndpoint, - boolean verifyCertificate) { - checkArgument(awsCredentialsProvider != null, "awsCredentialsProvider can not be null"); - checkArgument(region != null, "region can not be null"); - this.awsCredentialsProviderSerialized = AwsSerializableUtils.serialize(awsCredentialsProvider); - checkNotNull(awsCredentialsProviderSerialized, "awsCredentialsProviderString can not be null"); - this.region = region; - this.serviceEndpoint = serviceEndpoint; - this.verifyCertificate = verifyCertificate; - } - - private AWSCredentialsProvider getCredentialsProvider() { - return AwsSerializableUtils.deserialize(awsCredentialsProviderSerialized); - } - - @Override - public AmazonKinesis getKinesisClient() { - AmazonKinesisClientBuilder clientBuilder = - AmazonKinesisClientBuilder.standard().withCredentials(getCredentialsProvider()); - if (serviceEndpoint == null) { - clientBuilder.withRegion(region); - } else { - clientBuilder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, region.getName())); - } - return clientBuilder.build(); - } - - @Override - public AmazonCloudWatch getCloudWatchClient() { - AmazonCloudWatchClientBuilder clientBuilder = - AmazonCloudWatchClientBuilder.standard().withCredentials(getCredentialsProvider()); - if (serviceEndpoint == null) { - clientBuilder.withRegion(region); - } else { - clientBuilder.withEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration(serviceEndpoint, region.getName())); - } - return clientBuilder.build(); - } - - @Override - public IKinesisProducer createKinesisProducer(KinesisProducerConfiguration config) { - config.setRegion(region.getName()); - config.setCredentialsProvider(getCredentialsProvider()); - if (serviceEndpoint != null) { - URI uri = URI.create(serviceEndpoint); - config.setKinesisEndpoint(uri.getHost()); - config.setKinesisPort(uri.getPort()); - } - config.setVerifyCertificate(verifyCertificate); - return new KinesisProducer(config); - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - BasicKinesisProvider that = (BasicKinesisProvider) o; - return verifyCertificate == that.verifyCertificate - && Objects.equals(awsCredentialsProviderSerialized, that.awsCredentialsProviderSerialized) - && Objects.equals(region, that.region) - && Objects.equals(serviceEndpoint, that.serviceEndpoint); - } - - @Override - public int hashCode() { - return Objects.hash( - awsCredentialsProviderSerialized, region, serviceEndpoint, verifyCertificate); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java deleted file mode 100644 index 08515c7f3457..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.Serializable; - -/** - * Used to generate checkpoint object on demand. How exactly the checkpoint is generated is up to - * implementing class. - */ -interface CheckpointGenerator extends Serializable { - - KinesisReaderCheckpoint generate(SimplifiedKinesisClient client) throws TransientKinesisException; -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java deleted file mode 100644 index 1baeddd3bf8f..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.util.NoSuchElementException; -import java.util.Objects; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** - * Similar to Guava {@code Optional}, but throws {@link NoSuchElementException} for missing element. - */ -abstract class CustomOptional { - - @SuppressWarnings("unchecked") - public static CustomOptional absent() { - return (Absent) Absent.INSTANCE; - } - - public static CustomOptional of(T v) { - return new Present<>(v); - } - - public abstract boolean isPresent(); - - public abstract T get(); - - private static class Present extends CustomOptional { - - private final T value; - - private Present(T value) { - this.value = value; - } - - @Override - public boolean isPresent() { - return true; - } - - @Override - public T get() { - return value; - } - - @Override - public boolean equals(@Nullable Object o) { - if (!(o instanceof Present)) { - return false; - } - - Present present = (Present) o; - return Objects.equals(value, present.value); - } - - @Override - public int hashCode() { - return Objects.hash(value); - } - } - - private static class Absent extends CustomOptional { - - private static final Absent INSTANCE = new Absent<>(); - - private Absent() {} - - @Override - public boolean isPresent() { - return false; - } - - @Override - public T get() { - throw new NoSuchElementException(); - } - - @Override - public boolean equals(@Nullable Object o) { - return o instanceof Absent; - } - - @Override - public int hashCode() { - return 0; - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java deleted file mode 100644 index 8ef1274947b5..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.services.kinesis.model.Shard; -import java.util.List; -import java.util.stream.Collectors; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Creates {@link KinesisReaderCheckpoint}, which spans over all shards in given stream. List of - * shards is obtained dynamically on call to {@link #generate(SimplifiedKinesisClient)}. - */ -class DynamicCheckpointGenerator implements CheckpointGenerator { - - private static final Logger LOG = LoggerFactory.getLogger(DynamicCheckpointGenerator.class); - private final String streamName; - private final StartingPoint startingPoint; - - public DynamicCheckpointGenerator(String streamName, StartingPoint startingPoint) { - this.streamName = streamName; - this.startingPoint = startingPoint; - } - - @Override - public KinesisReaderCheckpoint generate(SimplifiedKinesisClient kinesis) - throws TransientKinesisException { - List streamShards = kinesis.listShardsAtPoint(streamName, startingPoint); - LOG.info( - "Creating a checkpoint with following shards {} at {}", - streamShards, - startingPoint.getTimestamp()); - return new KinesisReaderCheckpoint( - streamShards.stream() - .map(shard -> new ShardCheckpoint(streamName, shard.getShardId(), startingPoint)) - .collect(Collectors.toList())); - } - - @Override - public String toString() { - return String.format("Checkpoint generator for %s: %s", streamName, startingPoint); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java deleted file mode 100644 index 6fefb43dee0f..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord; -import java.util.List; -import java.util.stream.Collectors; - -/** Represents the output of 'get' operation on Kinesis stream. */ -class GetKinesisRecordsResult { - - private final List records; - private final String nextShardIterator; - private final long millisBehindLatest; - - public GetKinesisRecordsResult( - List records, - String nextShardIterator, - long millisBehindLatest, - final String streamName, - final String shardId) { - this.records = - records.stream() - .map( - input -> { - assert input != null; // to make FindBugs happy - return new KinesisRecord(input, streamName, shardId); - }) - .collect(Collectors.toList()); - this.nextShardIterator = nextShardIterator; - this.millisBehindLatest = millisBehindLatest; - } - - public List getRecords() { - return records; - } - - public String getNextShardIterator() { - return nextShardIterator; - } - - public long getMillisBehindLatest() { - return millisBehindLatest; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientThrottledException.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientThrottledException.java deleted file mode 100644 index 0cf4bdb0d85b..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientThrottledException.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.AmazonClientException; - -/** Thrown when the Kinesis client was throttled due to rate limits. */ -public class KinesisClientThrottledException extends TransientKinesisException { - - public KinesisClientThrottledException(String s, AmazonClientException e) { - super(s, e); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java deleted file mode 100644 index 3b64a6e71947..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java +++ /dev/null @@ -1,1116 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.producer.Attempt; -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import com.amazonaws.services.kinesis.producer.UserRecordFailedException; -import com.amazonaws.services.kinesis.producer.UserRecordResult; -import com.google.auto.value.AutoValue; -import com.google.common.util.concurrent.ListenableFuture; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Properties; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.LinkedBlockingDeque; -import java.util.function.Supplier; -import org.apache.beam.sdk.coders.ByteArrayCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.io.Read.Unbounded; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.MapElements; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.apache.beam.sdk.values.TypeDescriptor; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link PTransform}s for reading from and writing to Kinesis streams. - * - *

Reading from Kinesis

- * - *

Example usages: - * - *

{@code
- * p.apply(KinesisIO.read()
- *     .withStreamName("streamName")
- *     .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *     // using AWS default credentials provider chain (recommended)
- *     .withAWSClientsProvider(DefaultAWSCredentialsProviderChain.getInstance(), STREAM_REGION)
- *  .apply( ... ) // other transformations
- * }
- * - *
{@code
- * p.apply(KinesisIO.read()
- *     .withStreamName("streamName")
- *     .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *     // using plain AWS key and secret
- *     .withAWSClientsProvider("AWS_KEY", "AWS_SECRET", STREAM_REGION)
- *  .apply( ... ) // other transformations
- * }
- * - *

As you can see you need to provide 3 things: - * - *

    - *
  • name of the stream you're going to read - *
  • position in the stream where reading should start. There are two options: - *
      - *
    • {@link InitialPositionInStream#LATEST} - reading will begin from end of the stream - *
    • {@link InitialPositionInStream#TRIM_HORIZON} - reading will begin at the very - * beginning of the stream - *
    - *
  • data used to initialize {@link AmazonKinesis} and {@link AmazonCloudWatch} clients: - *
      - *
    • AWS credentials - *
    • region where the stream is located - *
    - *
- * - *

In case when you want to set up {@link AmazonKinesis} or {@link AmazonCloudWatch} client by - * your own (for example if you're using more sophisticated authorization methods like Amazon STS, - * etc.) you can do it by implementing {@link AWSClientsProvider} class: - * - *

{@code
- * public class MyCustomKinesisClientProvider implements AWSClientsProvider {
- *   public AmazonKinesis getKinesisClient() {
- *     // set up your client here
- *   }
- *
- *   public AmazonCloudWatch getCloudWatchClient() {
- *     // set up your client here
- *   }
- *
- * }
- * }
- * - *

Usage is pretty straightforward: - * - *

{@code
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withAWSClientsProvider(new MyCustomKinesisClientProvider())
- *  .apply( ... ) // other transformations
- * }
- * - *

There's also possibility to start reading using arbitrary point in time - in this case you - * need to provide {@link Instant} object: - * - *

{@code
- * p.apply(KinesisIO.read()
- *     .withStreamName("streamName")
- *     .withInitialTimestampInStream(instant)
- *     .withAWSClientsProvider(new MyCustomKinesisClientProvider())
- *  .apply( ... ) // other transformations
- * }
- * - *

Kinesis IO uses ArrivalTimeWatermarkPolicy by default. To use Processing time as event time: - * - *

{@code
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withProcessingTimeWatermarkPolicy())
- * }
- * - *

It is also possible to specify a custom watermark policy to control watermark computation. - * Below is an example - * - *

{@code
- * // custom policy
- * class MyCustomPolicy implements WatermarkPolicy {
- *     private WatermarkPolicyFactory.CustomWatermarkPolicy customWatermarkPolicy;
- *
- *     MyCustomPolicy() {
- *       this.customWatermarkPolicy = new WatermarkPolicyFactory.CustomWatermarkPolicy(WatermarkParameters.create());
- *     }
- *
- *     public Instant getWatermark() {
- *       return customWatermarkPolicy.getWatermark();
- *     }
- *
- *     public void update(KinesisRecord record) {
- *       customWatermarkPolicy.update(record);
- *     }
- *   }
- *
- * // custom factory
- * class MyCustomPolicyFactory implements WatermarkPolicyFactory {
- *     public WatermarkPolicy createWatermarkPolicy() {
- *       return new MyCustomPolicy();
- *     }
- * }
- *
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withCustomWatermarkPolicy(new MyCustomPolicyFactory())
- * }
- * - *

By default Kinesis IO will poll the Kinesis getRecords() API as fast as possible which may - * lead to excessive read throttling. To limit the rate of getRecords() calls you can set a rate - * limit policy. For example, the default fixed delay policy will limit the rate to one API call per - * second per shard: - * - *

{@code
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withFixedDelayRateLimitPolicy())
- * }
- * - *

You can also use a fixed delay policy with a specified delay interval, for example: - * - *

{@code
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withFixedDelayRateLimitPolicy(Duration.millis(500))
- * }
- * - *

If you need to change the polling interval of a Kinesis pipeline at runtime, for example to - * compensate for adding and removing additional consumers to the stream, then you can supply the - * delay interval as a function so that you can obtain the current delay interval from some external - * source: - * - *

{@code
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withDynamicDelayRateLimitPolicy(() -> Duration.millis())
- * }
- * - *

Finally, you can create a custom rate limit policy that responds to successful read calls - * and/or read throttling exceptions with your own rate-limiting logic: - * - *

{@code
- * // custom policy
- * public class MyCustomPolicy implements RateLimitPolicy {
- *
- *   public void onSuccess(List records) throws InterruptedException {
- *     // handle successful getRecords() call
- *   }
- *
- *   public void onThrottle(KinesisClientThrottledException e) throws InterruptedException {
- *     // handle Kinesis read throttling exception
- *   }
- * }
- *
- * // custom factory
- * class MyCustomPolicyFactory implements RateLimitPolicyFactory {
- *
- *   public RateLimitPolicy getRateLimitPolicy() {
- *     return new MyCustomPolicy();
- *   }
- * }
- *
- * p.apply(KinesisIO.read()
- *    .withStreamName("streamName")
- *    .withInitialPositionInStream(InitialPositionInStream.LATEST)
- *    .withCustomRateLimitPolicy(new MyCustomPolicyFactory())
- * }
- * - *

Writing to Kinesis

- * - *

Example usages: - * - *

{@code
- * PCollection data = ...;
- *
- * data.apply(KinesisIO.write()
- *     .withStreamName("streamName")
- *     .withPartitionKey("partitionKey")
- *     // using AWS default credentials provider chain (recommended)
- *     .withAWSClientsProvider(DefaultAWSCredentialsProviderChain.getInstance(), STREAM_REGION));
- * }
- * - *
{@code
- * PCollection data = ...;
- *
- * data.apply(KinesisIO.write()
- *     .withStreamName("streamName")
- *     .withPartitionKey("partitionKey")
- *      // using plain AWS key and secret
- *     .withAWSClientsProvider("AWS_KEY", "AWS_SECRET", STREAM_REGION));
- * }
- * - *

As a client, you need to provide at least 3 things: - * - *

    - *
  • name of the stream where you're going to write - *
  • partition key (or implementation of {@link KinesisPartitioner}) that defines which - * partition will be used for writing - *
  • data used to initialize {@link AmazonKinesis} and {@link AmazonCloudWatch} clients: - *
      - *
    • AWS credentials - *
    • region where the stream is located - *
    - *
- * - *

In case if you need to define more complicated logic for key partitioning then you can create - * your own implementation of {@link KinesisPartitioner} and set it by {@link - * KinesisIO.Write#withPartitioner(KinesisPartitioner)} - * - *

Internally, {@link KinesisIO.Write} relies on Amazon Kinesis Producer Library (KPL). This - * library can be configured with a set of {@link Properties} if needed. - * - *

Example usage of KPL configuration: - * - *

{@code
- * Properties properties = new Properties();
- * properties.setProperty("KinesisEndpoint", "localhost");
- * properties.setProperty("KinesisPort", "4567");
- *
- * PCollection data = ...;
- *
- * data.apply(KinesisIO.write()
- *     .withStreamName("streamName")
- *     .withPartitionKey("partitionKey")
- *     .withAWSClientsProvider(AWS_KEY, AWS_SECRET, STREAM_REGION)
- *     .withProducerProperties(properties));
- * }
- * - *

For more information about configuratiom parameters, see the sample - * of configuration file. - * - * @deprecated Module beam-sdks-java-io-kinesis is deprecated and will be eventually - * removed. Please migrate to {@link org.apache.beam.sdk.io.aws2.kinesis.KinesisIO} in module - * beam-sdks-java-io-amazon-web-services2. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -@Deprecated -public final class KinesisIO { - - private static final Logger LOG = LoggerFactory.getLogger(KinesisIO.class); - - private static final int DEFAULT_NUM_RETRIES = 6; - - /** Returns a new {@link Read} transform for reading from Kinesis. */ - public static Read read() { - return Read.newBuilder().setCoder(KinesisRecordCoder.of()).build(); - } - - /** - * A {@link PTransform} to read from Kinesis stream as bytes without metadata and returns a {@link - * PCollection} of {@link byte[]}. - */ - public static Read readData() { - return Read.newBuilder(KinesisRecord::getDataAsBytes).setCoder(ByteArrayCoder.of()).build(); - } - - /** A {@link PTransform} writing data to Kinesis. */ - public static Write write() { - return new AutoValue_KinesisIO_Write.Builder().setRetries(DEFAULT_NUM_RETRIES).build(); - } - - /** Implementation of {@link #read}. */ - @AutoValue - public abstract static class Read extends PTransform> { - - abstract @Nullable String getStreamName(); - - abstract @Nullable StartingPoint getInitialPosition(); - - abstract @Nullable AWSClientsProvider getAWSClientsProvider(); - - abstract long getMaxNumRecords(); - - abstract @Nullable Duration getMaxReadTime(); - - abstract Duration getUpToDateThreshold(); - - abstract @Nullable Integer getRequestRecordsLimit(); - - abstract WatermarkPolicyFactory getWatermarkPolicyFactory(); - - abstract RateLimitPolicyFactory getRateLimitPolicyFactory(); - - abstract Integer getMaxCapacityPerShard(); - - abstract Coder getCoder(); - - abstract @Nullable SerializableFunction getParseFn(); - - abstract Builder toBuilder(); - - static Builder newBuilder(SerializableFunction parseFn) { - return new AutoValue_KinesisIO_Read.Builder() - .setParseFn(parseFn) - .setMaxNumRecords(Long.MAX_VALUE) - .setUpToDateThreshold(Duration.ZERO) - .setWatermarkPolicyFactory(WatermarkPolicyFactory.withArrivalTimePolicy()) - .setRateLimitPolicyFactory(RateLimitPolicyFactory.withDefaultRateLimiter()) - .setMaxCapacityPerShard(ShardReadersPool.DEFAULT_CAPACITY_PER_SHARD); - } - - static Builder newBuilder() { - return newBuilder(x -> x); - } - - @AutoValue.Builder - abstract static class Builder { - - abstract Builder setStreamName(String streamName); - - abstract Builder setInitialPosition(StartingPoint startingPoint); - - abstract Builder setAWSClientsProvider(AWSClientsProvider clientProvider); - - abstract Builder setMaxNumRecords(long maxNumRecords); - - abstract Builder setMaxReadTime(Duration maxReadTime); - - abstract Builder setUpToDateThreshold(Duration upToDateThreshold); - - abstract Builder setRequestRecordsLimit(Integer limit); - - abstract Builder setWatermarkPolicyFactory(WatermarkPolicyFactory watermarkPolicyFactory); - - abstract Builder setRateLimitPolicyFactory(RateLimitPolicyFactory rateLimitPolicyFactory); - - abstract Builder setMaxCapacityPerShard(Integer maxCapacity); - - abstract Builder setParseFn(SerializableFunction parseFn); - - abstract Builder setCoder(Coder coder); - - abstract Read build(); - } - - /** Specify reading from streamName. */ - public Read withStreamName(String streamName) { - return toBuilder().setStreamName(streamName).build(); - } - - /** Specify reading from some initial position in stream. */ - public Read withInitialPositionInStream(InitialPositionInStream initialPosition) { - return toBuilder().setInitialPosition(new StartingPoint(initialPosition)).build(); - } - - /** - * Specify reading beginning at given {@link Instant}. This {@link Instant} must be in the past, - * i.e. before {@link Instant#now()}. - */ - public Read withInitialTimestampInStream(Instant initialTimestamp) { - return toBuilder().setInitialPosition(new StartingPoint(initialTimestamp)).build(); - } - - /** - * Allows to specify custom {@link AWSClientsProvider}. {@link AWSClientsProvider} provides - * {@link AmazonKinesis} and {@link AmazonCloudWatch} instances which are later used for - * communication with Kinesis. You should use this method if {@link - * Read#withAWSClientsProvider(AWSCredentialsProvider, Regions)} does not suit your needs. - */ - public Read withAWSClientsProvider(AWSClientsProvider awsClientsProvider) { - return toBuilder().setAWSClientsProvider(awsClientsProvider).build(); - } - - /** - * Specify {@link AWSCredentialsProvider} and region to be used to read from Kinesis. If you - * need more sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - */ - public Read withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, Regions region) { - return withAWSClientsProvider(awsCredentialsProvider, region, null); - } - - /** - * Specify credential details and region to be used to read from Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - */ - public Read withAWSClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region) { - return withAWSClientsProvider(awsAccessKey, awsSecretKey, region, null); - } - - /** - * Specify {@link AWSCredentialsProvider} and region to be used to read from Kinesis. If you - * need more sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with a kinesis service emulator. - */ - public Read withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, Regions region, String serviceEndpoint) { - return withAWSClientsProvider(awsCredentialsProvider, region, serviceEndpoint, true); - } - - /** - * Specify credential details and region to be used to read from Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with a kinesis service emulator. - */ - public Read withAWSClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region, String serviceEndpoint) { - return withAWSClientsProvider(awsAccessKey, awsSecretKey, region, serviceEndpoint, true); - } - - /** - * Specify {@link AWSCredentialsProvider} and region to be used to read from Kinesis. If you - * need more sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - * - *

The {@code verifyCertificate} disables or enables certificate verification. Never set it - * to false in production. - */ - public Read withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, - Regions region, - String serviceEndpoint, - boolean verifyCertificate) { - return withAWSClientsProvider( - new BasicKinesisProvider( - awsCredentialsProvider, region, serviceEndpoint, verifyCertificate)); - } - - /** - * Specify credential details and region to be used to read from Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Read#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - * - *

The {@code verifyCertificate} disables or enables certificate verification. Never set it - * to false in production. - */ - public Read withAWSClientsProvider( - String awsAccessKey, - String awsSecretKey, - Regions region, - String serviceEndpoint, - boolean verifyCertificate) { - AWSCredentialsProvider awsCredentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey)); - return withAWSClientsProvider( - awsCredentialsProvider, region, serviceEndpoint, verifyCertificate); - } - - /** Specifies to read at most a given number of records. */ - public Read withMaxNumRecords(long maxNumRecords) { - checkArgument( - maxNumRecords > 0, "maxNumRecords must be positive, but was: %s", maxNumRecords); - return toBuilder().setMaxNumRecords(maxNumRecords).build(); - } - - /** Specifies to read records during {@code maxReadTime}. */ - public Read withMaxReadTime(Duration maxReadTime) { - checkArgument(maxReadTime != null, "maxReadTime can not be null"); - return toBuilder().setMaxReadTime(maxReadTime).build(); - } - - /** - * Specifies how late records consumed by this source can be to still be considered on time. - * When this limit is exceeded the actual backlog size will be evaluated and the runner might - * decide to scale the amount of resources allocated to the pipeline in order to speed up - * ingestion. - */ - public Read withUpToDateThreshold(Duration upToDateThreshold) { - checkArgument(upToDateThreshold != null, "upToDateThreshold can not be null"); - return toBuilder().setUpToDateThreshold(upToDateThreshold).build(); - } - - /** - * Specifies the maximum number of records in GetRecordsResult returned by GetRecords call which - * is limited by 10K records. If should be adjusted according to average size of data record to - * prevent shard overloading. More details can be found here: API_GetRecords - */ - public Read withRequestRecordsLimit(int limit) { - checkArgument(limit > 0, "limit must be positive, but was: %s", limit); - checkArgument(limit <= 10_000, "limit must be up to 10,000, but was: %s", limit); - return toBuilder().setRequestRecordsLimit(limit).build(); - } - - /** Specifies the {@code WatermarkPolicyFactory} as ArrivalTimeWatermarkPolicyFactory. */ - public Read withArrivalTimeWatermarkPolicy() { - return toBuilder() - .setWatermarkPolicyFactory(WatermarkPolicyFactory.withArrivalTimePolicy()) - .build(); - } - - /** - * Specifies the {@code WatermarkPolicyFactory} as ArrivalTimeWatermarkPolicyFactory. - * - *

{@param watermarkIdleDurationThreshold} Denotes the duration for which the watermark can - * be idle. - */ - public Read withArrivalTimeWatermarkPolicy(Duration watermarkIdleDurationThreshold) { - return toBuilder() - .setWatermarkPolicyFactory( - WatermarkPolicyFactory.withArrivalTimePolicy(watermarkIdleDurationThreshold)) - .build(); - } - - /** Specifies the {@code WatermarkPolicyFactory} as ProcessingTimeWatermarkPolicyFactory. */ - public Read withProcessingTimeWatermarkPolicy() { - return toBuilder() - .setWatermarkPolicyFactory(WatermarkPolicyFactory.withProcessingTimePolicy()) - .build(); - } - - /** - * Specifies the {@code WatermarkPolicyFactory} as a custom watermarkPolicyFactory. - * - * @param watermarkPolicyFactory Custom Watermark policy factory. - */ - public Read withCustomWatermarkPolicy(WatermarkPolicyFactory watermarkPolicyFactory) { - checkArgument(watermarkPolicyFactory != null, "watermarkPolicyFactory cannot be null"); - return toBuilder().setWatermarkPolicyFactory(watermarkPolicyFactory).build(); - } - - /** Specifies a fixed delay rate limit policy with the default delay of 1 second. */ - public Read withFixedDelayRateLimitPolicy() { - return toBuilder().setRateLimitPolicyFactory(RateLimitPolicyFactory.withFixedDelay()).build(); - } - - /** - * Specifies a fixed delay rate limit policy with the given delay. - * - * @param delay Denotes the fixed delay duration. - */ - public Read withFixedDelayRateLimitPolicy(Duration delay) { - checkArgument(delay != null, "delay cannot be null"); - return toBuilder() - .setRateLimitPolicyFactory(RateLimitPolicyFactory.withFixedDelay(delay)) - .build(); - } - - /** - * Specifies a dynamic delay rate limit policy with the given function being called at each - * polling interval to get the next delay value. This can be used to change the polling interval - * of a running pipeline based on some external configuration source, for example. - * - * @param delay The function to invoke to get the next delay duration. - */ - public Read withDynamicDelayRateLimitPolicy(Supplier delay) { - checkArgument(delay != null, "delay cannot be null"); - return toBuilder().setRateLimitPolicyFactory(RateLimitPolicyFactory.withDelay(delay)).build(); - } - - /** - * Specifies the {@code RateLimitPolicyFactory} for a custom rate limiter. - * - * @param rateLimitPolicyFactory Custom rate limit policy factory. - */ - public Read withCustomRateLimitPolicy(RateLimitPolicyFactory rateLimitPolicyFactory) { - checkArgument(rateLimitPolicyFactory != null, "rateLimitPolicyFactory cannot be null"); - return toBuilder().setRateLimitPolicyFactory(rateLimitPolicyFactory).build(); - } - - /** Specifies the maximum number of messages per one shard. */ - public Read withMaxCapacityPerShard(Integer maxCapacity) { - checkArgument(maxCapacity > 0, "maxCapacity must be positive, but was: %s", maxCapacity); - return toBuilder().setMaxCapacityPerShard(maxCapacity).build(); - } - - @Override - public PCollection expand(PBegin input) { - LOG.warn( - "You are using a deprecated IO for Kinesis. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - Unbounded unbounded = - org.apache.beam.sdk.io.Read.from( - new KinesisSource( - getAWSClientsProvider(), - getStreamName(), - getInitialPosition(), - getUpToDateThreshold(), - getWatermarkPolicyFactory(), - getRateLimitPolicyFactory(), - getRequestRecordsLimit(), - getMaxCapacityPerShard())); - - PTransform> transform = unbounded; - - if (getMaxNumRecords() < Long.MAX_VALUE || getMaxReadTime() != null) { - transform = - unbounded.withMaxReadTime(getMaxReadTime()).withMaxNumRecords(getMaxNumRecords()); - } - - return input - .apply(transform) - .apply(MapElements.into(new TypeDescriptor() {}).via(getParseFn())) - .setCoder(getCoder()); - } - } - - /** Implementation of {@link #write}. */ - @AutoValue - public abstract static class Write extends PTransform, PDone> { - - abstract @Nullable String getStreamName(); - - abstract @Nullable String getPartitionKey(); - - abstract @Nullable KinesisPartitioner getPartitioner(); - - abstract @Nullable Properties getProducerProperties(); - - abstract @Nullable AWSClientsProvider getAWSClientsProvider(); - - abstract int getRetries(); - - abstract Builder builder(); - - @AutoValue.Builder - abstract static class Builder { - abstract Builder setStreamName(String streamName); - - abstract Builder setPartitionKey(String partitionKey); - - abstract Builder setPartitioner(KinesisPartitioner partitioner); - - abstract Builder setProducerProperties(Properties properties); - - abstract Builder setAWSClientsProvider(AWSClientsProvider clientProvider); - - abstract Builder setRetries(int retries); - - abstract Write build(); - } - - /** Specify Kinesis stream name which will be used for writing, this name is required. */ - public Write withStreamName(String streamName) { - return builder().setStreamName(streamName).build(); - } - - /** - * Specify default partition key. - * - *

In case if you need to define more complicated logic for key partitioning then you can - * create your own implementation of {@link KinesisPartitioner} and specify it by {@link - * KinesisIO.Write#withPartitioner(KinesisPartitioner)} - * - *

Using one of the methods {@link KinesisIO.Write#withPartitioner(KinesisPartitioner)} or - * {@link KinesisIO.Write#withPartitionKey(String)} is required but not both in the same time. - */ - public Write withPartitionKey(String partitionKey) { - return builder().setPartitionKey(partitionKey).build(); - } - - /** - * Allows to specify custom implementation of {@link KinesisPartitioner}. - * - *

This method should be used to balance a distribution of new written records among all - * stream shards. - * - *

Using one of the methods {@link KinesisIO.Write#withPartitioner(KinesisPartitioner)} or - * {@link KinesisIO.Write#withPartitionKey(String)} is required but not both in the same time. - */ - public Write withPartitioner(KinesisPartitioner partitioner) { - return builder().setPartitioner(partitioner).build(); - } - - /** - * Specify the configuration properties for Kinesis Producer Library (KPL). - * - *

Example of creating new KPL configuration: - * - *

{@code Properties properties = new Properties(); - * properties.setProperty("CollectionMaxCount", "1000"); - * properties.setProperty("ConnectTimeout", "10000");} - */ - public Write withProducerProperties(Properties properties) { - return builder().setProducerProperties(properties).build(); - } - - /** - * Allows to specify custom {@link AWSClientsProvider}. {@link AWSClientsProvider} creates new - * {@link IKinesisProducer} which is later used for writing to Kinesis. - * - *

This method should be used if {@link Write#withAWSClientsProvider(AWSCredentialsProvider, - * Regions)} does not suit well. - */ - public Write withAWSClientsProvider(AWSClientsProvider awsClientsProvider) { - return builder().setAWSClientsProvider(awsClientsProvider).build(); - } - - /** - * Specify {@link AWSCredentialsProvider} and region to be used to write to Kinesis. If you need - * more sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - */ - public Write withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, Regions region) { - return withAWSClientsProvider(awsCredentialsProvider, region, null); - } - - /** - * Specify credential details and region to be used to write to Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - */ - public Write withAWSClientsProvider(String awsAccessKey, String awsSecretKey, Regions region) { - return withAWSClientsProvider(awsAccessKey, awsSecretKey, region, null); - } - - /** - * Specify {@link AWSCredentialsProvider} and region to be used to write to Kinesis. If you need - * more sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - */ - public Write withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, Regions region, String serviceEndpoint) { - return withAWSClientsProvider(awsCredentialsProvider, region, serviceEndpoint, true); - } - - /** - * Specify credential details and region to be used to write to Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - */ - public Write withAWSClientsProvider( - String awsAccessKey, String awsSecretKey, Regions region, String serviceEndpoint) { - return withAWSClientsProvider(awsAccessKey, awsSecretKey, region, serviceEndpoint, true); - } - - /** - * Specify credential details and region to be used to write to Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - * - *

The {@code verifyCertificate} disables or enables certificate verification. Never set it - * to false in production. - */ - public Write withAWSClientsProvider( - AWSCredentialsProvider awsCredentialsProvider, - Regions region, - String serviceEndpoint, - boolean verifyCertificate) { - return withAWSClientsProvider( - new BasicKinesisProvider( - awsCredentialsProvider, region, serviceEndpoint, verifyCertificate)); - } - - /** - * Specify credential details and region to be used to write to Kinesis. If you need more - * sophisticated credential protocol, then you should look at {@link - * Write#withAWSClientsProvider(AWSClientsProvider)}. - * - *

The {@code serviceEndpoint} sets an alternative service host. This is useful to execute - * the tests with Kinesis service emulator. - * - *

The {@code verifyCertificate} disables or enables certificate verification. Never set it - * to false in production. - */ - public Write withAWSClientsProvider( - String awsAccessKey, - String awsSecretKey, - Regions region, - String serviceEndpoint, - boolean verifyCertificate) { - AWSCredentialsProvider awsCredentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(awsAccessKey, awsSecretKey)); - return withAWSClientsProvider( - awsCredentialsProvider, region, serviceEndpoint, verifyCertificate); - } - - /** - * Specify the number of retries that will be used to flush the outstanding records in case if - * they were not flushed from the first time. Default number of retries is {@code - * DEFAULT_NUM_RETRIES = 10}. - * - *

This is used for testing. - */ - @VisibleForTesting - Write withRetries(int retries) { - return builder().setRetries(retries).build(); - } - - @Override - public PDone expand(PCollection input) { - LOG.warn( - "You are using a deprecated IO for Kinesis. Please migrate to module " - + "'org.apache.beam:beam-sdks-java-io-amazon-web-services2'."); - - checkArgument(getStreamName() != null, "withStreamName() is required"); - checkArgument( - (getPartitionKey() != null) || (getPartitioner() != null), - "withPartitionKey() or withPartitioner() is required"); - checkArgument( - getPartitionKey() == null || (getPartitioner() == null), - "only one of either withPartitionKey() or withPartitioner() is possible"); - checkArgument(getAWSClientsProvider() != null, "withAWSClientsProvider() is required"); - createProducerConfiguration(); // verify Kinesis producer configuration can be built - - input.apply(ParDo.of(new KinesisWriterFn(this))); - return PDone.in(input.getPipeline()); - } - - private KinesisProducerConfiguration createProducerConfiguration() { - Properties props = getProducerProperties(); - if (props == null) { - props = new Properties(); - } - return KinesisProducerConfiguration.fromProperties(props); - } - - private static class KinesisWriterFn extends DoFn { - private static final int MAX_NUM_FAILURES = 10; - - /** Usage count of static, shared Kinesis producer. */ - private static int producerRefCount = 0; - - /** Static, shared Kinesis producer. */ - private static IKinesisProducer producer; - - private final KinesisIO.Write spec; - - private transient KinesisPartitioner partitioner; - private transient LinkedBlockingDeque failures; - private transient List> putFutures; - - KinesisWriterFn(KinesisIO.Write spec) { - this.spec = spec; - } - - /** - * Initialize statically shared Kinesis producer if required and count usage. - * - *

NOTE: If there is, for whatever reasons, another instance of a {@link KinesisWriterFn} - * with different producer properties or even a different implementation of {@link - * AWSClientsProvider}, these changes will be silently discarded in favor of an existing - * producer instance. - */ - private void setupSharedProducer() { - synchronized (KinesisWriterFn.class) { - if (producer == null) { - producer = - spec.getAWSClientsProvider() - .createKinesisProducer(spec.createProducerConfiguration()); - producerRefCount = 0; - } - producerRefCount++; - } - } - - /** - * Discard statically shared producer if it is not used anymore according to the usage count. - */ - private void teardownSharedProducer() { - IKinesisProducer obsolete = null; - synchronized (KinesisWriterFn.class) { - if (--producerRefCount == 0) { - obsolete = producer; - producer = null; - } - } - if (obsolete != null) { - obsolete.flushSync(); // should be a noop, but just in case - obsolete.destroy(); - } - } - - @Setup - public void setup() { - setupSharedProducer(); - // Use custom partitioner if it exists - if (spec.getPartitioner() != null) { - partitioner = spec.getPartitioner(); - } - } - - @StartBundle - public void startBundle() { - putFutures = Collections.synchronizedList(new ArrayList<>()); - /** Keep only the first {@link MAX_NUM_FAILURES} occurred exceptions */ - failures = new LinkedBlockingDeque<>(MAX_NUM_FAILURES); - } - - /** - * It adds a record asynchronously which then should be delivered by Kinesis producer in - * background (Kinesis producer forks native processes to do this job). - * - *

The records can be batched and then they will be sent in one HTTP request. Amazon KPL - * supports two types of batching - aggregation and collection - and they can be configured by - * producer properties. - * - *

More details can be found here: KPL Key - * Concepts and Configuring - * the KPL - */ - @ProcessElement - public void processElement(ProcessContext c) { - ByteBuffer data = ByteBuffer.wrap(c.element()); - String partitionKey = spec.getPartitionKey(); - String explicitHashKey = null; - - // Use custom partitioner - if (partitioner != null) { - partitionKey = partitioner.getPartitionKey(c.element()); - explicitHashKey = partitioner.getExplicitHashKey(c.element()); - } - - ListenableFuture f = - producer.addUserRecord(spec.getStreamName(), partitionKey, explicitHashKey, data); - putFutures.add(f); - } - - @FinishBundle - public void finishBundle() throws Exception { - flushBundle(); - } - - /** - * Flush outstanding records until the total number of failed records will be less than 0 or - * the number of retries will be exhausted. The retry timeout starts from 1 second and it - * doubles on every iteration. - */ - private void flushBundle() throws InterruptedException, ExecutionException, IOException { - int retries = spec.getRetries(); - int numFailedRecords; - int retryTimeout = 1000; // initial timeout, 1 sec - String message = ""; - - do { - numFailedRecords = 0; - producer.flush(); - - // Wait for puts to finish and check the results - for (Future f : putFutures) { - UserRecordResult result = f.get(); // this does block - if (!result.isSuccessful()) { - numFailedRecords++; - } - } - - // wait until outstanding records will be flushed - Thread.sleep(retryTimeout); - retryTimeout *= 2; // exponential backoff - } while (numFailedRecords > 0 && retries-- > 0); - - if (numFailedRecords > 0) { - for (Future f : putFutures) { - UserRecordResult result = f.get(); - if (!result.isSuccessful()) { - failures.offer( - new KinesisWriteException( - "Put record was not successful.", new UserRecordFailedException(result))); - } - } - - LOG.error( - "After [{}] retries, number of failed records [{}] is still greater than 0", - spec.getRetries(), - numFailedRecords); - } - - checkForFailures(message); - } - - /** If any write has asynchronously failed, fail the bundle with a useful error. */ - private void checkForFailures(String message) throws IOException { - if (failures.isEmpty()) { - return; - } - - StringBuilder logEntry = new StringBuilder(); - logEntry.append(message).append(System.lineSeparator()); - - int i = 0; - while (!failures.isEmpty()) { - i++; - KinesisWriteException exc = failures.remove(); - - logEntry.append(System.lineSeparator()).append(exc.getMessage()); - Throwable cause = exc.getCause(); - if (cause != null) { - logEntry.append(": ").append(cause.getMessage()); - - if (cause instanceof UserRecordFailedException) { - List attempts = - ((UserRecordFailedException) cause).getResult().getAttempts(); - for (Attempt attempt : attempts) { - if (attempt.getErrorMessage() != null) { - logEntry.append(System.lineSeparator()).append(attempt.getErrorMessage()); - } - } - } - } - } - - String errorMessage = - String.format( - "Some errors occurred writing to Kinesis. First %d errors: %s", - i, logEntry.toString()); - throw new IOException(errorMessage); - } - - @Teardown - public void teardown() throws Exception { - teardownSharedProducer(); - } - } - } - - /** An exception that puts information about the failed record. */ - static class KinesisWriteException extends IOException { - KinesisWriteException(String message, Throwable cause) { - super(message, cause); - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisPartitioner.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisPartitioner.java deleted file mode 100644 index 9bd46eaef682..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisPartitioner.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.Serializable; - -/** Kinesis interface for custom partitioner. */ -public interface KinesisPartitioner extends Serializable { - String getPartitionKey(byte[] value); - - String getExplicitHashKey(byte[] value); -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java deleted file mode 100644 index a4a935eed7b9..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java +++ /dev/null @@ -1,222 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import java.io.IOException; -import java.util.NoSuchElementException; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Reads data from multiple kinesis shards in a single thread. It uses simple round robin algorithm - * when fetching data from shards. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class KinesisReader extends UnboundedSource.UnboundedReader { - - private static final Logger LOG = LoggerFactory.getLogger(KinesisReader.class); - - private final SimplifiedKinesisClient kinesis; - private final KinesisSource source; - private final CheckpointGenerator initialCheckpointGenerator; - private final WatermarkPolicyFactory watermarkPolicyFactory; - private final RateLimitPolicyFactory rateLimitPolicyFactory; - private final Duration upToDateThreshold; - private final Duration backlogBytesCheckThreshold; - private CustomOptional currentRecord = CustomOptional.absent(); - private long lastBacklogBytes; - private Instant backlogBytesLastCheckTime = new Instant(0L); - private ShardReadersPool shardReadersPool; - private final Integer maxCapacityPerShard; - - KinesisReader( - SimplifiedKinesisClient kinesis, - CheckpointGenerator initialCheckpointGenerator, - KinesisSource source, - WatermarkPolicyFactory watermarkPolicyFactory, - RateLimitPolicyFactory rateLimitPolicyFactory, - Duration upToDateThreshold, - Integer maxCapacityPerShard) { - this( - kinesis, - initialCheckpointGenerator, - source, - watermarkPolicyFactory, - rateLimitPolicyFactory, - upToDateThreshold, - Duration.standardSeconds(30), - maxCapacityPerShard); - } - - KinesisReader( - SimplifiedKinesisClient kinesis, - CheckpointGenerator initialCheckpointGenerator, - KinesisSource source, - WatermarkPolicyFactory watermarkPolicyFactory, - RateLimitPolicyFactory rateLimitPolicyFactory, - Duration upToDateThreshold, - Duration backlogBytesCheckThreshold, - Integer maxCapacityPerShard) { - this.kinesis = checkNotNull(kinesis, "kinesis"); - this.initialCheckpointGenerator = - checkNotNull(initialCheckpointGenerator, "initialCheckpointGenerator"); - this.watermarkPolicyFactory = watermarkPolicyFactory; - this.rateLimitPolicyFactory = rateLimitPolicyFactory; - this.source = source; - this.upToDateThreshold = upToDateThreshold; - this.backlogBytesCheckThreshold = backlogBytesCheckThreshold; - this.maxCapacityPerShard = maxCapacityPerShard; - } - - /** Generates initial checkpoint and instantiates iterators for shards. */ - @Override - public boolean start() throws IOException { - LOG.info("Starting reader using {}", initialCheckpointGenerator); - - try { - shardReadersPool = createShardReadersPool(); - shardReadersPool.start(); - } catch (TransientKinesisException e) { - throw new IOException(e); - } - - return advance(); - } - - /** Retrieves next record from internal buffer. */ - @Override - public boolean advance() throws IOException { - currentRecord = shardReadersPool.nextRecord(); - return currentRecord.isPresent(); - } - - @Override - public byte[] getCurrentRecordId() throws NoSuchElementException { - return currentRecord.get().getUniqueId(); - } - - @Override - public KinesisRecord getCurrent() throws NoSuchElementException { - return currentRecord.get(); - } - - /** - * Returns the approximate time that the current record was inserted into the stream. It is not - * guaranteed to be accurate - this could lead to mark some records as "late" even if they were - * not. Beware of this when setting {@link - * org.apache.beam.sdk.values.WindowingStrategy#withAllowedLateness} - */ - @Override - public Instant getCurrentTimestamp() throws NoSuchElementException { - return currentRecord.get().getApproximateArrivalTimestamp(); - } - - @Override - public void close() throws IOException { - shardReadersPool.stop(); - } - - @Override - public Instant getWatermark() { - return shardReadersPool.getWatermark(); - } - - @Override - public UnboundedSource.CheckpointMark getCheckpointMark() { - return shardReadersPool.getCheckpointMark(); - } - - @Override - public UnboundedSource getCurrentSource() { - return source; - } - - /** - * Returns total size of all records that remain in Kinesis stream. The size is estimated taking - * into account size of the records that were added to the stream after timestamp of the most - * recent record returned by the reader. If no records have yet been retrieved from the reader - * {@link UnboundedSource.UnboundedReader#BACKLOG_UNKNOWN} is returned. When currently processed - * record is not further behind than {@link #upToDateThreshold} then this method returns 0. - * - *

The method can over-estimate size of the records for the split as it reports the backlog - * across all shards. This can lead to unnecessary decisions to scale up the number of workers but - * will never fail to scale up when this is necessary due to backlog size. - * - * @see BEAM-9439 - */ - @Override - public long getSplitBacklogBytes() { - Instant latestRecordTimestamp = shardReadersPool.getLatestRecordTimestamp(); - - if (latestRecordTimestamp.equals(BoundedWindow.TIMESTAMP_MIN_VALUE)) { - LOG.debug("Split backlog bytes for stream {} unknown", source.getStreamName()); - return UnboundedSource.UnboundedReader.BACKLOG_UNKNOWN; - } - - if (latestRecordTimestamp.plus(upToDateThreshold).isAfterNow()) { - LOG.debug( - "Split backlog bytes for stream {} with latest record timestamp {}: 0 (latest record timestamp is up-to-date with threshold of {})", - source.getStreamName(), - latestRecordTimestamp, - upToDateThreshold); - return 0L; - } - - if (backlogBytesLastCheckTime.plus(backlogBytesCheckThreshold).isAfterNow()) { - LOG.debug( - "Split backlog bytes for {} stream with latest record timestamp {}: {} (cached value)", - source.getStreamName(), - latestRecordTimestamp, - lastBacklogBytes); - return lastBacklogBytes; - } - - try { - lastBacklogBytes = kinesis.getBacklogBytes(source.getStreamName(), latestRecordTimestamp); - backlogBytesLastCheckTime = Instant.now(); - } catch (TransientKinesisException e) { - LOG.warn( - "Transient exception occurred during backlog estimation for stream {}.", - source.getStreamName(), - e); - } - LOG.info( - "Split backlog bytes for {} stream with {} latest record timestamp: {}", - source.getStreamName(), - latestRecordTimestamp, - lastBacklogBytes); - return lastBacklogBytes; - } - - ShardReadersPool createShardReadersPool() throws TransientKinesisException { - return new ShardReadersPool( - kinesis, - initialCheckpointGenerator.generate(kinesis), - watermarkPolicyFactory, - rateLimitPolicyFactory, - maxCapacityPerShard); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java deleted file mode 100644 index 4b4bcc3898c7..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.partition; - -import java.io.IOException; -import java.io.Serializable; -import java.util.Iterator; -import java.util.List; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; - -/** - * Checkpoint representing a total progress in a set of shards in single stream. The set of shards - * covered by {@link KinesisReaderCheckpoint} may or may not be equal to set of all shards present - * in the stream. This class is immutable. - */ -class KinesisReaderCheckpoint - implements Iterable, UnboundedSource.CheckpointMark, Serializable { - - private final List shardCheckpoints; - - public KinesisReaderCheckpoint(Iterable shardCheckpoints) { - this.shardCheckpoints = ImmutableList.copyOf(shardCheckpoints); - } - - /** - * Splits given multi-shard checkpoint into partitions of approximately equal size. - * - * @param desiredNumSplits - upper limit for number of partitions to generate. - * @return list of checkpoints covering consecutive partitions of current checkpoint. - */ - public List splitInto(int desiredNumSplits) { - int partitionSize = divideAndRoundUp(shardCheckpoints.size(), desiredNumSplits); - - List checkpoints = newArrayList(); - for (List shardPartition : partition(shardCheckpoints, partitionSize)) { - checkpoints.add(new KinesisReaderCheckpoint(shardPartition)); - } - return checkpoints; - } - - private int divideAndRoundUp(int nominator, int denominator) { - return (nominator + denominator - 1) / denominator; - } - - String getStreamName() { - Iterator iterator = iterator(); - return iterator.hasNext() ? iterator.next().getStreamName() : "[unknown]"; - } - - @Override - public void finalizeCheckpoint() throws IOException {} - - @Override - public String toString() { - return shardCheckpoints.toString(); - } - - @Override - public Iterator iterator() { - return shardCheckpoints.iterator(); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java deleted file mode 100644 index 381ee0d81064..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.commons.lang3.builder.HashCodeBuilder.reflectionHashCode; - -import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber; -import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import org.apache.commons.lang.builder.EqualsBuilder; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** {@link UserRecord} enhanced with utility methods. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class KinesisRecord { - - private Instant readTime; - private String streamName; - private String shardId; - private long subSequenceNumber; - private String sequenceNumber; - private Instant approximateArrivalTimestamp; - private ByteBuffer data; - private String partitionKey; - - public KinesisRecord(UserRecord record, String streamName, String shardId) { - this( - record.getData(), - record.getSequenceNumber(), - record.getSubSequenceNumber(), - record.getPartitionKey(), - new Instant(record.getApproximateArrivalTimestamp()), - Instant.now(), - streamName, - shardId); - } - - public KinesisRecord( - ByteBuffer data, - String sequenceNumber, - long subSequenceNumber, - String partitionKey, - Instant approximateArrivalTimestamp, - Instant readTime, - String streamName, - String shardId) { - this.data = data; - this.sequenceNumber = sequenceNumber; - this.subSequenceNumber = subSequenceNumber; - this.partitionKey = partitionKey; - this.approximateArrivalTimestamp = approximateArrivalTimestamp; - this.readTime = readTime; - this.streamName = streamName; - this.shardId = shardId; - } - - public ExtendedSequenceNumber getExtendedSequenceNumber() { - return new ExtendedSequenceNumber(getSequenceNumber(), getSubSequenceNumber()); - } - - /** @return The unique identifier of the record based on its position in the stream. */ - public byte[] getUniqueId() { - return getExtendedSequenceNumber().toString().getBytes(StandardCharsets.UTF_8); - } - - public Instant getReadTime() { - return readTime; - } - - public String getStreamName() { - return streamName; - } - - public String getShardId() { - return shardId; - } - - public byte[] getDataAsBytes() { - return getData().array(); - } - - @Override - public boolean equals(@Nullable Object obj) { - return EqualsBuilder.reflectionEquals(this, obj); - } - - @Override - public int hashCode() { - return reflectionHashCode(this); - } - - public long getSubSequenceNumber() { - return subSequenceNumber; - } - - /** @return The unique identifier of the record within its shard. */ - public String getSequenceNumber() { - return sequenceNumber; - } - - /** @return The approximate time that the record was inserted into the stream. */ - public Instant getApproximateArrivalTimestamp() { - return approximateArrivalTimestamp; - } - - /** @return The data blob. */ - public ByteBuffer getData() { - return data; - } - - public String getPartitionKey() { - return partitionKey; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java deleted file mode 100644 index efe4d2346797..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import org.apache.beam.sdk.coders.AtomicCoder; -import org.apache.beam.sdk.coders.ByteArrayCoder; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.InstantCoder; -import org.apache.beam.sdk.coders.StringUtf8Coder; -import org.apache.beam.sdk.coders.VarLongCoder; -import org.joda.time.Instant; - -/** A {@link Coder} for {@link KinesisRecord}. */ -class KinesisRecordCoder extends AtomicCoder { - - private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of(); - private static final ByteArrayCoder BYTE_ARRAY_CODER = ByteArrayCoder.of(); - private static final InstantCoder INSTANT_CODER = InstantCoder.of(); - private static final VarLongCoder VAR_LONG_CODER = VarLongCoder.of(); - - public static KinesisRecordCoder of() { - return new KinesisRecordCoder(); - } - - @Override - public void encode(KinesisRecord value, OutputStream outStream) throws IOException { - BYTE_ARRAY_CODER.encode(value.getData().array(), outStream); - STRING_CODER.encode(value.getSequenceNumber(), outStream); - STRING_CODER.encode(value.getPartitionKey(), outStream); - INSTANT_CODER.encode(value.getApproximateArrivalTimestamp(), outStream); - VAR_LONG_CODER.encode(value.getSubSequenceNumber(), outStream); - INSTANT_CODER.encode(value.getReadTime(), outStream); - STRING_CODER.encode(value.getStreamName(), outStream); - STRING_CODER.encode(value.getShardId(), outStream); - } - - @Override - public KinesisRecord decode(InputStream inStream) throws IOException { - ByteBuffer data = ByteBuffer.wrap(BYTE_ARRAY_CODER.decode(inStream)); - String sequenceNumber = STRING_CODER.decode(inStream); - String partitionKey = STRING_CODER.decode(inStream); - Instant approximateArrivalTimestamp = INSTANT_CODER.decode(inStream); - long subSequenceNumber = VAR_LONG_CODER.decode(inStream); - Instant readTimestamp = INSTANT_CODER.decode(inStream); - String streamName = STRING_CODER.decode(inStream); - String shardId = STRING_CODER.decode(inStream); - return new KinesisRecord( - data, - sequenceNumber, - subSequenceNumber, - partitionKey, - approximateArrivalTimestamp, - readTimestamp, - streamName, - shardId); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisShardClosedException.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisShardClosedException.java deleted file mode 100644 index 322b78a418e9..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisShardClosedException.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -/** Internal exception thrown when shard end is encountered during iteration. */ -class KinesisShardClosedException extends Exception { - - KinesisShardClosedException(String message) { - super(message); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java deleted file mode 100644 index e53d71ed0b81..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java +++ /dev/null @@ -1,164 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; - -import java.util.List; -import org.apache.beam.sdk.coders.Coder; -import org.apache.beam.sdk.coders.SerializableCoder; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.options.PipelineOptions; -import org.joda.time.Duration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** Represents source for single stream in Kinesis. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class KinesisSource extends UnboundedSource { - - private static final Logger LOG = LoggerFactory.getLogger(KinesisSource.class); - - private final AWSClientsProvider awsClientsProvider; - private final String streamName; - private final Duration upToDateThreshold; - private final WatermarkPolicyFactory watermarkPolicyFactory; - private final RateLimitPolicyFactory rateLimitPolicyFactory; - private CheckpointGenerator initialCheckpointGenerator; - private final Integer limit; - private final Integer maxCapacityPerShard; - - KinesisSource( - AWSClientsProvider awsClientsProvider, - String streamName, - StartingPoint startingPoint, - Duration upToDateThreshold, - WatermarkPolicyFactory watermarkPolicyFactory, - RateLimitPolicyFactory rateLimitPolicyFactory, - Integer limit, - Integer maxCapacityPerShard) { - this( - awsClientsProvider, - new DynamicCheckpointGenerator(streamName, startingPoint), - streamName, - upToDateThreshold, - watermarkPolicyFactory, - rateLimitPolicyFactory, - limit, - maxCapacityPerShard); - } - - private KinesisSource( - AWSClientsProvider awsClientsProvider, - CheckpointGenerator initialCheckpoint, - String streamName, - Duration upToDateThreshold, - WatermarkPolicyFactory watermarkPolicyFactory, - RateLimitPolicyFactory rateLimitPolicyFactory, - Integer limit, - Integer maxCapacityPerShard) { - this.awsClientsProvider = awsClientsProvider; - this.initialCheckpointGenerator = initialCheckpoint; - this.streamName = streamName; - this.upToDateThreshold = upToDateThreshold; - this.watermarkPolicyFactory = watermarkPolicyFactory; - this.rateLimitPolicyFactory = rateLimitPolicyFactory; - this.limit = limit; - this.maxCapacityPerShard = maxCapacityPerShard; - validate(); - } - - /** - * Generate splits for reading from the stream. Basically, it'll try to evenly split set of shards - * in the stream into {@code desiredNumSplits} partitions. Each partition is then a split. - */ - @Override - public List split(int desiredNumSplits, PipelineOptions options) throws Exception { - KinesisReaderCheckpoint checkpoint = - initialCheckpointGenerator.generate( - SimplifiedKinesisClient.from(awsClientsProvider, limit)); - - List sources = newArrayList(); - - for (KinesisReaderCheckpoint partition : checkpoint.splitInto(desiredNumSplits)) { - sources.add( - new KinesisSource( - awsClientsProvider, - new StaticCheckpointGenerator(partition), - streamName, - upToDateThreshold, - watermarkPolicyFactory, - rateLimitPolicyFactory, - limit, - maxCapacityPerShard)); - } - return sources; - } - - /** - * Creates reader based on given {@link KinesisReaderCheckpoint}. If {@link - * KinesisReaderCheckpoint} is not given, then we use {@code initialCheckpointGenerator} to - * generate new checkpoint. - */ - @Override - public UnboundedReader createReader( - PipelineOptions options, KinesisReaderCheckpoint checkpointMark) { - - CheckpointGenerator checkpointGenerator = initialCheckpointGenerator; - - if (checkpointMark != null) { - checkpointGenerator = new StaticCheckpointGenerator(checkpointMark); - } - - LOG.info("Creating new reader using {}", checkpointGenerator); - - return new KinesisReader( - SimplifiedKinesisClient.from(awsClientsProvider, limit), - checkpointGenerator, - this, - watermarkPolicyFactory, - rateLimitPolicyFactory, - upToDateThreshold, - maxCapacityPerShard); - } - - @Override - public Coder getCheckpointMarkCoder() { - return SerializableCoder.of(KinesisReaderCheckpoint.class); - } - - @Override - public void validate() { - checkNotNull(awsClientsProvider); - checkNotNull(initialCheckpointGenerator); - checkNotNull(watermarkPolicyFactory); - checkNotNull(rateLimitPolicyFactory); - } - - @Override - public Coder getOutputCoder() { - return KinesisRecordCoder.of(); - } - - String getStreamName() { - return streamName; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisTransformRegistrar.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisTransformRegistrar.java deleted file mode 100644 index b8e1a38c73ff..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisTransformRegistrar.java +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.regions.Regions; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.google.auto.service.AutoService; -import java.util.Map; -import java.util.Properties; -import org.apache.beam.sdk.expansion.ExternalTransformRegistrar; -import org.apache.beam.sdk.transforms.ExternalTransformBuilder; -import org.apache.beam.sdk.transforms.PTransform; -import org.apache.beam.sdk.values.PBegin; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.sdk.values.PDone; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * Exposes {@link KinesisIO.Write} and {@link KinesisIO.Read} as an external transform for - * cross-language usage. - */ -@AutoService(ExternalTransformRegistrar.class) -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -public class KinesisTransformRegistrar implements ExternalTransformRegistrar { - public static final String WRITE_URN = "beam:transform:org.apache.beam:kinesis_write:v1"; - public static final String READ_DATA_URN = "beam:transform:org.apache.beam:kinesis_read_data:v1"; - - @Override - public Map> knownBuilderInstances() { - return ImmutableMap.of(WRITE_URN, new WriteBuilder(), READ_DATA_URN, new ReadDataBuilder()); - } - - private abstract static class CrossLanguageConfiguration { - String streamName; - String awsAccessKey; - String awsSecretKey; - Regions region; - @Nullable String serviceEndpoint; - boolean verifyCertificate; - - public void setStreamName(String streamName) { - this.streamName = streamName; - } - - public void setAwsAccessKey(String awsAccessKey) { - this.awsAccessKey = awsAccessKey; - } - - public void setAwsSecretKey(String awsSecretKey) { - this.awsSecretKey = awsSecretKey; - } - - public void setRegion(String region) { - this.region = Regions.fromName(region); - } - - public void setServiceEndpoint(@Nullable String serviceEndpoint) { - this.serviceEndpoint = serviceEndpoint; - } - - public void setVerifyCertificate(@Nullable Boolean verifyCertificate) { - this.verifyCertificate = verifyCertificate == null || verifyCertificate; - } - } - - public static class WriteBuilder - implements ExternalTransformBuilder, PDone> { - - public static class Configuration extends CrossLanguageConfiguration { - private Properties producerProperties; - private String partitionKey; - - public void setProducerProperties(Map producerProperties) { - if (producerProperties != null) { - Properties properties = new Properties(); - producerProperties.forEach(properties::setProperty); - this.producerProperties = properties; - } - } - - public void setPartitionKey(String partitionKey) { - this.partitionKey = partitionKey; - } - } - - @Override - public PTransform, PDone> buildExternal(Configuration configuration) { - KinesisIO.Write writeTransform = - KinesisIO.write() - .withStreamName(configuration.streamName) - .withAWSClientsProvider( - configuration.awsAccessKey, - configuration.awsSecretKey, - configuration.region, - configuration.serviceEndpoint, - configuration.verifyCertificate) - .withPartitionKey(configuration.partitionKey); - - if (configuration.producerProperties != null) { - writeTransform = writeTransform.withProducerProperties(configuration.producerProperties); - } - - return writeTransform; - } - } - - public static class ReadDataBuilder - implements ExternalTransformBuilder< - ReadDataBuilder.Configuration, PBegin, PCollection> { - - public static class Configuration extends CrossLanguageConfiguration { - private @Nullable Long maxNumRecords; - private @Nullable Duration maxReadTime; - private @Nullable InitialPositionInStream initialPositionInStream; - private @Nullable Instant initialTimestampInStream; - private @Nullable Integer requestRecordsLimit; - private @Nullable Duration upToDateThreshold; - private @Nullable Long maxCapacityPerShard; - private @Nullable WatermarkPolicy watermarkPolicy; - private @Nullable Duration watermarkIdleDurationThreshold; - private @Nullable Duration rateLimit; - - public void setMaxNumRecords(@Nullable Long maxNumRecords) { - this.maxNumRecords = maxNumRecords; - } - - public void setMaxReadTime(@Nullable Long maxReadTime) { - if (maxReadTime != null) { - this.maxReadTime = Duration.millis(maxReadTime); - } - } - - public void setInitialPositionInStream(@Nullable String initialPositionInStream) { - if (initialPositionInStream != null) { - this.initialPositionInStream = InitialPositionInStream.valueOf(initialPositionInStream); - } - } - - public void setInitialTimestampInStream(@Nullable Long initialTimestampInStream) { - if (initialTimestampInStream != null) { - this.initialTimestampInStream = Instant.ofEpochMilli(initialTimestampInStream); - } - } - - public void setRequestRecordsLimit(@Nullable Long requestRecordsLimit) { - if (requestRecordsLimit != null) { - this.requestRecordsLimit = requestRecordsLimit.intValue(); - } - } - - public void setUpToDateThreshold(@Nullable Long upToDateThreshold) { - if (upToDateThreshold != null) { - this.upToDateThreshold = Duration.millis(upToDateThreshold); - } - } - - public void setMaxCapacityPerShard(@Nullable Long maxCapacityPerShard) { - this.maxCapacityPerShard = maxCapacityPerShard; - } - - public void setWatermarkPolicy(@Nullable String watermarkPolicy) { - if (watermarkPolicy != null) { - this.watermarkPolicy = WatermarkPolicy.valueOf(watermarkPolicy); - } - } - - public void setWatermarkIdleDurationThreshold(@Nullable Long watermarkIdleDurationThreshold) { - if (watermarkIdleDurationThreshold != null) { - this.watermarkIdleDurationThreshold = Duration.millis(watermarkIdleDurationThreshold); - } - } - - public void setRateLimit(@Nullable Long rateLimit) { - if (rateLimit != null) { - this.rateLimit = Duration.millis(rateLimit); - } - } - } - - private enum WatermarkPolicy { - ARRIVAL_TIME, - PROCESSING_TIME - } - - @Override - public PTransform> buildExternal( - ReadDataBuilder.Configuration configuration) { - KinesisIO.Read readTransform = - KinesisIO.readData() - .withStreamName(configuration.streamName) - .withAWSClientsProvider( - configuration.awsAccessKey, - configuration.awsSecretKey, - configuration.region, - configuration.serviceEndpoint, - configuration.verifyCertificate); - - if (configuration.maxNumRecords != null) { - readTransform = readTransform.withMaxNumRecords(configuration.maxNumRecords); - } - if (configuration.upToDateThreshold != null) { - readTransform = readTransform.withUpToDateThreshold(configuration.upToDateThreshold); - } - if (configuration.maxCapacityPerShard != null) { - readTransform = - readTransform.withMaxCapacityPerShard(configuration.maxCapacityPerShard.intValue()); - } - if (configuration.watermarkPolicy != null) { - switch (configuration.watermarkPolicy) { - case ARRIVAL_TIME: - readTransform = - configuration.watermarkIdleDurationThreshold != null - ? readTransform.withArrivalTimeWatermarkPolicy( - configuration.watermarkIdleDurationThreshold) - : readTransform.withArrivalTimeWatermarkPolicy(); - break; - case PROCESSING_TIME: - readTransform = readTransform.withProcessingTimeWatermarkPolicy(); - break; - default: - throw new RuntimeException( - String.format( - "Unsupported watermark policy type: %s", configuration.watermarkPolicy)); - } - } - if (configuration.rateLimit != null) { - readTransform = readTransform.withFixedDelayRateLimitPolicy(configuration.rateLimit); - } - if (configuration.maxReadTime != null) { - readTransform = readTransform.withMaxReadTime(configuration.maxReadTime); - } - if (configuration.initialPositionInStream != null) { - readTransform = - readTransform.withInitialPositionInStream(configuration.initialPositionInStream); - } - if (configuration.requestRecordsLimit != null) { - readTransform = readTransform.withRequestRecordsLimit(configuration.requestRecordsLimit); - } - if (configuration.initialTimestampInStream != null) { - readTransform = - readTransform.withInitialTimestampInStream(configuration.initialTimestampInStream); - } - return readTransform; - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicy.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicy.java deleted file mode 100644 index 8ee1e81558f7..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicy.java +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.util.List; - -public interface RateLimitPolicy { - - /** - * Called after Kinesis records are successfully retrieved. - * - * @param records The list of retrieved records. - */ - default void onSuccess(List records) throws InterruptedException {} - - /** - * Called after the Kinesis client is throttled. - * - * @param e The {@code KinesisClientThrottledException} thrown by the client. - */ - default void onThrottle(KinesisClientThrottledException e) throws InterruptedException {} -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactory.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactory.java deleted file mode 100644 index 12e013136abc..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactory.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.IOException; -import java.io.Serializable; -import java.util.List; -import java.util.function.Supplier; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.joda.time.Duration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Implement this interface to create a {@code RateLimitPolicy}. Used to create a rate limiter for - * each shard. The factory will be called from multiple threads, so if it returns a singleton - * instance of RateLimitPolicy then that instance should be thread-safe, otherwise it should return - * separate RateLimitPolicy instances. - */ -public interface RateLimitPolicyFactory extends Serializable { - - RateLimitPolicy getRateLimitPolicy(); - - static RateLimitPolicyFactory withoutLimiter() { - return () -> new RateLimitPolicy() {}; - } - - static RateLimitPolicyFactory withDefaultRateLimiter() { - return withDefaultRateLimiter( - Duration.millis(100), Duration.millis(500), Duration.standardSeconds(1)); - } - - static RateLimitPolicyFactory withDefaultRateLimiter( - Duration emptySuccessBaseDelay, Duration throttledBaseDelay, Duration maxDelay) { - return () -> new DefaultRateLimiter(emptySuccessBaseDelay, throttledBaseDelay, maxDelay); - } - - static RateLimitPolicyFactory withFixedDelay() { - return DelayIntervalRateLimiter::new; - } - - static RateLimitPolicyFactory withFixedDelay(Duration delay) { - return () -> new DelayIntervalRateLimiter(() -> delay); - } - - static RateLimitPolicyFactory withDelay(Supplier delay) { - return () -> new DelayIntervalRateLimiter(delay); - } - - class DelayIntervalRateLimiter implements RateLimitPolicy { - - private static final Supplier DEFAULT_DELAY = () -> Duration.standardSeconds(1); - - private final Supplier delay; - - public DelayIntervalRateLimiter() { - this(DEFAULT_DELAY); - } - - public DelayIntervalRateLimiter(Supplier delay) { - this.delay = delay; - } - - @Override - public void onSuccess(List records) throws InterruptedException { - Thread.sleep(delay.get().getMillis()); - } - } - - /** - * Default rate limiter that throttles reading from a shard using an exponential backoff if the - * response is empty or if the consumer is throttled by AWS. - */ - class DefaultRateLimiter implements RateLimitPolicy { - private static final Logger LOG = LoggerFactory.getLogger(DefaultRateLimiter.class); - private final Sleeper sleeper; - private final BackOff throttled; - private final BackOff emptySuccess; - - @VisibleForTesting - DefaultRateLimiter(BackOff emptySuccess, BackOff throttled, Sleeper sleeper) { - this.emptySuccess = emptySuccess; - this.throttled = throttled; - this.sleeper = sleeper; - } - - public DefaultRateLimiter(BackOff emptySuccess, BackOff throttled) { - this(emptySuccess, throttled, Sleeper.DEFAULT); - } - - public DefaultRateLimiter( - Duration emptySuccessBaseDelay, Duration throttledBaseDelay, Duration maxDelay) { - this( - FluentBackoff.DEFAULT - .withInitialBackoff(emptySuccessBaseDelay) - .withMaxBackoff(maxDelay) - .backoff(), - FluentBackoff.DEFAULT - .withInitialBackoff(throttledBaseDelay) - .withMaxBackoff(maxDelay) - .backoff()); - } - - @Override - public void onSuccess(List records) throws InterruptedException { - try { - if (records.isEmpty()) { - BackOffUtils.next(sleeper, emptySuccess); - } else { - emptySuccess.reset(); - } - throttled.reset(); - } catch (IOException e) { - LOG.warn("Error applying onSuccess rate limit policy", e); - } - } - - @Override - public void onThrottle(KinesisClientThrottledException e) throws InterruptedException { - try { - BackOffUtils.next(sleeper, throttled); - } catch (IOException ioe) { - LOG.warn("Error applying onThrottle rate limit policy", e); - } - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java deleted file mode 100644 index 2a0456e04052..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; - -import java.util.List; - -/** - * Filters out records, which were already processed and checkpointed. - * - *

We need this step, because we can get iterators from Kinesis only with "sequenceNumber" - * accuracy, not with "subSequenceNumber" accuracy. - */ -class RecordFilter { - - public List apply(List records, ShardCheckpoint checkpoint) { - List filteredRecords = newArrayList(); - for (KinesisRecord record : records) { - if (checkpoint.isBeforeOrAt(record)) { - filteredRecords.add(record); - } - } - return filteredRecords; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java deleted file mode 100644 index b185a396d1fd..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AFTER_SEQUENCE_NUMBER; -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_SEQUENCE_NUMBER; -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_TIMESTAMP; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber; -import com.amazonaws.services.kinesis.model.Record; -import com.amazonaws.services.kinesis.model.ShardIteratorType; -import java.io.Serializable; -import org.joda.time.Instant; - -/** - * Checkpoint mark for single shard in the stream. Current position in the shard is determined by - * either: - * - *

    - *
  • {@link #shardIteratorType} if it is equal to {@link ShardIteratorType#LATEST} or {@link - * ShardIteratorType#TRIM_HORIZON} - *
  • combination of {@link #sequenceNumber} and {@link #subSequenceNumber} if {@link - * ShardIteratorType#AFTER_SEQUENCE_NUMBER} or {@link ShardIteratorType#AT_SEQUENCE_NUMBER} - *
- * - * This class is immutable. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class ShardCheckpoint implements Serializable { - - private final String streamName; - private final String shardId; - private final String sequenceNumber; - private final ShardIteratorType shardIteratorType; - private final Long subSequenceNumber; - private final Instant timestamp; - - public ShardCheckpoint(String streamName, String shardId, StartingPoint startingPoint) { - this( - streamName, - shardId, - ShardIteratorType.fromValue(startingPoint.getPositionName()), - startingPoint.getTimestamp()); - } - - public ShardCheckpoint( - String streamName, String shardId, ShardIteratorType shardIteratorType, Instant timestamp) { - this(streamName, shardId, shardIteratorType, null, null, timestamp); - } - - public ShardCheckpoint( - String streamName, - String shardId, - ShardIteratorType shardIteratorType, - String sequenceNumber, - Long subSequenceNumber) { - this(streamName, shardId, shardIteratorType, sequenceNumber, subSequenceNumber, null); - } - - private ShardCheckpoint( - String streamName, - String shardId, - ShardIteratorType shardIteratorType, - String sequenceNumber, - Long subSequenceNumber, - Instant timestamp) { - this.shardIteratorType = checkNotNull(shardIteratorType, "shardIteratorType"); - this.streamName = checkNotNull(streamName, "streamName"); - this.shardId = checkNotNull(shardId, "shardId"); - if (shardIteratorType == AT_SEQUENCE_NUMBER || shardIteratorType == AFTER_SEQUENCE_NUMBER) { - checkNotNull( - sequenceNumber, - "You must provide sequence number for AT_SEQUENCE_NUMBER" + " or AFTER_SEQUENCE_NUMBER"); - } else { - checkArgument( - sequenceNumber == null, - "Sequence number must be null for LATEST, TRIM_HORIZON or AT_TIMESTAMP"); - } - if (shardIteratorType == AT_TIMESTAMP) { - checkNotNull(timestamp, "You must provide timestamp for AT_TIMESTAMP"); - } else { - checkArgument( - timestamp == null, "Timestamp must be null for an iterator type other than AT_TIMESTAMP"); - } - - this.subSequenceNumber = subSequenceNumber; - this.sequenceNumber = sequenceNumber; - this.timestamp = timestamp; - } - - /** - * Used to compare {@link ShardCheckpoint} object to {@link KinesisRecord}. Depending on the - * underlying shardIteratorType, it will either compare the timestamp or the {@link - * ExtendedSequenceNumber}. - * - * @param other - * @return if current checkpoint mark points before or at given {@link ExtendedSequenceNumber} - */ - public boolean isBeforeOrAt(KinesisRecord other) { - if (shardIteratorType == AT_TIMESTAMP) { - return timestamp.compareTo(other.getApproximateArrivalTimestamp()) <= 0; - } - int result = extendedSequenceNumber().compareTo(other.getExtendedSequenceNumber()); - if (result == 0) { - return shardIteratorType == AT_SEQUENCE_NUMBER; - } - return result < 0; - } - - private ExtendedSequenceNumber extendedSequenceNumber() { - String fullSequenceNumber = sequenceNumber; - if (fullSequenceNumber == null) { - fullSequenceNumber = shardIteratorType.toString(); - } - return new ExtendedSequenceNumber(fullSequenceNumber, subSequenceNumber); - } - - @Override - public String toString() { - return String.format( - "Checkpoint %s for stream %s, shard %s: %s", - shardIteratorType, streamName, shardId, sequenceNumber); - } - - public String getShardIterator(SimplifiedKinesisClient kinesisClient) - throws TransientKinesisException { - if (checkpointIsInTheMiddleOfAUserRecord()) { - return kinesisClient.getShardIterator( - streamName, shardId, AT_SEQUENCE_NUMBER, sequenceNumber, null); - } - return kinesisClient.getShardIterator( - streamName, shardId, shardIteratorType, sequenceNumber, timestamp); - } - - private boolean checkpointIsInTheMiddleOfAUserRecord() { - return shardIteratorType == AFTER_SEQUENCE_NUMBER && subSequenceNumber != null; - } - - /** - * Used to advance checkpoint mark to position after given {@link Record}. - * - * @param record - * @return new checkpoint object pointing directly after given {@link Record} - */ - public ShardCheckpoint moveAfter(KinesisRecord record) { - return new ShardCheckpoint( - streamName, - shardId, - AFTER_SEQUENCE_NUMBER, - record.getSequenceNumber(), - record.getSubSequenceNumber()); - } - - public String getStreamName() { - return streamName; - } - - public String getShardId() { - return shardId; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardReadersPool.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardReadersPool.java deleted file mode 100644 index 703d10d3640e..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardReadersPool.java +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.util.concurrent.TimeUnit.MILLISECONDS; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import java.util.Collection; -import java.util.Comparator; -import java.util.List; -import java.util.concurrent.ArrayBlockingQueue; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.StreamSupport; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Internal shard iterators pool. It maintains the thread pool for reading Kinesis shards in - * separate threads. Read records are stored in a blocking queue of limited capacity. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class ShardReadersPool { - - private static final Logger LOG = LoggerFactory.getLogger(ShardReadersPool.class); - public static final int DEFAULT_CAPACITY_PER_SHARD = 10_000; - private static final int ATTEMPTS_TO_SHUTDOWN = 3; - private static final int QUEUE_OFFER_TIMEOUT_MS = 500; - private static final int QUEUE_POLL_TIMEOUT_MS = 1000; - - /** - * Executor service for running the threads that read records from shards handled by this pool. - * Each thread runs the {@link ShardReadersPool#readLoop(ShardRecordsIterator, RateLimitPolicy)} - * method and handles exactly one shard. - */ - private final ExecutorService executorService; - - /** - * A Bounded buffer for read records. Records are added to this buffer within {@link - * ShardReadersPool#readLoop(ShardRecordsIterator, RateLimitPolicy)} method and removed in {@link - * ShardReadersPool#nextRecord()}. - */ - private BlockingQueue recordsQueue; - - /** - * A reference to an immutable mapping of {@link ShardRecordsIterator} instances to shard ids. - * This map is replaced with a new one when resharding operation on any handled shard occurs. - */ - private final AtomicReference> shardIteratorsMap; - - /** A map for keeping the current number of records stored in a buffer per shard. */ - private final ConcurrentMap numberOfRecordsInAQueueByShard; - - private final SimplifiedKinesisClient kinesis; - private final WatermarkPolicyFactory watermarkPolicyFactory; - private final RateLimitPolicyFactory rateLimitPolicyFactory; - private final KinesisReaderCheckpoint initialCheckpoint; - private final int queueCapacityPerShard; - private final AtomicBoolean poolOpened = new AtomicBoolean(true); - - ShardReadersPool( - SimplifiedKinesisClient kinesis, - KinesisReaderCheckpoint initialCheckpoint, - WatermarkPolicyFactory watermarkPolicyFactory, - RateLimitPolicyFactory rateLimitPolicyFactory, - int queueCapacityPerShard) { - this.kinesis = kinesis; - this.initialCheckpoint = initialCheckpoint; - this.watermarkPolicyFactory = watermarkPolicyFactory; - this.rateLimitPolicyFactory = rateLimitPolicyFactory; - this.queueCapacityPerShard = queueCapacityPerShard; - this.executorService = Executors.newCachedThreadPool(); - this.numberOfRecordsInAQueueByShard = new ConcurrentHashMap<>(); - this.shardIteratorsMap = new AtomicReference<>(); - } - - void start() throws TransientKinesisException { - ImmutableMap.Builder shardsMap = ImmutableMap.builder(); - for (ShardCheckpoint checkpoint : initialCheckpoint) { - shardsMap.put(checkpoint.getShardId(), createShardIterator(kinesis, checkpoint)); - } - shardIteratorsMap.set(shardsMap.build()); - if (!shardIteratorsMap.get().isEmpty()) { - recordsQueue = - new ArrayBlockingQueue<>(queueCapacityPerShard * shardIteratorsMap.get().size()); - String streamName = initialCheckpoint.getStreamName(); - startReadingShards(shardIteratorsMap.get().values(), streamName); - } else { - // There are no shards to handle when restoring from an empty checkpoint. Empty checkpoints - // are generated when the last shard handled by this pool was closed - recordsQueue = new ArrayBlockingQueue<>(1); - } - } - - // Note: readLoop() will log any Throwable raised so opt to ignore the future result - @SuppressWarnings("FutureReturnValueIgnored") - void startReadingShards(Iterable shardRecordsIterators, String streamName) { - if (!shardRecordsIterators.iterator().hasNext()) { - LOG.info("Stream {} will not be read, no shard records iterators available", streamName); - return; - } - LOG.info( - "Starting to read {} stream from {} shards", - streamName, - getShardIdsFromRecordsIterators(shardRecordsIterators)); - for (final ShardRecordsIterator recordsIterator : shardRecordsIterators) { - numberOfRecordsInAQueueByShard.put(recordsIterator.getShardId(), new AtomicInteger()); - executorService.submit( - () -> readLoop(recordsIterator, rateLimitPolicyFactory.getRateLimitPolicy())); - } - } - - private void readLoop(ShardRecordsIterator shardRecordsIterator, RateLimitPolicy rateLimiter) { - while (poolOpened.get()) { - try { - try { - List kinesisRecords = shardRecordsIterator.readNextBatch(); - try { - for (KinesisRecord kinesisRecord : kinesisRecords) { - while (true) { - if (!poolOpened.get()) { - return; - } - if (recordsQueue.offer(kinesisRecord, QUEUE_OFFER_TIMEOUT_MS, MILLISECONDS)) { - numberOfRecordsInAQueueByShard.get(kinesisRecord.getShardId()).incrementAndGet(); - break; - } - } - } - } finally { - // One of the paths into this finally block is recordsQueue.put() throwing - // InterruptedException so we should check the thread's interrupted status before - // calling onSuccess(). - if (!Thread.currentThread().isInterrupted()) { - rateLimiter.onSuccess(kinesisRecords); - } - } - } catch (KinesisShardClosedException e) { - LOG.info( - "Shard iterator for {} shard is closed, finishing the read loop", - shardRecordsIterator.getShardId(), - e); - // Wait until all records from already closed shard are taken from the buffer and only - // then start reading successive shards. This guarantees that checkpoints will contain - // either parent or child shard and never both. Such approach allows for more - // straightforward checkpoint restoration than in a case when new shards are read - // immediately. - waitUntilAllShardRecordsRead(shardRecordsIterator); - readFromSuccessiveShards(shardRecordsIterator); - break; - } - } catch (KinesisClientThrottledException e) { - try { - rateLimiter.onThrottle(e); - } catch (InterruptedException ex) { - LOG.warn("Thread was interrupted, finishing the read loop", ex); - Thread.currentThread().interrupt(); - break; - } - } catch (TransientKinesisException e) { - LOG.warn("Transient exception occurred.", e); - } catch (InterruptedException e) { - LOG.warn("Thread was interrupted, finishing the read loop", e); - Thread.currentThread().interrupt(); - break; - } catch (Throwable e) { - LOG.error("Unexpected exception occurred", e); - } - } - LOG.info("Kinesis Shard read loop has finished"); - } - - CustomOptional nextRecord() { - try { - KinesisRecord record = recordsQueue.poll(QUEUE_POLL_TIMEOUT_MS, MILLISECONDS); - if (record == null) { - return CustomOptional.absent(); - } - shardIteratorsMap.get().get(record.getShardId()).ackRecord(record); - - // numberOfRecordsInAQueueByShard contains the counter for a given shard until the shard is - // closed and then it's counter reaches 0. Thus the access here is safe - numberOfRecordsInAQueueByShard.get(record.getShardId()).decrementAndGet(); - return CustomOptional.of(record); - } catch (InterruptedException e) { - LOG.warn("Interrupted while waiting for KinesisRecord from the buffer"); - return CustomOptional.absent(); - } - } - - void stop() { - LOG.info("Closing shard iterators pool"); - poolOpened.set(false); - executorService.shutdown(); - awaitTermination(); - if (!executorService.isTerminated()) { - LOG.warn( - "Executor service was not completely terminated after {} attempts, trying to forcibly stop it.", - ATTEMPTS_TO_SHUTDOWN); - executorService.shutdownNow(); - awaitTermination(); - } - } - - private void awaitTermination() { - int attemptsLeft = ATTEMPTS_TO_SHUTDOWN; - boolean isTerminated = executorService.isTerminated(); - - while (!isTerminated && attemptsLeft-- > 0) { - try { - isTerminated = executorService.awaitTermination(10, TimeUnit.SECONDS); - } catch (InterruptedException e) { - LOG.error("Interrupted while waiting for the executor service to shutdown"); - throw new RuntimeException(e); - } - if (!isTerminated && attemptsLeft > 0) { - LOG.warn( - "Executor service is taking long time to shutdown, will retry. {} attempts left", - attemptsLeft); - } - } - } - - Instant getWatermark() { - return getMinTimestamp(ShardRecordsIterator::getShardWatermark); - } - - Instant getLatestRecordTimestamp() { - return getMinTimestamp(ShardRecordsIterator::getLatestRecordTimestamp); - } - - private Instant getMinTimestamp(Function timestampExtractor) { - return shardIteratorsMap.get().values().stream() - .map(timestampExtractor) - .min(Comparator.naturalOrder()) - .orElse(BoundedWindow.TIMESTAMP_MAX_VALUE); - } - - KinesisReaderCheckpoint getCheckpointMark() { - ImmutableMap currentShardIterators = shardIteratorsMap.get(); - return new KinesisReaderCheckpoint( - currentShardIterators.values().stream() - .map( - shardRecordsIterator -> { - checkArgument( - shardRecordsIterator != null, "shardRecordsIterator can not be null"); - return shardRecordsIterator.getCheckpoint(); - }) - .collect(Collectors.toList())); - } - - ShardRecordsIterator createShardIterator( - SimplifiedKinesisClient kinesis, ShardCheckpoint checkpoint) - throws TransientKinesisException { - return new ShardRecordsIterator(checkpoint, kinesis, watermarkPolicyFactory); - } - - /** - * Waits until all records read from given shardRecordsIterator are taken from {@link - * #recordsQueue} and acked. Uses {@link #numberOfRecordsInAQueueByShard} map to track the amount - * of remaining events. - */ - private void waitUntilAllShardRecordsRead(ShardRecordsIterator shardRecordsIterator) - throws InterruptedException { - // Given shard is already closed so no more records will be read from it. Thus the counter for - // that shard will be strictly decreasing to 0. - AtomicInteger numberOfShardRecordsInAQueue = - numberOfRecordsInAQueueByShard.get(shardRecordsIterator.getShardId()); - while (!(numberOfShardRecordsInAQueue.get() == 0)) { - Thread.sleep(TimeUnit.SECONDS.toMillis(1)); - } - } - - /** - * Tries to find successors of a given shard and start reading them. Each closed shard can have 0, - * 1 or 2 successors - * - *
    - *
  • 0 successors - when shard was merged with another shard and this one is considered - * adjacent by merge operation - *
  • 1 successor - when shard was merged with another shard and this one is considered a - * parent by merge operation - *
  • 2 successors - when shard was split into two shards - *
- * - *

Once shard successors are established, the transition to reading new shards can begin. - * During this operation, the immutable {@link ShardReadersPool#shardIteratorsMap} is replaced - * with a new one holding references to {@link ShardRecordsIterator} instances for open shards - * only. Potentially there might be more shard iterators closing at the same time so {@link - * ShardReadersPool#shardIteratorsMap} is updated in a loop using CAS pattern to keep all the - * updates. Then, the counter for already closed shard is removed from {@link - * ShardReadersPool#numberOfRecordsInAQueueByShard} map. - * - *

Finally when update is finished, new threads are spawned for reading the successive shards. - * The thread that handled reading from already closed shard can finally complete. - */ - private void readFromSuccessiveShards(final ShardRecordsIterator closedShardIterator) - throws TransientKinesisException { - List successiveShardRecordIterators = - closedShardIterator.findSuccessiveShardRecordIterators(); - - ImmutableMap current; - ImmutableMap updated; - do { - current = shardIteratorsMap.get(); - updated = - createMapWithSuccessiveShards( - current, closedShardIterator, successiveShardRecordIterators); - } while (!shardIteratorsMap.compareAndSet(current, updated)); - numberOfRecordsInAQueueByShard.remove(closedShardIterator.getShardId()); - - logSuccessiveShardsFromRecordsIterators(closedShardIterator, successiveShardRecordIterators); - - String streamName = closedShardIterator.getStreamName(); - startReadingShards(successiveShardRecordIterators, streamName); - } - - private static void logSuccessiveShardsFromRecordsIterators( - final ShardRecordsIterator closedShardIterator, - final Collection shardRecordsIterators) { - if (shardRecordsIterators.isEmpty()) { - LOG.info( - "Shard {} for {} stream is closed. Found no successive shards to read from " - + "as it was merged with another shard and this one is considered adjacent by merge operation", - closedShardIterator.getShardId(), - closedShardIterator.getStreamName()); - } else { - LOG.info( - "Shard {} for {} stream is closed, found successive shards to read from: {}", - closedShardIterator.getShardId(), - closedShardIterator.getStreamName(), - getShardIdsFromRecordsIterators(shardRecordsIterators)); - } - } - - private static List getShardIdsFromRecordsIterators( - final Iterable iterators) { - return StreamSupport.stream(iterators.spliterator(), false) - .map(ShardRecordsIterator::getShardId) - .collect(Collectors.toList()); - } - - private ImmutableMap createMapWithSuccessiveShards( - ImmutableMap current, - ShardRecordsIterator closedShardIterator, - List successiveShardRecordIterators) - throws TransientKinesisException { - ImmutableMap.Builder shardsMap = ImmutableMap.builder(); - Iterable allShards = - Iterables.concat(current.values(), successiveShardRecordIterators); - for (ShardRecordsIterator iterator : allShards) { - if (!closedShardIterator.getShardId().equals(iterator.getShardId())) { - shardsMap.put(iterator.getShardId(), iterator); - } - } - return shardsMap.build(); - } - - @VisibleForTesting - BlockingQueue getRecordsQueue() { - return recordsQueue; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java deleted file mode 100644 index aae179373a2c..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.model.ExpiredIteratorException; -import com.amazonaws.services.kinesis.model.Shard; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.atomic.AtomicReference; -import org.joda.time.Instant; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Iterates over records in a single shard. Records are retrieved in batches via calls to {@link - * ShardRecordsIterator#readNextBatch()}. Client has to confirm processed records by calling {@link - * ShardRecordsIterator#ackRecord(KinesisRecord)} method. - */ -class ShardRecordsIterator { - - private static final Logger LOG = LoggerFactory.getLogger(ShardRecordsIterator.class); - - private final SimplifiedKinesisClient kinesis; - private final RecordFilter filter; - private final String streamName; - private final String shardId; - private final AtomicReference checkpoint; - private final WatermarkPolicy watermarkPolicy; - private final WatermarkPolicyFactory watermarkPolicyFactory; - private final WatermarkPolicy latestRecordTimestampPolicy = - WatermarkPolicyFactory.withArrivalTimePolicy().createWatermarkPolicy(); - private String shardIterator; - - ShardRecordsIterator( - ShardCheckpoint initialCheckpoint, - SimplifiedKinesisClient simplifiedKinesisClient, - WatermarkPolicyFactory watermarkPolicyFactory) - throws TransientKinesisException { - this(initialCheckpoint, simplifiedKinesisClient, watermarkPolicyFactory, new RecordFilter()); - } - - ShardRecordsIterator( - ShardCheckpoint initialCheckpoint, - SimplifiedKinesisClient simplifiedKinesisClient, - WatermarkPolicyFactory watermarkPolicyFactory, - RecordFilter filter) - throws TransientKinesisException { - this.checkpoint = new AtomicReference<>(checkNotNull(initialCheckpoint, "initialCheckpoint")); - this.filter = checkNotNull(filter, "filter"); - this.kinesis = checkNotNull(simplifiedKinesisClient, "simplifiedKinesisClient"); - this.streamName = initialCheckpoint.getStreamName(); - this.shardId = initialCheckpoint.getShardId(); - this.shardIterator = initialCheckpoint.getShardIterator(kinesis); - this.watermarkPolicy = watermarkPolicyFactory.createWatermarkPolicy(); - this.watermarkPolicyFactory = watermarkPolicyFactory; - } - - List readNextBatch() - throws TransientKinesisException, KinesisShardClosedException { - if (shardIterator == null) { - throw new KinesisShardClosedException( - String.format( - "Shard iterator reached end of the shard: streamName=%s, shardId=%s", - streamName, shardId)); - } - GetKinesisRecordsResult response = fetchRecords(); - LOG.debug( - "Fetched {} new records from shard: streamName={}, shardId={}", - response.getRecords().size(), - streamName, - shardId); - - List filteredRecords = filter.apply(response.getRecords(), checkpoint.get()); - return filteredRecords; - } - - private GetKinesisRecordsResult fetchRecords() throws TransientKinesisException { - try { - GetKinesisRecordsResult response = kinesis.getRecords(shardIterator, streamName, shardId); - shardIterator = response.getNextShardIterator(); - return response; - } catch (ExpiredIteratorException e) { - LOG.info( - "Refreshing expired iterator for shard: streamName={}, shardId={}", - streamName, - shardId, - e); - shardIterator = checkpoint.get().getShardIterator(kinesis); - return fetchRecords(); - } - } - - ShardCheckpoint getCheckpoint() { - return checkpoint.get(); - } - - void ackRecord(KinesisRecord record) { - checkpoint.set(checkpoint.get().moveAfter(record)); - watermarkPolicy.update(record); - latestRecordTimestampPolicy.update(record); - } - - Instant getShardWatermark() { - return watermarkPolicy.getWatermark(); - } - - Instant getLatestRecordTimestamp() { - return latestRecordTimestampPolicy.getWatermark(); - } - - String getShardId() { - return shardId; - } - - String getStreamName() { - return streamName; - } - - List findSuccessiveShardRecordIterators() throws TransientKinesisException { - List shards = kinesis.listShardsFollowingClosedShard(streamName, shardId); - List successiveShardRecordIterators = new ArrayList<>(); - for (Shard shard : shards) { - if (shardId.equals(shard.getParentShardId())) { - ShardCheckpoint shardCheckpoint = - new ShardCheckpoint( - streamName, - shard.getShardId(), - new StartingPoint(InitialPositionInStream.TRIM_HORIZON)); - successiveShardRecordIterators.add( - new ShardRecordsIterator(shardCheckpoint, kinesis, watermarkPolicyFactory)); - } - } - return successiveShardRecordIterators; - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java deleted file mode 100644 index 88fcc7fcec35..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java +++ /dev/null @@ -1,359 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.AmazonClientException; -import com.amazonaws.AmazonServiceException; -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.cloudwatch.model.Datapoint; -import com.amazonaws.services.cloudwatch.model.Dimension; -import com.amazonaws.services.cloudwatch.model.GetMetricStatisticsRequest; -import com.amazonaws.services.cloudwatch.model.GetMetricStatisticsResult; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord; -import com.amazonaws.services.kinesis.model.DescribeStreamSummaryRequest; -import com.amazonaws.services.kinesis.model.ExpiredIteratorException; -import com.amazonaws.services.kinesis.model.GetRecordsRequest; -import com.amazonaws.services.kinesis.model.GetRecordsResult; -import com.amazonaws.services.kinesis.model.GetShardIteratorRequest; -import com.amazonaws.services.kinesis.model.LimitExceededException; -import com.amazonaws.services.kinesis.model.ListShardsRequest; -import com.amazonaws.services.kinesis.model.ListShardsResult; -import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException; -import com.amazonaws.services.kinesis.model.Shard; -import com.amazonaws.services.kinesis.model.ShardFilter; -import com.amazonaws.services.kinesis.model.ShardFilterType; -import com.amazonaws.services.kinesis.model.ShardIteratorType; -import com.amazonaws.services.kinesis.model.StreamDescriptionSummary; -import java.io.IOException; -import java.util.Collections; -import java.util.Date; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.function.Supplier; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.BackOffUtils; -import org.apache.beam.sdk.util.FluentBackoff; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.Minutes; - -/** Wraps {@link AmazonKinesis} class providing much simpler interface and proper error handling. */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class SimplifiedKinesisClient { - - private static final String KINESIS_NAMESPACE = "AWS/Kinesis"; - private static final String INCOMING_RECORDS_METRIC = "IncomingBytes"; - private static final int PERIOD_GRANULARITY_IN_SECONDS = 60; - private static final String SUM_STATISTIC = "Sum"; - private static final String STREAM_NAME_DIMENSION = "StreamName"; - private static final int LIST_SHARDS_MAX_RESULTS = 1_000; - private static final Duration - SPACING_FOR_TIMESTAMP_LIST_SHARDS_REQUEST_TO_NOT_EXCEED_TRIM_HORIZON = - Duration.standardMinutes(5); - private static final int DESCRIBE_STREAM_SUMMARY_MAX_ATTEMPTS = 10; - private static final Duration DESCRIBE_STREAM_SUMMARY_INITIAL_BACKOFF = - Duration.standardSeconds(1); - - private final AmazonKinesis kinesis; - private final AmazonCloudWatch cloudWatch; - private final Integer limit; - private final Supplier currentInstantSupplier; - - public SimplifiedKinesisClient( - AmazonKinesis kinesis, AmazonCloudWatch cloudWatch, Integer limit) { - this(kinesis, cloudWatch, limit, Instant::now); - } - - SimplifiedKinesisClient( - AmazonKinesis kinesis, - AmazonCloudWatch cloudWatch, - Integer limit, - Supplier currentInstantSupplier) { - this.kinesis = checkNotNull(kinesis, "kinesis"); - this.cloudWatch = checkNotNull(cloudWatch, "cloudWatch"); - this.limit = limit; - this.currentInstantSupplier = currentInstantSupplier; - } - - public static SimplifiedKinesisClient from(AWSClientsProvider provider, Integer limit) { - return new SimplifiedKinesisClient( - provider.getKinesisClient(), provider.getCloudWatchClient(), limit); - } - - public String getShardIterator( - final String streamName, - final String shardId, - final ShardIteratorType shardIteratorType, - final String startingSequenceNumber, - final Instant timestamp) - throws TransientKinesisException { - final Date date = timestamp != null ? timestamp.toDate() : null; - return wrapExceptions( - () -> - kinesis - .getShardIterator( - new GetShardIteratorRequest() - .withStreamName(streamName) - .withShardId(shardId) - .withShardIteratorType(shardIteratorType) - .withStartingSequenceNumber(startingSequenceNumber) - .withTimestamp(date)) - .getShardIterator()); - } - - public List listShardsAtPoint(final String streamName, final StartingPoint startingPoint) - throws TransientKinesisException { - ShardFilter shardFilter = - wrapExceptions(() -> buildShardFilterForStartingPoint(streamName, startingPoint)); - return listShards(streamName, shardFilter); - } - - private ShardFilter buildShardFilterForStartingPoint( - String streamName, StartingPoint startingPoint) throws IOException, InterruptedException { - InitialPositionInStream position = startingPoint.getPosition(); - switch (position) { - case LATEST: - return new ShardFilter().withType(ShardFilterType.AT_LATEST); - case TRIM_HORIZON: - return new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - case AT_TIMESTAMP: - return buildShardFilterForTimestamp(streamName, startingPoint.getTimestamp()); - default: - throw new IllegalArgumentException( - String.format("Unrecognized '%s' position to create shard filter with", position)); - } - } - - private ShardFilter buildShardFilterForTimestamp( - String streamName, Instant startingPointTimestamp) throws IOException, InterruptedException { - StreamDescriptionSummary streamDescription = describeStreamSummary(streamName); - - Instant streamCreationTimestamp = new Instant(streamDescription.getStreamCreationTimestamp()); - if (streamCreationTimestamp.isAfter(startingPointTimestamp)) { - return new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - } - - Duration retentionPeriod = Duration.standardHours(streamDescription.getRetentionPeriodHours()); - - Instant streamTrimHorizonTimestamp = - currentInstantSupplier - .get() - .minus(retentionPeriod) - .plus(SPACING_FOR_TIMESTAMP_LIST_SHARDS_REQUEST_TO_NOT_EXCEED_TRIM_HORIZON); - if (startingPointTimestamp.isAfter(streamTrimHorizonTimestamp)) { - return new ShardFilter() - .withType(ShardFilterType.AT_TIMESTAMP) - .withTimestamp(startingPointTimestamp.toDate()); - } else { - return new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - } - } - - private StreamDescriptionSummary describeStreamSummary(final String streamName) - throws IOException, InterruptedException { - // DescribeStreamSummary has limits that can be hit fairly easily if we are attempting - // to configure multiple KinesisIO inputs in the same account. Retry up to - // DESCRIBE_STREAM_SUMMARY_MAX_ATTEMPTS times if we end up hitting that limit. - // - // Only pass the wrapped exception up once that limit is reached. Use FluentBackoff - // to implement the retry policy. - FluentBackoff retryBackoff = - FluentBackoff.DEFAULT - .withMaxRetries(DESCRIBE_STREAM_SUMMARY_MAX_ATTEMPTS) - .withInitialBackoff(DESCRIBE_STREAM_SUMMARY_INITIAL_BACKOFF); - BackOff backoff = retryBackoff.backoff(); - Sleeper sleeper = Sleeper.DEFAULT; - - DescribeStreamSummaryRequest request = new DescribeStreamSummaryRequest(); - request.setStreamName(streamName); - while (true) { - try { - return kinesis.describeStreamSummary(request).getStreamDescriptionSummary(); - } catch (LimitExceededException exc) { - if (!BackOffUtils.next(sleeper, backoff)) { - throw exc; - } - } - } - } - - public List listShardsFollowingClosedShard( - final String streamName, final String exclusiveStartShardId) - throws TransientKinesisException { - ShardFilter shardFilter = - new ShardFilter() - .withType(ShardFilterType.AFTER_SHARD_ID) - .withShardId(exclusiveStartShardId); - return listShards(streamName, shardFilter); - } - - private List listShards(final String streamName, final ShardFilter shardFilter) - throws TransientKinesisException { - return wrapExceptions( - () -> { - ImmutableList.Builder shardsBuilder = ImmutableList.builder(); - - String currentNextToken = null; - do { - ListShardsRequest request = new ListShardsRequest(); - request.setMaxResults(LIST_SHARDS_MAX_RESULTS); - if (currentNextToken != null) { - request.setNextToken(currentNextToken); - } else { - request.setStreamName(streamName); - } - request.setShardFilter(shardFilter); - - ListShardsResult response = kinesis.listShards(request); - List shards = response.getShards(); - shardsBuilder.addAll(shards); - currentNextToken = response.getNextToken(); - } while (currentNextToken != null); - - return shardsBuilder.build(); - }); - } - - /** - * Gets records from Kinesis and deaggregates them if needed. - * - * @return list of deaggregated records - * @throws TransientKinesisException - in case of recoverable situation - */ - public GetKinesisRecordsResult getRecords(String shardIterator, String streamName, String shardId) - throws TransientKinesisException { - return getRecords(shardIterator, streamName, shardId, limit); - } - - /** - * Gets records from Kinesis and deaggregates them if needed. - * - * @return list of deaggregated records - * @throws TransientKinesisException - in case of recoverable situation - */ - public GetKinesisRecordsResult getRecords( - final String shardIterator, - final String streamName, - final String shardId, - final Integer limit) - throws TransientKinesisException { - return wrapExceptions( - () -> { - GetRecordsResult response = - kinesis.getRecords( - new GetRecordsRequest().withShardIterator(shardIterator).withLimit(limit)); - return new GetKinesisRecordsResult( - UserRecord.deaggregate(response.getRecords()), - response.getNextShardIterator(), - response.getMillisBehindLatest(), - streamName, - shardId); - }); - } - - /** - * Gets total size in bytes of all events that remain in Kinesis stream after specified instant. - * - * @return total size in bytes of all Kinesis events after specified instant - */ - public long getBacklogBytes(String streamName, Instant countSince) - throws TransientKinesisException { - return getBacklogBytes(streamName, countSince, new Instant()); - } - - /** - * Gets total size in bytes of all events that remain in Kinesis stream between specified - * instants. - * - * @return total size in bytes of all Kinesis events after specified instant - */ - public long getBacklogBytes( - final String streamName, final Instant countSince, final Instant countTo) - throws TransientKinesisException { - return wrapExceptions( - () -> { - Minutes period = Minutes.minutesBetween(countSince, countTo); - if (period.isLessThan(Minutes.ONE)) { - return 0L; - } - - GetMetricStatisticsRequest request = - createMetricStatisticsRequest(streamName, countSince, countTo, period); - - long totalSizeInBytes = 0; - GetMetricStatisticsResult result = cloudWatch.getMetricStatistics(request); - for (Datapoint point : result.getDatapoints()) { - totalSizeInBytes += point.getSum().longValue(); - } - return totalSizeInBytes; - }); - } - - GetMetricStatisticsRequest createMetricStatisticsRequest( - String streamName, Instant countSince, Instant countTo, Minutes period) { - return new GetMetricStatisticsRequest() - .withNamespace(KINESIS_NAMESPACE) - .withMetricName(INCOMING_RECORDS_METRIC) - .withPeriod(period.getMinutes() * PERIOD_GRANULARITY_IN_SECONDS) - .withStartTime(countSince.toDate()) - .withEndTime(countTo.toDate()) - .withStatistics(Collections.singletonList(SUM_STATISTIC)) - .withDimensions( - Collections.singletonList( - new Dimension().withName(STREAM_NAME_DIMENSION).withValue(streamName))); - } - - /** - * Wraps Amazon specific exceptions into more friendly format. - * - * @throws TransientKinesisException - in case of recoverable situation, i.e. the request rate is - * too high, Kinesis remote service failed, network issue, etc. - * @throws ExpiredIteratorException - if iterator needs to be refreshed - * @throws RuntimeException - in all other cases - */ - private T wrapExceptions(Callable callable) throws TransientKinesisException { - try { - return callable.call(); - } catch (ExpiredIteratorException e) { - throw e; - } catch (LimitExceededException | ProvisionedThroughputExceededException e) { - throw new KinesisClientThrottledException( - "Too many requests to Kinesis. Wait some time and retry.", e); - } catch (AmazonServiceException e) { - if (e.getErrorType() == AmazonServiceException.ErrorType.Service) { - throw new TransientKinesisException("Kinesis backend failed. Wait some time and retry.", e); - } - throw new RuntimeException("Kinesis client side failure", e); - } catch (AmazonClientException e) { - if (e.isRetryable()) { - throw new TransientKinesisException("Retryable client failure", e); - } - throw new RuntimeException("Not retryable client failure", e); - } catch (Exception e) { - throw new RuntimeException("Unknown kinesis failure, when trying to reach kinesis", e); - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java deleted file mode 100644 index 6fde16d7f3b9..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import java.io.Serializable; -import java.util.Objects; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; - -/** - * Denotes a point at which the reader should start reading from a Kinesis stream. It can be - * expressed either as an {@link InitialPositionInStream} enum constant or a timestamp, in which - * case the reader will start reading at the specified point in time. - */ -@SuppressWarnings({ - "nullness" // TODO(https://github.com/apache/beam/issues/20497) -}) -class StartingPoint implements Serializable { - - private final InitialPositionInStream position; - private final Instant timestamp; - - public StartingPoint(InitialPositionInStream position) { - this.position = checkNotNull(position, "position"); - this.timestamp = null; - } - - public StartingPoint(Instant timestamp) { - this.timestamp = checkNotNull(timestamp, "timestamp"); - this.position = InitialPositionInStream.AT_TIMESTAMP; - } - - public InitialPositionInStream getPosition() { - return position; - } - - public String getPositionName() { - return position.name(); - } - - public Instant getTimestamp() { - return timestamp; - } - - @Override - public boolean equals(@Nullable Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - StartingPoint that = (StartingPoint) o; - return position == that.position && Objects.equals(timestamp, that.timestamp); - } - - @Override - public int hashCode() { - return Objects.hash(position, timestamp); - } - - @Override - public String toString() { - if (timestamp == null) { - return position.toString(); - } else { - return "Starting at timestamp " + timestamp; - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java deleted file mode 100644 index 9364f98eccea..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -/** Always returns the same instance of checkpoint. */ -class StaticCheckpointGenerator implements CheckpointGenerator { - - private final KinesisReaderCheckpoint checkpoint; - - public StaticCheckpointGenerator(KinesisReaderCheckpoint checkpoint) { - checkNotNull(checkpoint, "checkpoint"); - this.checkpoint = checkpoint; - } - - @Override - public KinesisReaderCheckpoint generate(SimplifiedKinesisClient client) { - return checkpoint; - } - - @Override - public String toString() { - return checkpoint.toString(); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java deleted file mode 100644 index 876acf85c998..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.AmazonClientException; - -/** A transient exception thrown by Kinesis. */ -class TransientKinesisException extends Exception { - - public TransientKinesisException(String s, AmazonClientException e) { - super(s, e); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkParameters.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkParameters.java deleted file mode 100644 index f604dc9dc11b..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkParameters.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument; - -import com.google.auto.value.AutoValue; -import java.io.Serializable; -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** {@code WatermarkParameters} contains the parameters used for watermark computation. */ -@AutoValue -public abstract class WatermarkParameters implements Serializable { - - private static final SerializableFunction ARRIVAL_TIME_FN = - KinesisRecord::getApproximateArrivalTimestamp; - private static final Duration STANDARD_WATERMARK_IDLE_DURATION_THRESHOLD = - Duration.standardMinutes(2); - - abstract Instant getCurrentWatermark(); - - abstract Instant getEventTime(); - - abstract Instant getLastUpdateTime(); - - abstract SerializableFunction getTimestampFn(); - - abstract Duration getWatermarkIdleDurationThreshold(); - - public abstract Builder toBuilder(); - - public static Builder builder() { - return new AutoValue_WatermarkParameters.Builder() - .setCurrentWatermark(BoundedWindow.TIMESTAMP_MIN_VALUE) - .setEventTime(BoundedWindow.TIMESTAMP_MIN_VALUE) - .setTimestampFn(ARRIVAL_TIME_FN) - .setLastUpdateTime(Instant.now()) - .setWatermarkIdleDurationThreshold(STANDARD_WATERMARK_IDLE_DURATION_THRESHOLD); - } - - @AutoValue.Builder - abstract static class Builder { - abstract Builder setCurrentWatermark(Instant currentWatermark); - - abstract Builder setEventTime(Instant eventTime); - - abstract Builder setLastUpdateTime(Instant now); - - abstract Builder setWatermarkIdleDurationThreshold(Duration watermarkIdleDurationThreshold); - - abstract Builder setTimestampFn(SerializableFunction timestampFn); - - abstract WatermarkParameters build(); - } - - public static WatermarkParameters create() { - return builder().build(); - } - - /** - * Specify the {@code SerializableFunction} to extract the event time from a {@code - * KinesisRecord}. The default event timestamp is the arrival timestamp of the record. - * - * @param timestampFn Serializable function to extract the timestamp from a record. - */ - public WatermarkParameters withTimestampFn( - SerializableFunction timestampFn) { - checkArgument(timestampFn != null, "timestampFn function is null"); - return toBuilder().setTimestampFn(timestampFn).build(); - } - - /** - * Specify the watermark idle duration to consider before advancing the watermark. The default - * watermark idle duration threshold is 2 minutes. - */ - public WatermarkParameters withWatermarkIdleDurationThreshold(Duration idleDurationThreshold) { - checkArgument(idleDurationThreshold != null, "watermark idle duration threshold is null"); - return toBuilder().setWatermarkIdleDurationThreshold(idleDurationThreshold).build(); - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicy.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicy.java deleted file mode 100644 index 69ac45f0a7dc..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicy.java +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.Serializable; -import org.joda.time.Instant; - -/** Implement this interface to define a custom watermark calculation heuristic. */ -public interface WatermarkPolicy extends Serializable { - - Instant getWatermark(); - - void update(KinesisRecord record); -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyFactory.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyFactory.java deleted file mode 100644 index 62de2fe16a5e..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyFactory.java +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.io.Serializable; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Ordering; -import org.joda.time.Duration; -import org.joda.time.Instant; - -/** - * Implement this interface to create a {@code WatermarkPolicy}. Used by the {@code - * ShardRecordsIterator} to create a watermark policy for every shard. - */ -public interface WatermarkPolicyFactory extends Serializable { - - WatermarkPolicy createWatermarkPolicy(); - - /** Returns an ArrivalTimeWatermarkPolicy. */ - static WatermarkPolicyFactory withArrivalTimePolicy() { - return ArrivalTimeWatermarkPolicy::new; - } - - /** - * Returns an ArrivalTimeWatermarkPolicy. - * - * @param watermarkIdleDurationThreshold watermark idle duration threshold. - */ - static WatermarkPolicyFactory withArrivalTimePolicy(Duration watermarkIdleDurationThreshold) { - return () -> new ArrivalTimeWatermarkPolicy(watermarkIdleDurationThreshold); - } - - /** Returns an ProcessingTimeWatermarkPolicy. */ - static WatermarkPolicyFactory withProcessingTimePolicy() { - return ProcessingTimeWatermarkPolicy::new; - } - - /** - * Returns an custom WatermarkPolicyFactory. - * - * @param watermarkParameters Watermark parameters (timestamp extractor, watermark lag) for the - * policy. - */ - static WatermarkPolicyFactory withCustomWatermarkPolicy(WatermarkParameters watermarkParameters) { - return () -> new CustomWatermarkPolicy(watermarkParameters); - } - - /** - * ArrivalTimeWatermarkPolicy uses {@link CustomWatermarkPolicy} for watermark computation. It - * uses the arrival time of the record as the event time for watermark calculations. - */ - class ArrivalTimeWatermarkPolicy implements WatermarkPolicy { - private final CustomWatermarkPolicy watermarkPolicy; - - ArrivalTimeWatermarkPolicy() { - this.watermarkPolicy = - new CustomWatermarkPolicy( - WatermarkParameters.create() - .withTimestampFn(KinesisRecord::getApproximateArrivalTimestamp)); - } - - ArrivalTimeWatermarkPolicy(Duration idleDurationThreshold) { - WatermarkParameters watermarkParameters = - WatermarkParameters.create() - .withTimestampFn(KinesisRecord::getApproximateArrivalTimestamp) - .withWatermarkIdleDurationThreshold(idleDurationThreshold); - this.watermarkPolicy = new CustomWatermarkPolicy(watermarkParameters); - } - - @Override - public Instant getWatermark() { - return watermarkPolicy.getWatermark(); - } - - @Override - public void update(KinesisRecord record) { - watermarkPolicy.update(record); - } - } - - /** - * CustomWatermarkPolicy uses parameters defined in {@link WatermarkParameters} to compute - * watermarks. This can be used as a standard heuristic to compute watermarks. Used by {@link - * ArrivalTimeWatermarkPolicy}. - */ - class CustomWatermarkPolicy implements WatermarkPolicy { - private WatermarkParameters watermarkParameters; - - CustomWatermarkPolicy(WatermarkParameters watermarkParameters) { - this.watermarkParameters = watermarkParameters; - } - - @Override - public Instant getWatermark() { - Instant now = Instant.now(); - Instant watermarkIdleThreshold = - now.minus(watermarkParameters.getWatermarkIdleDurationThreshold()); - - Instant newWatermark = - watermarkParameters.getLastUpdateTime().isBefore(watermarkIdleThreshold) - ? watermarkIdleThreshold - : watermarkParameters.getEventTime(); - - if (newWatermark.isAfter(watermarkParameters.getCurrentWatermark())) { - watermarkParameters = - watermarkParameters.toBuilder().setCurrentWatermark(newWatermark).build(); - } - return watermarkParameters.getCurrentWatermark(); - } - - @Override - public void update(KinesisRecord record) { - watermarkParameters = - watermarkParameters - .toBuilder() - .setEventTime( - Ordering.natural() - .max( - watermarkParameters.getEventTime(), - watermarkParameters.getTimestampFn().apply(record))) - .setLastUpdateTime(Instant.now()) - .build(); - } - } - - /** Watermark policy where the processing time is used as the event time. */ - class ProcessingTimeWatermarkPolicy implements WatermarkPolicy { - @Override - public Instant getWatermark() { - return Instant.now(); - } - - @Override - public void update(KinesisRecord record) { - // do nothing - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/package-info.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/package-info.java deleted file mode 100644 index 6a36686cd8ab..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/package-info.java +++ /dev/null @@ -1,20 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** Transforms for reading and writing from Amazon Kinesis. */ -package org.apache.beam.sdk.io.kinesis; diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsModule.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsModule.java deleted file mode 100644 index d8396d5da924..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsModule.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis.serde; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkNotNull; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.PropertiesFileCredentialsProvider; -import com.amazonaws.auth.SystemPropertiesCredentialsProvider; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import com.fasterxml.jackson.core.JsonGenerator; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.DeserializationContext; -import com.fasterxml.jackson.databind.JsonDeserializer; -import com.fasterxml.jackson.databind.JsonSerializer; -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.SerializerProvider; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonSerialize; -import com.fasterxml.jackson.databind.jsontype.TypeDeserializer; -import com.fasterxml.jackson.databind.jsontype.TypeSerializer; -import com.fasterxml.jackson.databind.module.SimpleModule; -import java.io.IOException; -import java.util.Map; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet; -import org.apache.commons.lang3.reflect.FieldUtils; - -/** - * A Jackson {@link Module} that registers a {@link JsonSerializer} and {@link JsonDeserializer} for - * {@link AWSCredentialsProvider} and some subclasses. The serialized form is a JSON map. - * - *

Note: This module is a stripped down version of {@link AwsModule} in 'amazon-web-services' - * excluding support for STS. - */ -class AwsModule extends SimpleModule { - - private static final String AWS_ACCESS_KEY_ID = "awsAccessKeyId"; - private static final String AWS_SECRET_KEY = "awsSecretKey"; - private static final String SESSION_TOKEN = "sessionToken"; - private static final String CREDENTIALS_FILE_PATH = "credentialsFilePath"; - - @SuppressWarnings({"nullness"}) - AwsModule() { - super("AwsModule"); - setMixInAnnotation(AWSCredentialsProvider.class, AWSCredentialsProviderMixin.class); - } - - /** A mixin to add Jackson annotations to {@link AWSCredentialsProvider}. */ - @JsonDeserialize(using = AWSCredentialsProviderDeserializer.class) - @JsonSerialize(using = AWSCredentialsProviderSerializer.class) - @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY) - private static class AWSCredentialsProviderMixin {} - - private static class AWSCredentialsProviderDeserializer - extends JsonDeserializer { - - @Override - public AWSCredentialsProvider deserialize(JsonParser jsonParser, DeserializationContext context) - throws IOException { - return context.readValue(jsonParser, AWSCredentialsProvider.class); - } - - @Override - public AWSCredentialsProvider deserializeWithType( - JsonParser jsonParser, DeserializationContext context, TypeDeserializer typeDeserializer) - throws IOException { - Map asMap = - checkNotNull(jsonParser.readValueAs(new TypeReference>() {})); - - String typeNameKey = typeDeserializer.getPropertyName(); - String typeName = getNotNull(asMap, typeNameKey, "unknown"); - if (hasName(AWSStaticCredentialsProvider.class, typeName)) { - boolean isSession = asMap.containsKey(SESSION_TOKEN); - if (isSession) { - return new AWSStaticCredentialsProvider( - new BasicSessionCredentials( - getNotNull(asMap, AWS_ACCESS_KEY_ID, typeName), - getNotNull(asMap, AWS_SECRET_KEY, typeName), - getNotNull(asMap, SESSION_TOKEN, typeName))); - } else { - return new AWSStaticCredentialsProvider( - new BasicAWSCredentials( - getNotNull(asMap, AWS_ACCESS_KEY_ID, typeName), - getNotNull(asMap, AWS_SECRET_KEY, typeName))); - } - } else if (hasName(PropertiesFileCredentialsProvider.class, typeName)) { - return new PropertiesFileCredentialsProvider( - getNotNull(asMap, CREDENTIALS_FILE_PATH, typeName)); - } else if (hasName(ClasspathPropertiesFileCredentialsProvider.class, typeName)) { - return new ClasspathPropertiesFileCredentialsProvider( - getNotNull(asMap, CREDENTIALS_FILE_PATH, typeName)); - } else if (hasName(DefaultAWSCredentialsProviderChain.class, typeName)) { - return DefaultAWSCredentialsProviderChain.getInstance(); - } else if (hasName(EnvironmentVariableCredentialsProvider.class, typeName)) { - return new EnvironmentVariableCredentialsProvider(); - } else if (hasName(SystemPropertiesCredentialsProvider.class, typeName)) { - return new SystemPropertiesCredentialsProvider(); - } else if (hasName(ProfileCredentialsProvider.class, typeName)) { - return new ProfileCredentialsProvider(); - } else if (hasName(EC2ContainerCredentialsProviderWrapper.class, typeName)) { - return new EC2ContainerCredentialsProviderWrapper(); - } else { - throw new IOException( - String.format("AWS credential provider type '%s' is not supported", typeName)); - } - } - - @SuppressWarnings({"nullness"}) - private String getNotNull(Map map, String key, String typeName) { - return checkNotNull( - map.get(key), "AWS credentials provider type '%s' is missing '%s'", typeName, key); - } - - private boolean hasName(Class clazz, String typeName) { - return typeName.equals(clazz.getSimpleName()); - } - } - - private static class AWSCredentialsProviderSerializer - extends JsonSerializer { - // These providers are singletons, so don't require any serialization, other than type. - private static final ImmutableSet SINGLETON_CREDENTIAL_PROVIDERS = - ImmutableSet.of( - DefaultAWSCredentialsProviderChain.class, - EnvironmentVariableCredentialsProvider.class, - SystemPropertiesCredentialsProvider.class, - ProfileCredentialsProvider.class, - EC2ContainerCredentialsProviderWrapper.class); - - @Override - public void serialize( - AWSCredentialsProvider credentialsProvider, - JsonGenerator jsonGenerator, - SerializerProvider serializers) - throws IOException { - serializers.defaultSerializeValue(credentialsProvider, jsonGenerator); - } - - @Override - public void serializeWithType( - AWSCredentialsProvider credentialsProvider, - JsonGenerator jsonGenerator, - SerializerProvider serializers, - TypeSerializer typeSerializer) - throws IOException { - // BEAM-11958 Use deprecated Jackson APIs to be compatible with older versions of jackson - typeSerializer.writeTypePrefixForObject(credentialsProvider, jsonGenerator); - - Class providerClass = credentialsProvider.getClass(); - if (providerClass.equals(AWSStaticCredentialsProvider.class)) { - AWSCredentials credentials = credentialsProvider.getCredentials(); - if (credentials.getClass().equals(BasicSessionCredentials.class)) { - BasicSessionCredentials sessionCredentials = (BasicSessionCredentials) credentials; - jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, sessionCredentials.getAWSAccessKeyId()); - jsonGenerator.writeStringField(AWS_SECRET_KEY, sessionCredentials.getAWSSecretKey()); - jsonGenerator.writeStringField(SESSION_TOKEN, sessionCredentials.getSessionToken()); - } else { - jsonGenerator.writeStringField(AWS_ACCESS_KEY_ID, credentials.getAWSAccessKeyId()); - jsonGenerator.writeStringField(AWS_SECRET_KEY, credentials.getAWSSecretKey()); - } - } else if (providerClass.equals(PropertiesFileCredentialsProvider.class)) { - jsonGenerator.writeStringField( - CREDENTIALS_FILE_PATH, readProviderField(credentialsProvider, CREDENTIALS_FILE_PATH)); - } else if (providerClass.equals(ClasspathPropertiesFileCredentialsProvider.class)) { - jsonGenerator.writeStringField( - CREDENTIALS_FILE_PATH, readProviderField(credentialsProvider, CREDENTIALS_FILE_PATH)); - } else if (!SINGLETON_CREDENTIAL_PROVIDERS.contains(credentialsProvider.getClass())) { - throw new IllegalArgumentException( - "Unsupported AWS credentials provider type " + credentialsProvider.getClass()); - } - // BEAM-11958 Use deprecated Jackson APIs to be compatible with older versions of jackson - typeSerializer.writeTypeSuffixForObject(credentialsProvider, jsonGenerator); - } - - private String readProviderField(AWSCredentialsProvider provider, String fieldName) - throws IOException { - try { - return (String) checkNotNull(FieldUtils.readField(provider, fieldName, true)); - } catch (NullPointerException | IllegalArgumentException | IllegalAccessException e) { - throw new IOException( - String.format( - "Failed to access private field '%s' of AWS credential provider type '%s' with reflection", - fieldName, provider.getClass().getSimpleName()), - e); - } - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtils.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtils.java deleted file mode 100644 index 37f7b4d65b46..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtils.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis.serde; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.io.IOException; - -/** Utilities for working with AWS Serializables. */ -public class AwsSerializableUtils { - - public static String serialize(AWSCredentialsProvider awsCredentialsProvider) { - ObjectMapper om = new ObjectMapper(); - om.registerModule(new AwsModule()); - try { - return om.writeValueAsString(awsCredentialsProvider); - } catch (JsonProcessingException e) { - throw new IllegalArgumentException("AwsCredentialsProvider can not be serialized to Json", e); - } - } - - public static AWSCredentialsProvider deserialize(String awsCredentialsProviderSerialized) { - ObjectMapper om = new ObjectMapper(); - om.registerModule(new AwsModule()); - try { - return om.readValue(awsCredentialsProviderSerialized, AWSCredentialsProvider.class); - } catch (IOException e) { - throw new IllegalArgumentException( - "AwsCredentialsProvider can not be deserialized from Json", e); - } - } -} diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/package-info.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/package-info.java deleted file mode 100644 index 4384814b0818..000000000000 --- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/serde/package-info.java +++ /dev/null @@ -1,19 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -/** Defines serializers / deserializers for AWS. */ -package org.apache.beam.sdk.io.kinesis.serde; diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java deleted file mode 100644 index 704a5ab07ba9..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java +++ /dev/null @@ -1,504 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.lang.Integer.parseInt; -import static java.lang.Math.min; -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.transform; -import static org.apache.commons.lang.builder.HashCodeBuilder.reflectionHashCode; - -import com.amazonaws.AmazonWebServiceRequest; -import com.amazonaws.ResponseMetadata; -import com.amazonaws.http.HttpResponse; -import com.amazonaws.http.SdkHttpMetadata; -import com.amazonaws.regions.Region; -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.model.AddTagsToStreamRequest; -import com.amazonaws.services.kinesis.model.AddTagsToStreamResult; -import com.amazonaws.services.kinesis.model.CreateStreamRequest; -import com.amazonaws.services.kinesis.model.CreateStreamResult; -import com.amazonaws.services.kinesis.model.DecreaseStreamRetentionPeriodRequest; -import com.amazonaws.services.kinesis.model.DecreaseStreamRetentionPeriodResult; -import com.amazonaws.services.kinesis.model.DeleteStreamRequest; -import com.amazonaws.services.kinesis.model.DeleteStreamResult; -import com.amazonaws.services.kinesis.model.DeregisterStreamConsumerRequest; -import com.amazonaws.services.kinesis.model.DeregisterStreamConsumerResult; -import com.amazonaws.services.kinesis.model.DescribeLimitsRequest; -import com.amazonaws.services.kinesis.model.DescribeLimitsResult; -import com.amazonaws.services.kinesis.model.DescribeStreamConsumerRequest; -import com.amazonaws.services.kinesis.model.DescribeStreamConsumerResult; -import com.amazonaws.services.kinesis.model.DescribeStreamRequest; -import com.amazonaws.services.kinesis.model.DescribeStreamResult; -import com.amazonaws.services.kinesis.model.DescribeStreamSummaryRequest; -import com.amazonaws.services.kinesis.model.DescribeStreamSummaryResult; -import com.amazonaws.services.kinesis.model.DisableEnhancedMonitoringRequest; -import com.amazonaws.services.kinesis.model.DisableEnhancedMonitoringResult; -import com.amazonaws.services.kinesis.model.EnableEnhancedMonitoringRequest; -import com.amazonaws.services.kinesis.model.EnableEnhancedMonitoringResult; -import com.amazonaws.services.kinesis.model.GetRecordsRequest; -import com.amazonaws.services.kinesis.model.GetRecordsResult; -import com.amazonaws.services.kinesis.model.GetShardIteratorRequest; -import com.amazonaws.services.kinesis.model.GetShardIteratorResult; -import com.amazonaws.services.kinesis.model.IncreaseStreamRetentionPeriodRequest; -import com.amazonaws.services.kinesis.model.IncreaseStreamRetentionPeriodResult; -import com.amazonaws.services.kinesis.model.LimitExceededException; -import com.amazonaws.services.kinesis.model.ListShardsRequest; -import com.amazonaws.services.kinesis.model.ListShardsResult; -import com.amazonaws.services.kinesis.model.ListStreamConsumersRequest; -import com.amazonaws.services.kinesis.model.ListStreamConsumersResult; -import com.amazonaws.services.kinesis.model.ListStreamsRequest; -import com.amazonaws.services.kinesis.model.ListStreamsResult; -import com.amazonaws.services.kinesis.model.ListTagsForStreamRequest; -import com.amazonaws.services.kinesis.model.ListTagsForStreamResult; -import com.amazonaws.services.kinesis.model.MergeShardsRequest; -import com.amazonaws.services.kinesis.model.MergeShardsResult; -import com.amazonaws.services.kinesis.model.PutRecordRequest; -import com.amazonaws.services.kinesis.model.PutRecordResult; -import com.amazonaws.services.kinesis.model.PutRecordsRequest; -import com.amazonaws.services.kinesis.model.PutRecordsResult; -import com.amazonaws.services.kinesis.model.Record; -import com.amazonaws.services.kinesis.model.RegisterStreamConsumerRequest; -import com.amazonaws.services.kinesis.model.RegisterStreamConsumerResult; -import com.amazonaws.services.kinesis.model.RemoveTagsFromStreamRequest; -import com.amazonaws.services.kinesis.model.RemoveTagsFromStreamResult; -import com.amazonaws.services.kinesis.model.Shard; -import com.amazonaws.services.kinesis.model.ShardIteratorType; -import com.amazonaws.services.kinesis.model.SplitShardRequest; -import com.amazonaws.services.kinesis.model.SplitShardResult; -import com.amazonaws.services.kinesis.model.StartStreamEncryptionRequest; -import com.amazonaws.services.kinesis.model.StartStreamEncryptionResult; -import com.amazonaws.services.kinesis.model.StopStreamEncryptionRequest; -import com.amazonaws.services.kinesis.model.StopStreamEncryptionResult; -import com.amazonaws.services.kinesis.model.UpdateShardCountRequest; -import com.amazonaws.services.kinesis.model.UpdateShardCountResult; -import com.amazonaws.services.kinesis.model.UpdateStreamModeRequest; -import com.amazonaws.services.kinesis.model.UpdateStreamModeResult; -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import com.amazonaws.services.kinesis.waiters.AmazonKinesisWaiters; -import java.io.Serializable; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.IntStream; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Splitter; -import org.apache.commons.lang.builder.EqualsBuilder; -import org.checkerframework.checker.nullness.qual.Nullable; -import org.joda.time.Instant; -import org.mockito.Mockito; - -/** Mock implementation of {@link AmazonKinesis} for testing. */ -class AmazonKinesisMock implements AmazonKinesis { - - static class TestData implements Serializable { - - private final String data; - private final Instant arrivalTimestamp; - private final String sequenceNumber; - - public TestData(KinesisRecord record) { - this( - new String(record.getData().array(), StandardCharsets.UTF_8), - record.getApproximateArrivalTimestamp(), - record.getSequenceNumber()); - } - - public TestData(String data, Instant arrivalTimestamp, String sequenceNumber) { - this.data = data; - this.arrivalTimestamp = arrivalTimestamp; - this.sequenceNumber = sequenceNumber; - } - - public Record convertToRecord() { - return new Record() - .withApproximateArrivalTimestamp(arrivalTimestamp.toDate()) - .withData(ByteBuffer.wrap(data.getBytes(StandardCharsets.UTF_8))) - .withSequenceNumber(sequenceNumber) - .withPartitionKey(""); - } - - @Override - public boolean equals(@Nullable Object obj) { - return EqualsBuilder.reflectionEquals(this, obj); - } - - @Override - public int hashCode() { - return reflectionHashCode(this); - } - - @Override - public String toString() { - return "TestData{" - + "data='" - + data - + '\'' - + ", arrivalTimestamp=" - + arrivalTimestamp - + ", sequenceNumber='" - + sequenceNumber - + '\'' - + '}'; - } - } - - static class Provider implements AWSClientsProvider { - - private final List> shardedData; - private final int numberOfRecordsPerGet; - - private boolean expectedListShardsLimitExceededException; - - public Provider(List> shardedData, int numberOfRecordsPerGet) { - this.shardedData = shardedData; - this.numberOfRecordsPerGet = numberOfRecordsPerGet; - } - - /** Simulate limit exceeded exception for ListShards. */ - public Provider withExpectedListShardsLimitExceededException() { - expectedListShardsLimitExceededException = true; - return this; - } - - @Override - public AmazonKinesis getKinesisClient() { - AmazonKinesisMock client = - new AmazonKinesisMock( - shardedData.stream() - .map(testData -> transform(testData, TestData::convertToRecord)) - .collect(Collectors.toList()), - numberOfRecordsPerGet); - if (expectedListShardsLimitExceededException) { - client = client.withExpectedListShardsLimitExceededException(); - } - return client; - } - - @Override - public AmazonCloudWatch getCloudWatchClient() { - return Mockito.mock(AmazonCloudWatch.class); - } - - @Override - public IKinesisProducer createKinesisProducer(KinesisProducerConfiguration config) { - throw new RuntimeException("Not implemented"); - } - } - - private final List> shardedData; - private final int numberOfRecordsPerGet; - - private boolean expectedListShardsLimitExceededException; - - public AmazonKinesisMock(List> shardedData, int numberOfRecordsPerGet) { - this.shardedData = shardedData; - this.numberOfRecordsPerGet = numberOfRecordsPerGet; - } - - public AmazonKinesisMock withExpectedListShardsLimitExceededException() { - this.expectedListShardsLimitExceededException = true; - return this; - } - - @Override - public GetRecordsResult getRecords(GetRecordsRequest getRecordsRequest) { - List shardIteratorParts = - Splitter.on(':').splitToList(getRecordsRequest.getShardIterator()); - int shardId = parseInt(shardIteratorParts.get(0)); - int startingRecord = parseInt(shardIteratorParts.get(1)); - List shardData = shardedData.get(shardId); - - int toIndex = min(startingRecord + numberOfRecordsPerGet, shardData.size()); - int fromIndex = min(startingRecord, toIndex); - return new GetRecordsResult() - .withRecords(shardData.subList(fromIndex, toIndex)) - .withNextShardIterator(String.format("%s:%s", shardId, toIndex)) - .withMillisBehindLatest(0L); - } - - @Override - public GetShardIteratorResult getShardIterator(GetShardIteratorRequest getShardIteratorRequest) { - ShardIteratorType shardIteratorType = - ShardIteratorType.fromValue(getShardIteratorRequest.getShardIteratorType()); - - String shardIterator; - if (shardIteratorType == ShardIteratorType.TRIM_HORIZON) { - shardIterator = String.format("%s:%s", getShardIteratorRequest.getShardId(), 0); - } else { - throw new RuntimeException("Not implemented"); - } - - return new GetShardIteratorResult().withShardIterator(shardIterator); - } - - @Override - public DescribeStreamResult describeStream(String streamName, String exclusiveStartShardId) { - throw new RuntimeException("Not implemented"); - } - - @Override - public void setEndpoint(String endpoint) {} - - @Override - public void setRegion(Region region) {} - - @Override - public AddTagsToStreamResult addTagsToStream(AddTagsToStreamRequest addTagsToStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public CreateStreamResult createStream(CreateStreamRequest createStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public CreateStreamResult createStream(String streamName, Integer shardCount) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DecreaseStreamRetentionPeriodResult decreaseStreamRetentionPeriod( - DecreaseStreamRetentionPeriodRequest decreaseStreamRetentionPeriodRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DeleteStreamResult deleteStream(DeleteStreamRequest deleteStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DeleteStreamResult deleteStream(String streamName) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DeregisterStreamConsumerResult deregisterStreamConsumer( - DeregisterStreamConsumerRequest deregisterStreamConsumerRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DescribeLimitsResult describeLimits(DescribeLimitsRequest describeLimitsRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DescribeStreamResult describeStream(DescribeStreamRequest describeStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DescribeStreamResult describeStream(String streamName) { - return describeStream(streamName, null); - } - - @Override - public DescribeStreamResult describeStream( - String streamName, Integer limit, String exclusiveStartShardId) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DescribeStreamConsumerResult describeStreamConsumer( - DescribeStreamConsumerRequest describeStreamConsumerRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DescribeStreamSummaryResult describeStreamSummary( - DescribeStreamSummaryRequest describeStreamSummaryRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public DisableEnhancedMonitoringResult disableEnhancedMonitoring( - DisableEnhancedMonitoringRequest disableEnhancedMonitoringRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public EnableEnhancedMonitoringResult enableEnhancedMonitoring( - EnableEnhancedMonitoringRequest enableEnhancedMonitoringRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public GetShardIteratorResult getShardIterator( - String streamName, String shardId, String shardIteratorType) { - throw new RuntimeException("Not implemented"); - } - - @Override - public GetShardIteratorResult getShardIterator( - String streamName, String shardId, String shardIteratorType, String startingSequenceNumber) { - throw new RuntimeException("Not implemented"); - } - - @Override - public IncreaseStreamRetentionPeriodResult increaseStreamRetentionPeriod( - IncreaseStreamRetentionPeriodRequest increaseStreamRetentionPeriodRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListShardsResult listShards(ListShardsRequest listShardsRequest) { - if (expectedListShardsLimitExceededException) { - throw new LimitExceededException("ListShards rate limit exceeded"); - } - - ListShardsResult result = new ListShardsResult(); - - List shards = - IntStream.range(0, shardedData.size()) - .boxed() - .map(i -> new Shard().withShardId(Integer.toString(i))) - .collect(Collectors.toList()); - result.setShards(shards); - - HttpResponse response = new HttpResponse(null, null); - response.setStatusCode(200); - result.setSdkHttpMetadata(SdkHttpMetadata.from(response)); - return result; - } - - @Override - public ListStreamConsumersResult listStreamConsumers( - ListStreamConsumersRequest listStreamConsumersRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListStreamsResult listStreams(ListStreamsRequest listStreamsRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListStreamsResult listStreams() { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListStreamsResult listStreams(String exclusiveStartStreamName) { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListStreamsResult listStreams(Integer limit, String exclusiveStartStreamName) { - throw new RuntimeException("Not implemented"); - } - - @Override - public ListTagsForStreamResult listTagsForStream( - ListTagsForStreamRequest listTagsForStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public MergeShardsResult mergeShards(MergeShardsRequest mergeShardsRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public MergeShardsResult mergeShards( - String streamName, String shardToMerge, String adjacentShardToMerge) { - throw new RuntimeException("Not implemented"); - } - - @Override - public PutRecordResult putRecord(PutRecordRequest putRecordRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public PutRecordResult putRecord(String streamName, ByteBuffer data, String partitionKey) { - throw new RuntimeException("Not implemented"); - } - - @Override - public PutRecordResult putRecord( - String streamName, ByteBuffer data, String partitionKey, String sequenceNumberForOrdering) { - throw new RuntimeException("Not implemented"); - } - - @Override - public PutRecordsResult putRecords(PutRecordsRequest putRecordsRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public RegisterStreamConsumerResult registerStreamConsumer( - RegisterStreamConsumerRequest registerStreamConsumerRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public RemoveTagsFromStreamResult removeTagsFromStream( - RemoveTagsFromStreamRequest removeTagsFromStreamRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public SplitShardResult splitShard(SplitShardRequest splitShardRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public SplitShardResult splitShard( - String streamName, String shardToSplit, String newStartingHashKey) { - throw new RuntimeException("Not implemented"); - } - - @Override - public StartStreamEncryptionResult startStreamEncryption( - StartStreamEncryptionRequest startStreamEncryptionRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public StopStreamEncryptionResult stopStreamEncryption( - StopStreamEncryptionRequest stopStreamEncryptionRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public UpdateShardCountResult updateShardCount(UpdateShardCountRequest updateShardCountRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public UpdateStreamModeResult updateStreamMode(UpdateStreamModeRequest updateStreamModeRequest) { - throw new RuntimeException("Not implemented"); - } - - @Override - public void shutdown() {} - - @Override - public ResponseMetadata getCachedResponseMetadata(AmazonWebServiceRequest request) { - throw new RuntimeException("Not implemented"); - } - - @Override - public AmazonKinesisWaiters waiters() { - throw new RuntimeException("Not implemented"); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/BasicKinesisClientProviderTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/BasicKinesisClientProviderTest.java deleted file mode 100644 index 938dc9b6f8b6..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/BasicKinesisClientProviderTest.java +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.junit.Assert.assertEquals; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.regions.Regions; -import org.apache.beam.sdk.util.SerializableUtils; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests on {@link org.apache.beam.sdk.io.aws2.kinesis.BasicKinesisProvider}. */ -@RunWith(JUnit4.class) -public class BasicKinesisClientProviderTest { - private static final String ACCESS_KEY_ID = "ACCESS_KEY_ID"; - private static final String SECRET_ACCESS_KEY = "SECRET_ACCESS_KEY"; - - @Test - public void testSerialization() { - AWSCredentialsProvider awsCredentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)); - - BasicKinesisProvider kinesisProvider = - new BasicKinesisProvider(awsCredentialsProvider, Regions.AP_EAST_1, null, true); - - byte[] serializedBytes = SerializableUtils.serializeToByteArray(kinesisProvider); - - BasicKinesisProvider kinesisProviderDeserialized = - (BasicKinesisProvider) - SerializableUtils.deserializeFromByteArray(serializedBytes, "Basic Kinesis Provider"); - - assertEquals(kinesisProvider, kinesisProviderDeserialized); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java deleted file mode 100644 index 00e6b9334025..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.google.common.testing.EqualsTester; -import java.util.NoSuchElementException; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link CustomOptional}. */ -@RunWith(JUnit4.class) -public class CustomOptionalTest { - - @Test(expected = NoSuchElementException.class) - public void absentThrowsNoSuchElementExceptionOnGet() { - CustomOptional.absent().get(); - } - - @Test - public void testEqualsAndHashCode() { - new EqualsTester() - .addEqualityGroup(CustomOptional.absent(), CustomOptional.absent()) - .addEqualityGroup(CustomOptional.of(3), CustomOptional.of(3)) - .addEqualityGroup(CustomOptional.of(11)) - .addEqualityGroup(CustomOptional.of("3")) - .testEquals(); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java deleted file mode 100644 index 1426f3b52197..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.when; - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.model.Shard; -import java.util.List; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -/** * */ -@RunWith(MockitoJUnitRunner.class) -public class DynamicCheckpointGeneratorTest { - - @Mock private SimplifiedKinesisClient kinesisClient; - @Mock private Shard shard1, shard2, shard3; - - @Test - public void shouldMapAllShardsToCheckpoints() throws Exception { - when(shard1.getShardId()).thenReturn("shard-01"); - when(shard2.getShardId()).thenReturn("shard-02"); - when(shard3.getShardId()).thenReturn("shard-03"); - List shards = ImmutableList.of(shard1, shard2, shard3); - String streamName = "stream"; - StartingPoint startingPoint = new StartingPoint(InitialPositionInStream.LATEST); - when(kinesisClient.listShardsAtPoint(streamName, startingPoint)).thenReturn(shards); - DynamicCheckpointGenerator underTest = - new DynamicCheckpointGenerator(streamName, startingPoint); - - KinesisReaderCheckpoint checkpoint = underTest.generate(kinesisClient); - - assertThat(checkpoint).hasSize(3); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOIT.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOIT.java deleted file mode 100644 index b2ec825f7d85..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOIT.java +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.client.builder.AwsClientBuilder; -import com.amazonaws.regions.Regions; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.AmazonKinesisClientBuilder; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import java.io.Serializable; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import org.apache.beam.sdk.io.GenerateSequence; -import org.apache.beam.sdk.io.common.HashingFn; -import org.apache.beam.sdk.io.common.TestRow; -import org.apache.beam.sdk.options.PipelineOptionsFactory; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Combine; -import org.apache.beam.sdk.transforms.Count; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.AfterClass; -import org.junit.BeforeClass; -import org.junit.Rule; -import org.junit.Test; -import org.junit.function.ThrowingRunnable; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; -import org.slf4j.LoggerFactory; -import org.testcontainers.containers.localstack.LocalStackContainer; -import org.testcontainers.containers.localstack.LocalStackContainer.Service; -import org.testcontainers.utility.DockerImageName; - -/** - * Integration test, that writes and reads data to and from real Kinesis. You need to provide {@link - * KinesisTestOptions} in order to run this if you want to test it with production setup. By default - * when no options are provided an instance of localstack is used. - */ -@RunWith(JUnit4.class) -public class KinesisIOIT implements Serializable { - private static final String LOCALSTACK_VERSION = "0.12.18"; - - @Rule public TestPipeline pipelineWrite = TestPipeline.create(); - @Rule public TestPipeline pipelineRead = TestPipeline.create(); - - // Will be run in reverse order - private static final List teardownTasks = new ArrayList<>(); - - private static KinesisTestOptions options; - - private static Instant now = Instant.now(); - - @BeforeClass - public static void setup() throws Exception { - PipelineOptionsFactory.register(KinesisTestOptions.class); - options = TestPipeline.testingPipelineOptions().as(KinesisTestOptions.class); - - if (options.getUseLocalstack()) { - setupLocalstack(); - } - if (options.getCreateStream()) { - AmazonKinesis kinesisClient = createKinesisClient(); - teardownTasks.add(kinesisClient::shutdown); - - createStream(kinesisClient); - teardownTasks.add(() -> deleteStream(kinesisClient)); - } - } - - @AfterClass - public static void teardown() { - Lists.reverse(teardownTasks).forEach(KinesisIOIT::safeRun); - teardownTasks.clear(); - } - - /** Test which write and then read data for a Kinesis stream. */ - @Test - public void testWriteThenRead() { - runWrite(); - runRead(); - } - - /** Write test dataset into Kinesis stream. */ - private void runWrite() { - pipelineWrite - .apply("Generate Sequence", GenerateSequence.from(0).to(options.getNumberOfRecords())) - .apply("Prepare TestRows", ParDo.of(new TestRow.DeterministicallyConstructTestRowFn())) - .apply("Prepare Kinesis input records", ParDo.of(new ConvertToBytes())) - .apply( - "Write to Kinesis", - KinesisIO.write() - .withStreamName(options.getAwsKinesisStream()) - .withPartitioner(new RandomPartitioner()) - .withAWSClientsProvider( - options.getAwsAccessKey(), - options.getAwsSecretKey(), - Regions.fromName(options.getAwsKinesisRegion()), - options.getAwsServiceEndpoint(), - options.getAwsVerifyCertificate())); - - pipelineWrite.run().waitUntilFinish(); - } - - /** Read test dataset from Kinesis stream. */ - private void runRead() { - PCollection output = - pipelineRead.apply( - KinesisIO.read() - .withStreamName(options.getAwsKinesisStream()) - .withAWSClientsProvider( - options.getAwsAccessKey(), - options.getAwsSecretKey(), - Regions.fromName(options.getAwsKinesisRegion()), - options.getAwsServiceEndpoint(), - options.getAwsVerifyCertificate()) - .withMaxNumRecords(options.getNumberOfRecords()) - // to prevent endless running in case of error - .withMaxReadTime(Duration.standardMinutes(10L)) - .withInitialPositionInStream(InitialPositionInStream.AT_TIMESTAMP) - .withInitialTimestampInStream(now) - .withRequestRecordsLimit(1000)); - - PAssert.thatSingleton(output.apply("Count All", Count.globally())) - .isEqualTo((long) options.getNumberOfRecords()); - - PCollection consolidatedHashcode = - output - .apply(ParDo.of(new ExtractDataValues())) - .apply("Hash row contents", Combine.globally(new HashingFn()).withoutDefaults()); - - PAssert.that(consolidatedHashcode) - .containsInAnyOrder(TestRow.getExpectedHashForRowCount(options.getNumberOfRecords())); - - pipelineRead.run().waitUntilFinish(); - } - - /** Necessary setup for localstack environment. */ - private static void setupLocalstack() { - // For some unclear reason localstack requires a timestamp in seconds - now = Instant.ofEpochMilli(Long.divideUnsigned(now.getMillis(), 1000L)); - - LocalStackContainer kinesisContainer = - new LocalStackContainer( - DockerImageName.parse("localstack/localstack").withTag(LOCALSTACK_VERSION)) - .withServices(Service.KINESIS) - .withEnv("USE_SSL", "true") - .withStartupAttempts(3); - - kinesisContainer.start(); - teardownTasks.add(() -> kinesisContainer.stop()); - - options.setAwsServiceEndpoint(kinesisContainer.getEndpointOverride(Service.KINESIS).toString()); - options.setAwsKinesisRegion(kinesisContainer.getRegion()); - options.setAwsAccessKey(kinesisContainer.getAccessKey()); - options.setAwsSecretKey(kinesisContainer.getSecretKey()); - options.setAwsVerifyCertificate(false); - options.setCreateStream(true); - } - - private static AmazonKinesis createKinesisClient() { - AWSCredentials credentials = - new BasicAWSCredentials(options.getAwsAccessKey(), options.getAwsSecretKey()); - AmazonKinesisClientBuilder clientBuilder = - AmazonKinesisClientBuilder.standard() - .withCredentials(new AWSStaticCredentialsProvider(credentials)); - - if (options.getAwsServiceEndpoint() != null) { - clientBuilder.setEndpointConfiguration( - new AwsClientBuilder.EndpointConfiguration( - options.getAwsServiceEndpoint(), options.getAwsKinesisRegion())); - } else { - clientBuilder.setRegion(options.getAwsKinesisRegion()); - } - - return clientBuilder.build(); - } - - private static void createStream(AmazonKinesis kinesisClient) throws Exception { - kinesisClient.createStream(options.getAwsKinesisStream(), options.getNumberOfShards()); - int attempts = 10; - for (int i = 0; i <= attempts; ++i) { - String streamStatus = - kinesisClient - .describeStream(options.getAwsKinesisStream()) - .getStreamDescription() - .getStreamStatus(); - if ("ACTIVE".equals(streamStatus)) { - return; - } - Thread.sleep(1000L); - } - throw new RuntimeException("Unable to initialize stream"); - } - - private static void deleteStream(AmazonKinesis kinesisClient) { - kinesisClient.deleteStream(options.getAwsKinesisStream()); - } - - private static void safeRun(ThrowingRunnable task) { - try { - task.run(); - } catch (Throwable e) { - LoggerFactory.getLogger(KinesisIOIT.class).warn("Cleanup task failed", e); - } - } - - /** Produces test rows. */ - private static class ConvertToBytes extends DoFn { - @ProcessElement - public void processElement(ProcessContext c) { - c.output(String.valueOf(c.element().name()).getBytes(StandardCharsets.UTF_8)); - } - } - - /** Read rows from Table. */ - private static class ExtractDataValues extends DoFn { - @ProcessElement - public void processElement(ProcessContext c) { - c.output(new String(c.element().getDataAsBytes(), StandardCharsets.UTF_8)); - } - } - - private static final class RandomPartitioner implements KinesisPartitioner { - @Override - public String getPartitionKey(byte[] value) { - Random rand = new Random(); - int n = rand.nextInt(options.getNumberOfShards()) + 1; - return String.valueOf(n); - } - - @Override - public String getExplicitHashKey(byte[] value) { - return null; - } - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOReadTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOReadTest.java deleted file mode 100644 index fdacc62bdb4a..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOReadTest.java +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.junit.Assert.assertEquals; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.regions.Regions; -import org.apache.beam.sdk.io.kinesis.KinesisIO.Read; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests for non trivial builder variants of {@link KinesisIO#read}. */ -@RunWith(JUnit4.class) -public class KinesisIOReadTest { - private static final String ACCESS_KEY_ID = "ACCESS_KEY_ID"; - private static final String SECRET_ACCESS_KEY = "SECRET_ACCESS_KEY"; - private static final boolean VERIFICATION_DISABLED = false; - - @Test - public void testReadWithBasicCredentials() { - Regions region = Regions.US_EAST_1; - Read read = - KinesisIO.read().withAWSClientsProvider(ACCESS_KEY_ID, SECRET_ACCESS_KEY, region); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider( - new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)), - region, - null, - true)); - } - - @Test - public void testReadWithCredentialsProvider() { - Regions region = Regions.US_EAST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Read read = KinesisIO.read().withAWSClientsProvider(credentialsProvider, region); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider(credentialsProvider, region, null, true)); - } - - @Test - public void testReadWithBasicCredentialsAndCustomEndpoint() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - - Read read = - KinesisIO.read() - .withAWSClientsProvider(ACCESS_KEY_ID, SECRET_ACCESS_KEY, region, customEndpoint); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider( - new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)), - region, - customEndpoint, - true)); - } - - @Test - public void testReadWithCredentialsProviderAndCustomEndpoint() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Read read = - KinesisIO.read().withAWSClientsProvider(credentialsProvider, region, customEndpoint); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider(credentialsProvider, region, customEndpoint, true)); - } - - @Test - public void testReadWithBasicCredentialsAndVerificationDisabled() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - - Read read = - KinesisIO.read() - .withAWSClientsProvider( - ACCESS_KEY_ID, SECRET_ACCESS_KEY, region, customEndpoint, VERIFICATION_DISABLED); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider( - new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)), - region, - customEndpoint, - VERIFICATION_DISABLED)); - } - - @Test - public void testReadWithCredentialsProviderAndVerificationDisabled() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Read read = - KinesisIO.read() - .withAWSClientsProvider( - credentialsProvider, region, customEndpoint, VERIFICATION_DISABLED); - - assertEquals( - read.getAWSClientsProvider(), - new BasicKinesisProvider( - credentialsProvider, region, customEndpoint, VERIFICATION_DISABLED)); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOWriteTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOWriteTest.java deleted file mode 100644 index 6884b199a1e3..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisIOWriteTest.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.junit.Assert.assertEquals; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.regions.Regions; -import org.apache.beam.sdk.io.kinesis.KinesisIO.Write; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests for non trivial builder variants of {@link KinesisIO#write()}. */ -@RunWith(JUnit4.class) -public class KinesisIOWriteTest { - private static final String ACCESS_KEY_ID = "ACCESS_KEY_ID"; - private static final String SECRET_KEY = "SECRET_KEY"; - private static final boolean VERIFICATION_DISABLED = false; - - @Test - public void testReadWithBasicCredentials() { - Regions region = Regions.US_EAST_1; - Write write = KinesisIO.write().withAWSClientsProvider(ACCESS_KEY_ID, SECRET_KEY, region); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider(new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_KEY)), - region, - null, - true)); - } - - @Test - public void testReadWithCredentialsProvider() { - Regions region = Regions.US_EAST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Write write = KinesisIO.write().withAWSClientsProvider(credentialsProvider, region); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider(credentialsProvider, region, null, true)); - } - - @Test - public void testReadWithBasicCredentialsAndCustomEndpoint() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - BasicAWSCredentials credentials = new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_KEY); - - Write write = - KinesisIO.write().withAWSClientsProvider(ACCESS_KEY_ID, SECRET_KEY, region, customEndpoint); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider(credentials), region, customEndpoint, true)); - } - - @Test - public void testReadWithCredentialsProviderAndCustomEndpoint() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Write write = - KinesisIO.write().withAWSClientsProvider(credentialsProvider, region, customEndpoint); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider(credentialsProvider, region, customEndpoint, true)); - } - - @Test - public void testReadWithBasicCredentialsAndVerificationDisabled() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - BasicAWSCredentials credentials = new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_KEY); - - Write write = - KinesisIO.write() - .withAWSClientsProvider( - ACCESS_KEY_ID, SECRET_KEY, region, customEndpoint, VERIFICATION_DISABLED); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider( - new AWSStaticCredentialsProvider(credentials), - region, - customEndpoint, - VERIFICATION_DISABLED)); - } - - @Test - public void testReadWithCredentialsProviderAndVerificationDisabled() { - String customEndpoint = "localhost:9999"; - Regions region = Regions.US_WEST_1; - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - - Write write = - KinesisIO.write() - .withAWSClientsProvider( - credentialsProvider, region, customEndpoint, VERIFICATION_DISABLED); - - assertEquals( - write.getAWSClientsProvider(), - new BasicKinesisProvider( - credentialsProvider, region, customEndpoint, VERIFICATION_DISABLED)); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java deleted file mode 100644 index 77cabe858f52..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; - -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import java.util.List; -import org.apache.beam.sdk.Pipeline.PipelineExecutionException; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.DoFn; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.joda.time.DateTime; -import org.junit.Rule; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link AmazonKinesisMock}. */ -@RunWith(JUnit4.class) -public class KinesisMockReadTest { - - @Rule public final transient TestPipeline p = TestPipeline.create(); - - private final int noOfShards = 3; - private final int noOfEventsPerShard = 100; - - @Test - public void readsDataFromMockKinesis() { - List> testData = defaultTestData(); - verifyReadWithProvider(new AmazonKinesisMock.Provider(testData, 10), testData); - } - - @Test(expected = PipelineExecutionException.class) - public void readsDataFromMockKinesisWithLimitFailure() { - List> testData = defaultTestData(); - verifyReadWithProvider( - new AmazonKinesisMock.Provider(testData, 10).withExpectedListShardsLimitExceededException(), - testData); - } - - public void verifyReadWithProvider( - AmazonKinesisMock.Provider provider, List> testData) { - PCollection result = - p.apply( - KinesisIO.read() - .withStreamName("stream") - .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON) - .withAWSClientsProvider(provider) - .withArrivalTimeWatermarkPolicy() - .withMaxNumRecords(noOfShards * noOfEventsPerShard)) - .apply(ParDo.of(new KinesisRecordToTestData())); - PAssert.that(result).containsInAnyOrder(Iterables.concat(testData)); - p.run(); - } - - static class KinesisRecordToTestData extends DoFn { - - @ProcessElement - public void processElement(ProcessContext c) throws Exception { - c.output(new AmazonKinesisMock.TestData(c.element())); - } - } - - private List> defaultTestData() { - return provideTestData(noOfShards, noOfEventsPerShard); - } - - private List> provideTestData( - int noOfShards, int noOfEventsPerShard) { - - int seqNumber = 0; - - List> shardedData = newArrayList(); - for (int i = 0; i < noOfShards; ++i) { - List shardData = newArrayList(); - shardedData.add(shardData); - - DateTime arrival = DateTime.now(); - for (int j = 0; j < noOfEventsPerShard; ++j) { - arrival = arrival.plusSeconds(1); - - seqNumber++; - shardData.add( - new AmazonKinesisMock.TestData( - Integer.toString(seqNumber), arrival.toInstant(), Integer.toString(seqNumber))); - } - } - - return shardedData; - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockWriteTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockWriteTest.java deleted file mode 100644 index 33b0c3a096ab..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockWriteTest.java +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.junit.Assert.assertEquals; -import static org.mockito.Mockito.mock; - -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Properties; -import org.apache.beam.sdk.testing.PAssert; -import org.apache.beam.sdk.testing.TestPipeline; -import org.apache.beam.sdk.transforms.Create; -import org.apache.beam.sdk.transforms.ParDo; -import org.apache.beam.sdk.values.PCollection; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.junit.Before; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests for {@link KinesisIO.Write}. */ -@RunWith(JUnit4.class) -public class KinesisMockWriteTest { - private static final String STREAM = "BEAM"; - private static final String PARTITION_KEY = "partitionKey"; - - @Rule public final transient TestPipeline p = TestPipeline.create(); - @Rule public final transient TestPipeline p2 = TestPipeline.create(); - @Rule public ExpectedException thrown = ExpectedException.none(); - - @Before - public void beforeTest() { - KinesisServiceMock kinesisService = KinesisServiceMock.getInstance(); - kinesisService.init(STREAM, 1); - } - - @Test - public void testWriteBuildsCorrectly() { - Properties properties = new Properties(); - properties.setProperty("KinesisEndpoint", "localhost"); - properties.setProperty("KinesisPort", "4567"); - - KinesisIO.Write write = - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withPartitioner(new BasicKinesisPartitioner()) - .withAWSClientsProvider(new FakeKinesisProvider()) - .withProducerProperties(properties) - .withRetries(10); - - assertEquals(STREAM, write.getStreamName()); - assertEquals(PARTITION_KEY, write.getPartitionKey()); - assertEquals(properties, write.getProducerProperties()); - assertEquals(FakeKinesisProvider.class, write.getAWSClientsProvider().getClass()); - assertEquals(BasicKinesisPartitioner.class, write.getPartitioner().getClass()); - assertEquals(10, write.getRetries()); - - assertEquals("localhost", write.getProducerProperties().getProperty("KinesisEndpoint")); - assertEquals("4567", write.getProducerProperties().getProperty("KinesisPort")); - } - - @Test - public void testWriteValidationFailsMissingStreamName() { - KinesisIO.Write write = - KinesisIO.write() - .withPartitionKey(PARTITION_KEY) - .withAWSClientsProvider(new FakeKinesisProvider()); - - thrown.expect(IllegalArgumentException.class); - write.expand(null); - } - - @Test - public void testWriteValidationFailsMissingPartitioner() { - KinesisIO.Write write = - KinesisIO.write().withStreamName(STREAM).withAWSClientsProvider(new FakeKinesisProvider()); - - thrown.expect(IllegalArgumentException.class); - write.expand(null); - } - - @Test - public void testWriteValidationFailsPartitionerAndPartitioneKey() { - KinesisIO.Write write = - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withPartitioner(new BasicKinesisPartitioner()) - .withAWSClientsProvider(new FakeKinesisProvider()); - - thrown.expect(IllegalArgumentException.class); - write.expand(null); - } - - @Test - public void testWriteValidationFailsMissingAWSClientsProvider() { - KinesisIO.Write write = - KinesisIO.write().withPartitionKey(PARTITION_KEY).withStreamName(STREAM); - - thrown.expect(IllegalArgumentException.class); - write.expand(null); - } - - @Test - public void testSetInvalidProperty() { - Properties properties = new Properties(); - properties.setProperty("KinesisPort", "qwe"); - - KinesisIO.Write write = - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withAWSClientsProvider(new FakeKinesisProvider()) - .withProducerProperties(properties); - - thrown.expect(IllegalArgumentException.class); - write.expand(null); - } - - @Test - public void testWrite() { - KinesisServiceMock kinesisService = KinesisServiceMock.getInstance(); - - Properties properties = new Properties(); - properties.setProperty("KinesisEndpoint", "localhost"); - properties.setProperty("KinesisPort", "4567"); - properties.setProperty("VerifyCertificate", "false"); - - Iterable data = - ImmutableList.of( - "1".getBytes(StandardCharsets.UTF_8), - "2".getBytes(StandardCharsets.UTF_8), - "3".getBytes(StandardCharsets.UTF_8)); - p.apply(Create.of(data)) - .apply( - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withAWSClientsProvider(new FakeKinesisProvider()) - .withProducerProperties(properties)); - p.run().waitUntilFinish(); - - assertEquals(3, kinesisService.getAddedRecords().get()); - } - - @Test - public void testWriteFailed() { - Iterable data = ImmutableList.of("1".getBytes(StandardCharsets.UTF_8)); - p.apply(Create.of(data)) - .apply( - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withAWSClientsProvider(new FakeKinesisProvider().setFailedFlush(true)) - .withRetries(2)); - - thrown.expect(RuntimeException.class); - p.run().waitUntilFinish(); - } - - @Test - public void testWriteAndReadFromMockKinesis() { - KinesisServiceMock kinesisService = KinesisServiceMock.getInstance(); - - Iterable data = - ImmutableList.of( - "1".getBytes(StandardCharsets.UTF_8), "2".getBytes(StandardCharsets.UTF_8)); - p.apply(Create.of(data)) - .apply( - KinesisIO.write() - .withStreamName(STREAM) - .withPartitionKey(PARTITION_KEY) - .withAWSClientsProvider(new FakeKinesisProvider())); - p.run().waitUntilFinish(); - assertEquals(2, kinesisService.getAddedRecords().get()); - - List> testData = kinesisService.getShardedData(); - - int noOfShards = 1; - int noOfEventsPerShard = 2; - PCollection result = - p2.apply( - KinesisIO.read() - .withStreamName(STREAM) - .withInitialPositionInStream(InitialPositionInStream.TRIM_HORIZON) - .withAWSClientsProvider(new AmazonKinesisMock.Provider(testData, 10)) - .withMaxNumRecords(noOfShards * noOfEventsPerShard)) - .apply(ParDo.of(new KinesisMockReadTest.KinesisRecordToTestData())); - PAssert.that(result).containsInAnyOrder(Iterables.concat(testData)); - p2.run().waitUntilFinish(); - } - - private static final class BasicKinesisPartitioner implements KinesisPartitioner { - @Override - public String getPartitionKey(byte[] value) { - return String.valueOf(value.length); - } - - @Override - public String getExplicitHashKey(byte[] value) { - return null; - } - } - - private static final class FakeKinesisProvider implements AWSClientsProvider { - private boolean isFailedFlush = false; - - public FakeKinesisProvider() {} - - public FakeKinesisProvider setFailedFlush(boolean failedFlush) { - isFailedFlush = failedFlush; - return this; - } - - @Override - public AmazonKinesis getKinesisClient() { - return mock(AmazonKinesis.class); - } - - @Override - public AmazonCloudWatch getCloudWatchClient() { - throw new RuntimeException("Not implemented"); - } - - @Override - public IKinesisProducer createKinesisProducer(KinesisProducerConfiguration config) { - return new KinesisProducerMock(config, isFailedFlush); - } - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisProducerMock.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisProducerMock.java deleted file mode 100644 index 17c8c1ddb815..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisProducerMock.java +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import com.amazonaws.services.kinesis.producer.IKinesisProducer; -import com.amazonaws.services.kinesis.producer.KinesisProducerConfiguration; -import com.amazonaws.services.kinesis.producer.Metric; -import com.amazonaws.services.kinesis.producer.UserRecord; -import com.amazonaws.services.kinesis.producer.UserRecordResult; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.SettableFuture; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicInteger; -import org.joda.time.DateTime; - -/** Simple mock implementation of {@link IKinesisProducer} for testing. */ -public class KinesisProducerMock implements IKinesisProducer { - - private boolean isFailedFlush = false; - - private List addedRecords = Collections.synchronizedList(new ArrayList<>()); - - private KinesisServiceMock kinesisService = KinesisServiceMock.getInstance(); - - private AtomicInteger seqNumber = new AtomicInteger(0); - - public KinesisProducerMock() {} - - public KinesisProducerMock(KinesisProducerConfiguration config, boolean isFailedFlush) { - this.isFailedFlush = isFailedFlush; - this.seqNumber.set(0); - } - - @Override - public ListenableFuture addUserRecord( - String stream, String partitionKey, ByteBuffer data) { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public ListenableFuture addUserRecord(UserRecord userRecord) { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public synchronized ListenableFuture addUserRecord( - String stream, String partitionKey, String explicitHashKey, ByteBuffer data) { - seqNumber.incrementAndGet(); - SettableFuture f = SettableFuture.create(); - f.set( - new UserRecordResult( - new ArrayList<>(), String.valueOf(seqNumber.get()), explicitHashKey, !isFailedFlush)); - - if (kinesisService.getExistedStream().equals(stream)) { - addedRecords.add(new UserRecord(stream, partitionKey, explicitHashKey, data)); - } - return f; - } - - @Override - public int getOutstandingRecordsCount() { - return addedRecords.size(); - } - - @Override - public List getMetrics(String metricName, int windowSeconds) - throws InterruptedException, ExecutionException { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public List getMetrics(String metricName) - throws InterruptedException, ExecutionException { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public List getMetrics() throws InterruptedException, ExecutionException { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public List getMetrics(int windowSeconds) - throws InterruptedException, ExecutionException { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public void destroy() {} - - @Override - public void flush(String stream) { - throw new UnsupportedOperationException("Not implemented"); - } - - @Override - public synchronized void flush() { - DateTime arrival = DateTime.now(); - for (int i = 0; i < addedRecords.size(); i++) { - UserRecord record = addedRecords.get(i); - arrival = arrival.plusSeconds(1); - kinesisService.addShardedData(record.getData(), arrival); - addedRecords.remove(i); - } - } - - @Override - public synchronized void flushSync() { - flush(); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java deleted file mode 100644 index 61212fb05570..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.util.Arrays.asList; -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.Iterator; -import java.util.List; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Iterables; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -/** * */ -@RunWith(MockitoJUnitRunner.class) -public class KinesisReaderCheckpointTest { - - @Mock private ShardCheckpoint a, b, c; - - private KinesisReaderCheckpoint checkpoint; - - @Before - public void setUp() { - checkpoint = new KinesisReaderCheckpoint(asList(a, b, c)); - } - - @Test - public void splitsCheckpointAccordingly() { - verifySplitInto(1); - verifySplitInto(2); - verifySplitInto(3); - verifySplitInto(4); - } - - @Test(expected = UnsupportedOperationException.class) - public void isImmutable() { - Iterator iterator = checkpoint.iterator(); - iterator.remove(); - } - - private void verifySplitInto(int size) { - List split = checkpoint.splitInto(size); - assertThat(Iterables.concat(split)).containsOnly(a, b, c); - assertThat(split).hasSize(Math.min(size, 3)); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java deleted file mode 100644 index 64f0fe7c6538..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.util.Arrays.asList; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.spy; -import static org.mockito.Mockito.when; - -import java.io.IOException; -import java.util.NoSuchElementException; -import org.apache.beam.sdk.io.UnboundedSource; -import org.apache.beam.sdk.transforms.windowing.BoundedWindow; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -/** Tests {@link KinesisReader}. */ -@RunWith(MockitoJUnitRunner.Silent.class) -public class KinesisReaderTest { - - @Mock private SimplifiedKinesisClient kinesis; - @Mock private CheckpointGenerator generator; - @Mock private ShardCheckpoint firstCheckpoint, secondCheckpoint; - @Mock private KinesisRecord a, b, c, d; - @Mock private KinesisSource kinesisSource; - @Mock private ShardReadersPool shardReadersPool; - - private KinesisReader reader; - - @Before - public void setUp() throws TransientKinesisException { - when(generator.generate(kinesis)) - .thenReturn(new KinesisReaderCheckpoint(asList(firstCheckpoint, secondCheckpoint))); - when(shardReadersPool.nextRecord()).thenReturn(CustomOptional.absent()); - when(a.getApproximateArrivalTimestamp()).thenReturn(Instant.now()); - when(b.getApproximateArrivalTimestamp()).thenReturn(Instant.now()); - when(c.getApproximateArrivalTimestamp()).thenReturn(Instant.now()); - when(d.getApproximateArrivalTimestamp()).thenReturn(Instant.now()); - - reader = spy(createReader(Duration.ZERO)); - } - - private KinesisReader createReader(Duration backlogBytesCheckThreshold) { - return new KinesisReader( - kinesis, - generator, - kinesisSource, - WatermarkPolicyFactory.withArrivalTimePolicy(), - RateLimitPolicyFactory.withoutLimiter(), - Duration.ZERO, - backlogBytesCheckThreshold, - ShardReadersPool.DEFAULT_CAPACITY_PER_SHARD) { - @Override - ShardReadersPool createShardReadersPool() { - return shardReadersPool; - } - }; - } - - @Test - public void startReturnsFalseIfNoDataAtTheBeginning() throws IOException { - assertThat(reader.start()).isFalse(); - } - - @Test(expected = NoSuchElementException.class) - public void throwsNoSuchElementExceptionIfNoData() throws IOException { - reader.start(); - reader.getCurrent(); - } - - @Test - public void startReturnsTrueIfSomeDataAvailable() throws IOException { - when(shardReadersPool.nextRecord()) - .thenReturn(CustomOptional.of(a)) - .thenReturn(CustomOptional.absent()); - - assertThat(reader.start()).isTrue(); - } - - @Test - public void readsThroughAllDataAvailable() throws IOException { - when(shardReadersPool.nextRecord()) - .thenReturn(CustomOptional.of(c)) - .thenReturn(CustomOptional.absent()) - .thenReturn(CustomOptional.of(a)) - .thenReturn(CustomOptional.absent()) - .thenReturn(CustomOptional.of(d)) - .thenReturn(CustomOptional.of(b)) - .thenReturn(CustomOptional.absent()); - - assertThat(reader.start()).isTrue(); - assertThat(reader.getCurrent()).isEqualTo(c); - assertThat(reader.advance()).isFalse(); - assertThat(reader.advance()).isTrue(); - assertThat(reader.getCurrent()).isEqualTo(a); - assertThat(reader.advance()).isFalse(); - assertThat(reader.advance()).isTrue(); - assertThat(reader.getCurrent()).isEqualTo(d); - assertThat(reader.advance()).isTrue(); - assertThat(reader.getCurrent()).isEqualTo(b); - assertThat(reader.advance()).isFalse(); - } - - @Test - public void returnsCurrentWatermark() throws IOException { - Instant expectedWatermark = new Instant(123456L); - when(shardReadersPool.getWatermark()).thenReturn(expectedWatermark); - - reader.start(); - Instant currentWatermark = reader.getWatermark(); - - assertThat(currentWatermark).isEqualTo(expectedWatermark); - } - - @Test - public void getSplitBacklogBytesShouldReturnLastSeenValueWhenKinesisExceptionsOccur() - throws TransientKinesisException, IOException { - reader.start(); - when(kinesisSource.getStreamName()).thenReturn("stream1"); - when(shardReadersPool.getLatestRecordTimestamp()) - .thenReturn(Instant.now().minus(Duration.standardMinutes(1))); - when(kinesis.getBacklogBytes(eq("stream1"), any(Instant.class))) - .thenReturn(10L) - .thenThrow(TransientKinesisException.class) - .thenReturn(20L); - - assertThat(reader.getSplitBacklogBytes()).isEqualTo(10); - assertThat(reader.getSplitBacklogBytes()).isEqualTo(10); - assertThat(reader.getSplitBacklogBytes()).isEqualTo(20); - } - - @Test - public void getSplitBacklogBytesShouldReturnLastSeenValueWhenCalledFrequently() - throws TransientKinesisException, IOException { - KinesisReader backlogCachingReader = spy(createReader(Duration.standardSeconds(30))); - backlogCachingReader.start(); - when(shardReadersPool.getLatestRecordTimestamp()) - .thenReturn(Instant.now().minus(Duration.standardMinutes(1))); - when(kinesisSource.getStreamName()).thenReturn("stream1"); - when(kinesis.getBacklogBytes(eq("stream1"), any(Instant.class))) - .thenReturn(10L) - .thenReturn(20L); - - assertThat(backlogCachingReader.getSplitBacklogBytes()).isEqualTo(10); - assertThat(backlogCachingReader.getSplitBacklogBytes()).isEqualTo(10); - } - - @Test - public void getSplitBacklogBytesShouldReturnBacklogUnknown() - throws IOException, TransientKinesisException { - reader.start(); - when(kinesisSource.getStreamName()).thenReturn("stream1"); - when(shardReadersPool.getLatestRecordTimestamp()) - .thenReturn(BoundedWindow.TIMESTAMP_MIN_VALUE) - .thenReturn(Instant.now().minus(Duration.standardMinutes(1))); - when(kinesis.getBacklogBytes(eq("stream1"), any(Instant.class))).thenReturn(10L); - - assertThat(reader.getSplitBacklogBytes()) - .isEqualTo(UnboundedSource.UnboundedReader.BACKLOG_UNKNOWN); - assertThat(reader.getSplitBacklogBytes()).isEqualTo(10); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java deleted file mode 100644 index 7df3643050ba..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import org.apache.beam.sdk.testing.CoderProperties; -import org.joda.time.Instant; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link KinesisRecordCoder}. */ -@RunWith(JUnit4.class) -public class KinesisRecordCoderTest { - - @Test - public void encodingAndDecodingWorks() throws Exception { - KinesisRecord record = - new KinesisRecord( - ByteBuffer.wrap("data".getBytes(StandardCharsets.UTF_8)), - "sequence", - 128L, - "partition", - Instant.now(), - Instant.now(), - "stream", - "shard"); - CoderProperties.coderDecodeEncodeEqual(new KinesisRecordCoder(), record); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisServiceMock.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisServiceMock.java deleted file mode 100644 index dcbe4224b630..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisServiceMock.java +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists.newArrayList; - -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.concurrent.atomic.AtomicInteger; -import org.joda.time.DateTime; - -/** Simple mock implementation of Kinesis service for testing, singletone. */ -public class KinesisServiceMock { - private static KinesisServiceMock instance; - - // Mock stream where client is supposed to write - private String existedStream; - - private AtomicInteger addedRecords = new AtomicInteger(0); - private AtomicInteger seqNumber = new AtomicInteger(0); - private List> shardedData; - - private KinesisServiceMock() {} - - public static synchronized KinesisServiceMock getInstance() { - if (instance == null) { - instance = new KinesisServiceMock(); - } - return instance; - } - - public synchronized void init(String stream, int shardsNum) { - existedStream = stream; - addedRecords.set(0); - seqNumber.set(0); - shardedData = newArrayList(); - for (int i = 0; i < shardsNum; i++) { - List shardData = newArrayList(); - shardedData.add(shardData); - } - } - - public AtomicInteger getAddedRecords() { - return addedRecords; - } - - public String getExistedStream() { - return existedStream; - } - - public synchronized void addShardedData(ByteBuffer data, DateTime arrival) { - String dataString = StandardCharsets.UTF_8.decode(data).toString(); - - List shardData = shardedData.get(0); - - seqNumber.incrementAndGet(); - AmazonKinesisMock.TestData testData = - new AmazonKinesisMock.TestData( - dataString, arrival.toInstant(), Integer.toString(seqNumber.get())); - shardData.add(testData); - - addedRecords.incrementAndGet(); - } - - public synchronized List> getShardedData() { - return shardedData; - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java deleted file mode 100644 index 2ba932b35a3d..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import org.apache.beam.sdk.options.Default; -import org.apache.beam.sdk.options.Description; -import org.apache.beam.sdk.testing.TestPipelineOptions; -import org.checkerframework.checker.nullness.qual.Nullable; - -/** Options for Kinesis integration tests. */ -public interface KinesisTestOptions extends TestPipelineOptions { - - @Description("AWS region where Kinesis stream resided") - @Default.String("aws-kinesis-region") - String getAwsKinesisRegion(); - - void setAwsKinesisRegion(String value); - - @Description("Kinesis stream name") - @Default.String("aws-kinesis-stream") - String getAwsKinesisStream(); - - void setAwsKinesisStream(String value); - - @Description("AWS secret key") - @Default.String("aws-secret-key") - String getAwsSecretKey(); - - void setAwsSecretKey(String value); - - @Description("AWS access key") - @Default.String("aws-access-key") - String getAwsAccessKey(); - - void setAwsAccessKey(String value); - - @Description("Aws service endpoint") - @Nullable - String getAwsServiceEndpoint(); - - void setAwsServiceEndpoint(String awsServiceEndpoint); - - @Description("Flag for certificate verification") - @Default.Boolean(true) - Boolean getAwsVerifyCertificate(); - - void setAwsVerifyCertificate(Boolean awsVerifyCertificate); - - @Description("Number of shards of stream") - @Default.Integer(2) - Integer getNumberOfShards(); - - void setNumberOfShards(Integer count); - - @Description("Number of records that will be written and read by the test") - @Default.Integer(1000) - Integer getNumberOfRecords(); - - void setNumberOfRecords(Integer count); - - @Description("Use localstack. Disable to test with real Kinesis") - @Default.Boolean(true) - Boolean getUseLocalstack(); - - void setUseLocalstack(Boolean useLocalstack); - - @Description("Create stream. Enabled when using localstack") - @Default.Boolean(false) - Boolean getCreateStream(); - - void setCreateStream(Boolean createStream); -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactoryTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactoryTest.java deleted file mode 100644 index 0d144d19a909..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RateLimitPolicyFactoryTest.java +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.apache.beam.sdk.io.kinesis.RateLimitPolicyFactory.withDefaultRateLimiter; -import static org.assertj.core.api.Assertions.assertThat; -import static org.joda.time.Duration.millis; -import static org.mockito.ArgumentMatchers.anyLong; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.Mockito.clearInvocations; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.never; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.verifyNoInteractions; -import static org.mockito.Mockito.verifyNoMoreInteractions; -import static org.mockito.Mockito.when; -import static org.powermock.api.mockito.PowerMockito.verifyStatic; - -import java.util.concurrent.atomic.AtomicLong; -import org.apache.beam.sdk.io.kinesis.RateLimitPolicyFactory.DefaultRateLimiter; -import org.apache.beam.sdk.util.BackOff; -import org.apache.beam.sdk.util.Sleeper; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.powermock.api.mockito.PowerMockito; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; - -@RunWith(PowerMockRunner.class) -@PrepareForTest(RateLimitPolicyFactory.class) -public class RateLimitPolicyFactoryTest { - - @Test - public void defaultRateLimiterShouldUseBackoffs() throws Exception { - assertThat(withDefaultRateLimiter().getRateLimitPolicy()) - .isInstanceOf(DefaultRateLimiter.class); - assertThat(withDefaultRateLimiter(millis(1), millis(1), millis(1)).getRateLimitPolicy()) - .isInstanceOf(DefaultRateLimiter.class); - - Sleeper sleeper = mock(Sleeper.class); - BackOff emptySuccess = mock(BackOff.class); - BackOff throttled = mock(BackOff.class); - - RateLimitPolicy policy = new DefaultRateLimiter(emptySuccess, throttled, sleeper); - - // reset emptySuccess after receiving at least 1 record, throttled is reset on any success - policy.onSuccess(ImmutableList.of(mock(KinesisRecord.class))); - - verify(emptySuccess).reset(); - verify(throttled).reset(); - verifyNoInteractions(sleeper); - clearInvocations(emptySuccess, throttled); - - when(emptySuccess.nextBackOffMillis()).thenReturn(88L, 99L); - // throttle if no records received, throttled is reset again - policy.onSuccess(ImmutableList.of()); - policy.onSuccess(ImmutableList.of()); - - verify(emptySuccess, times(2)).nextBackOffMillis(); - verify(throttled, times(2)).reset(); - verify(sleeper).sleep(88L); - verify(sleeper).sleep(99L); - verifyNoMoreInteractions(sleeper, throttled, emptySuccess); - clearInvocations(emptySuccess, throttled, sleeper); - - when(throttled.nextBackOffMillis()).thenReturn(111L, 222L); - // throttle onThrottle - policy.onThrottle(mock(KinesisClientThrottledException.class)); - policy.onThrottle(mock(KinesisClientThrottledException.class)); - - verify(throttled, times(2)).nextBackOffMillis(); - verify(sleeper).sleep(111L); - verify(sleeper).sleep(222L); - verifyNoMoreInteractions(sleeper, throttled, emptySuccess); - } - - @Test - public void withoutLimiterShouldDoNothing() throws Exception { - PowerMockito.spy(Thread.class); - PowerMockito.doNothing().when(Thread.class); - Thread.sleep(anyLong()); - RateLimitPolicy rateLimitPolicy = RateLimitPolicyFactory.withoutLimiter().getRateLimitPolicy(); - rateLimitPolicy.onSuccess(ImmutableList.of()); - verifyStatic(Thread.class, never()); - Thread.sleep(anyLong()); - } - - @Test - public void shouldDelayDefaultInterval() throws Exception { - PowerMockito.spy(Thread.class); - PowerMockito.doNothing().when(Thread.class); - Thread.sleep(anyLong()); - RateLimitPolicy rateLimitPolicy = RateLimitPolicyFactory.withFixedDelay().getRateLimitPolicy(); - rateLimitPolicy.onSuccess(ImmutableList.of()); - verifyStatic(Thread.class); - Thread.sleep(eq(1000L)); - } - - @Test - public void shouldDelayFixedInterval() throws Exception { - PowerMockito.spy(Thread.class); - PowerMockito.doNothing().when(Thread.class); - Thread.sleep(anyLong()); - RateLimitPolicy rateLimitPolicy = - RateLimitPolicyFactory.withFixedDelay(millis(500)).getRateLimitPolicy(); - rateLimitPolicy.onSuccess(ImmutableList.of()); - verifyStatic(Thread.class); - Thread.sleep(eq(500L)); - } - - @Test - public void shouldDelayDynamicInterval() throws Exception { - PowerMockito.spy(Thread.class); - PowerMockito.doNothing().when(Thread.class); - Thread.sleep(anyLong()); - AtomicLong delay = new AtomicLong(0L); - RateLimitPolicy rateLimitPolicy = - RateLimitPolicyFactory.withDelay(() -> millis(delay.getAndUpdate(d -> d ^ 1))) - .getRateLimitPolicy(); - rateLimitPolicy.onSuccess(ImmutableList.of()); - verifyStatic(Thread.class); - Thread.sleep(eq(0L)); - Thread.sleep(eq(1L)); - Thread.sleep(eq(0L)); - Thread.sleep(eq(1L)); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java deleted file mode 100644 index ad1e58c265e7..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.mockito.Mockito.when; - -import java.util.Collections; -import java.util.List; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists; -import org.assertj.core.api.Assertions; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -/** * */ -@RunWith(MockitoJUnitRunner.class) -public class RecordFilterTest { - - @Mock private ShardCheckpoint checkpoint; - @Mock private KinesisRecord record1, record2, record3, record4, record5; - - @Test - public void shouldFilterOutRecordsBeforeOrAtCheckpoint() { - when(checkpoint.isBeforeOrAt(record1)).thenReturn(false); - when(checkpoint.isBeforeOrAt(record2)).thenReturn(true); - when(checkpoint.isBeforeOrAt(record3)).thenReturn(true); - when(checkpoint.isBeforeOrAt(record4)).thenReturn(false); - when(checkpoint.isBeforeOrAt(record5)).thenReturn(true); - List records = Lists.newArrayList(record1, record2, record3, record4, record5); - RecordFilter underTest = new RecordFilter(); - - List retainedRecords = underTest.apply(records, checkpoint); - - Assertions.assertThat(retainedRecords).containsOnly(record2, record3, record5); - } - - @Test - public void shouldNotFailOnEmptyList() { - List records = Collections.emptyList(); - RecordFilter underTest = new RecordFilter(); - - List retainedRecords = underTest.apply(records, checkpoint); - - Assertions.assertThat(retainedRecords).isEmpty(); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java deleted file mode 100644 index 227542cb8055..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.LATEST; -import static com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream.TRIM_HORIZON; -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AFTER_SEQUENCE_NUMBER; -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_SEQUENCE_NUMBER; -import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_TIMESTAMP; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.anyString; -import static org.mockito.Matchers.eq; -import static org.mockito.Matchers.isNull; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber; -import com.amazonaws.services.kinesis.model.ShardIteratorType; -import java.io.IOException; -import org.joda.time.DateTime; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; - -/** */ -@RunWith(MockitoJUnitRunner.class) -public class ShardCheckpointTest { - - private static final String AT_SEQUENCE_SHARD_IT = "AT_SEQUENCE_SHARD_IT"; - private static final String AFTER_SEQUENCE_SHARD_IT = "AFTER_SEQUENCE_SHARD_IT"; - private static final String STREAM_NAME = "STREAM"; - private static final String SHARD_ID = "SHARD_ID"; - @Mock private SimplifiedKinesisClient client; - - @Before - public void setUp() throws IOException, TransientKinesisException { - when(client.getShardIterator( - eq(STREAM_NAME), - eq(SHARD_ID), - eq(AT_SEQUENCE_NUMBER), - anyString(), - isNull(Instant.class))) - .thenReturn(AT_SEQUENCE_SHARD_IT); - when(client.getShardIterator( - eq(STREAM_NAME), - eq(SHARD_ID), - eq(AFTER_SEQUENCE_NUMBER), - anyString(), - isNull(Instant.class))) - .thenReturn(AFTER_SEQUENCE_SHARD_IT); - } - - @Test - public void testProvidingShardIterator() throws IOException, TransientKinesisException { - assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", null).getShardIterator(client)) - .isEqualTo(AT_SEQUENCE_SHARD_IT); - assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", null).getShardIterator(client)) - .isEqualTo(AFTER_SEQUENCE_SHARD_IT); - assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client)) - .isEqualTo(AT_SEQUENCE_SHARD_IT); - assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client)) - .isEqualTo(AT_SEQUENCE_SHARD_IT); - } - - @Test - public void testComparisonWithExtendedSequenceNumber() { - assertThat( - new ShardCheckpoint("", "", new StartingPoint(LATEST)) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isTrue(); - - assertThat( - new ShardCheckpoint("", "", new StartingPoint(TRIM_HORIZON)) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isTrue(); - - assertThat( - checkpoint(AFTER_SEQUENCE_NUMBER, "10", 1L) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isTrue(); - - assertThat( - checkpoint(AT_SEQUENCE_NUMBER, "100", 0L) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isTrue(); - - assertThat( - checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isFalse(); - - assertThat( - checkpoint(AT_SEQUENCE_NUMBER, "100", 1L) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("100", 0L)))) - .isFalse(); - - assertThat( - checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L) - .isBeforeOrAt(recordWith(new ExtendedSequenceNumber("99", 1L)))) - .isFalse(); - } - - @Test - public void testComparisonWithTimestamp() { - DateTime referenceTimestamp = DateTime.now(); - - assertThat( - checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant()) - .isBeforeOrAt(recordWith(referenceTimestamp.minusMillis(10).toInstant()))) - .isFalse(); - - assertThat( - checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant()) - .isBeforeOrAt(recordWith(referenceTimestamp.toInstant()))) - .isTrue(); - - assertThat( - checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant()) - .isBeforeOrAt(recordWith(referenceTimestamp.plusMillis(10).toInstant()))) - .isTrue(); - } - - private KinesisRecord recordWith(ExtendedSequenceNumber extendedSequenceNumber) { - KinesisRecord record = mock(KinesisRecord.class); - when(record.getExtendedSequenceNumber()).thenReturn(extendedSequenceNumber); - return record; - } - - private ShardCheckpoint checkpoint( - ShardIteratorType iteratorType, String sequenceNumber, Long subSequenceNumber) { - return new ShardCheckpoint( - STREAM_NAME, SHARD_ID, iteratorType, sequenceNumber, subSequenceNumber); - } - - private KinesisRecord recordWith(Instant approximateArrivalTimestamp) { - KinesisRecord record = mock(KinesisRecord.class); - when(record.getApproximateArrivalTimestamp()).thenReturn(approximateArrivalTimestamp); - return record; - } - - private ShardCheckpoint checkpoint(ShardIteratorType iteratorType, Instant timestamp) { - return new ShardCheckpoint(STREAM_NAME, SHARD_ID, iteratorType, timestamp); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardReadersPoolTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardReadersPoolTest.java deleted file mode 100644 index 74c9446d316a..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardReadersPoolTest.java +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.util.Collections.singletonList; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.eq; -import static org.mockito.ArgumentMatchers.same; -import static org.mockito.Mockito.doReturn; -import static org.mockito.Mockito.timeout; -import static org.mockito.Mockito.times; -import static org.mockito.Mockito.verify; -import static org.mockito.Mockito.when; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.TimeUnit; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Stopwatch; -import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.Mockito; -import org.mockito.junit.MockitoJUnitRunner; -import org.mockito.stubbing.Answer; - -/** Tests {@link ShardReadersPool}. */ -@RunWith(MockitoJUnitRunner.Silent.class) -public class ShardReadersPoolTest { - - private static final int TIMEOUT_IN_MILLIS = (int) TimeUnit.SECONDS.toMillis(10); - - @Mock private ShardRecordsIterator firstIterator, secondIterator, thirdIterator, fourthIterator; - @Mock private ShardCheckpoint firstCheckpoint, secondCheckpoint; - @Mock private SimplifiedKinesisClient kinesis; - @Mock private KinesisRecord a, b, c, d; - @Mock private WatermarkPolicyFactory watermarkPolicyFactory; - @Mock private RateLimitPolicyFactory rateLimitPolicyFactory; - @Mock private RateLimitPolicy customRateLimitPolicy; - - private ShardReadersPool shardReadersPool; - private final Instant now = Instant.now(); - - @Before - public void setUp() throws TransientKinesisException { - when(a.getShardId()).thenReturn("shard1"); - when(b.getShardId()).thenReturn("shard1"); - when(c.getShardId()).thenReturn("shard2"); - when(d.getShardId()).thenReturn("shard2"); - when(firstCheckpoint.getShardId()).thenReturn("shard1"); - when(firstCheckpoint.getStreamName()).thenReturn("testStream"); - when(secondCheckpoint.getShardId()).thenReturn("shard2"); - when(firstIterator.getShardId()).thenReturn("shard1"); - when(firstIterator.getStreamName()).thenReturn("testStream"); - when(firstIterator.getCheckpoint()).thenReturn(firstCheckpoint); - when(secondIterator.getShardId()).thenReturn("shard2"); - when(secondIterator.getCheckpoint()).thenReturn(secondCheckpoint); - when(thirdIterator.getShardId()).thenReturn("shard3"); - when(fourthIterator.getShardId()).thenReturn("shard4"); - - WatermarkPolicy watermarkPolicy = - WatermarkPolicyFactory.withArrivalTimePolicy().createWatermarkPolicy(); - RateLimitPolicy rateLimitPolicy = RateLimitPolicyFactory.withoutLimiter().getRateLimitPolicy(); - - KinesisReaderCheckpoint checkpoint = - new KinesisReaderCheckpoint(ImmutableList.of(firstCheckpoint, secondCheckpoint)); - shardReadersPool = - Mockito.spy( - new ShardReadersPool( - kinesis, checkpoint, watermarkPolicyFactory, rateLimitPolicyFactory, 100)); - - when(watermarkPolicyFactory.createWatermarkPolicy()).thenReturn(watermarkPolicy); - when(rateLimitPolicyFactory.getRateLimitPolicy()).thenReturn(rateLimitPolicy); - - doReturn(firstIterator).when(shardReadersPool).createShardIterator(kinesis, firstCheckpoint); - doReturn(secondIterator).when(shardReadersPool).createShardIterator(kinesis, secondCheckpoint); - } - - @After - public void clean() { - shardReadersPool.stop(); - } - - @Test - public void shouldReturnAllRecords() - throws TransientKinesisException, KinesisShardClosedException { - when(firstIterator.readNextBatch()) - .thenReturn(Collections.emptyList()) - .thenReturn(ImmutableList.of(a, b)) - .thenReturn(Collections.emptyList()); - when(secondIterator.readNextBatch()) - .thenReturn(singletonList(c)) - .thenReturn(singletonList(d)) - .thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - List fetchedRecords = new ArrayList<>(); - while (fetchedRecords.size() < 4) { - CustomOptional nextRecord = shardReadersPool.nextRecord(); - if (nextRecord.isPresent()) { - fetchedRecords.add(nextRecord.get()); - } - } - assertThat(fetchedRecords).containsExactlyInAnyOrder(a, b, c, d); - assertThat(shardReadersPool.getRecordsQueue().remainingCapacity()).isEqualTo(100 * 2); - } - - @Test - public void shouldReturnAbsentOptionalWhenNoRecords() - throws TransientKinesisException, KinesisShardClosedException { - when(firstIterator.readNextBatch()).thenReturn(Collections.emptyList()); - when(secondIterator.readNextBatch()).thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - CustomOptional nextRecord = shardReadersPool.nextRecord(); - assertThat(nextRecord.isPresent()).isFalse(); - } - - @Test - public void shouldCheckpointReadRecords() - throws TransientKinesisException, KinesisShardClosedException { - when(firstIterator.readNextBatch()) - .thenReturn(ImmutableList.of(a, b)) - .thenReturn(Collections.emptyList()); - when(secondIterator.readNextBatch()) - .thenReturn(singletonList(c)) - .thenReturn(singletonList(d)) - .thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - int recordsFound = 0; - while (recordsFound < 4) { - CustomOptional nextRecord = shardReadersPool.nextRecord(); - if (nextRecord.isPresent()) { - recordsFound++; - KinesisRecord kinesisRecord = nextRecord.get(); - if ("shard1".equals(kinesisRecord.getShardId())) { - verify(firstIterator).ackRecord(kinesisRecord); - } else { - verify(secondIterator).ackRecord(kinesisRecord); - } - } - } - } - - @Test - public void shouldInterruptKinesisReadingAndStopShortly() - throws TransientKinesisException, KinesisShardClosedException { - when(firstIterator.readNextBatch()) - .thenAnswer( - (Answer>) - invocation -> { - Thread.sleep(TIMEOUT_IN_MILLIS / 2); - return Collections.emptyList(); - }); - shardReadersPool.start(); - - Stopwatch stopwatch = Stopwatch.createStarted(); - shardReadersPool.stop(); - assertThat(stopwatch.elapsed(TimeUnit.MILLISECONDS)).isLessThan(TIMEOUT_IN_MILLIS); - } - - @Test - public void shouldInterruptPuttingRecordsToQueueAndStopShortly() - throws TransientKinesisException, KinesisShardClosedException { - when(firstIterator.readNextBatch()).thenReturn(ImmutableList.of(a, b, c)); - KinesisReaderCheckpoint checkpoint = - new KinesisReaderCheckpoint(ImmutableList.of(firstCheckpoint, secondCheckpoint)); - - WatermarkPolicyFactory watermarkPolicyFactory = WatermarkPolicyFactory.withArrivalTimePolicy(); - RateLimitPolicyFactory rateLimitPolicyFactory = RateLimitPolicyFactory.withoutLimiter(); - ShardReadersPool shardReadersPool = - new ShardReadersPool( - kinesis, checkpoint, watermarkPolicyFactory, rateLimitPolicyFactory, 2); - shardReadersPool.start(); - - Stopwatch stopwatch = Stopwatch.createStarted(); - shardReadersPool.stop(); - assertThat(stopwatch.elapsed(TimeUnit.MILLISECONDS)).isLessThan(TIMEOUT_IN_MILLIS); - } - - @Test - public void shouldStopReadingShardAfterReceivingShardClosedException() throws Exception { - when(firstIterator.readNextBatch()).thenThrow(KinesisShardClosedException.class); - when(firstIterator.findSuccessiveShardRecordIterators()).thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - - verify(firstIterator, timeout(TIMEOUT_IN_MILLIS).times(1)).readNextBatch(); - verify(secondIterator, timeout(TIMEOUT_IN_MILLIS).atLeast(2)).readNextBatch(); - } - - @Test - public void shouldStartReadingSuccessiveShardsAfterReceivingShardClosedException() - throws Exception { - when(firstIterator.readNextBatch()).thenThrow(KinesisShardClosedException.class); - when(firstIterator.findSuccessiveShardRecordIterators()) - .thenReturn(ImmutableList.of(thirdIterator, fourthIterator)); - - shardReadersPool.start(); - - verify(thirdIterator, timeout(TIMEOUT_IN_MILLIS).atLeast(2)).readNextBatch(); - verify(fourthIterator, timeout(TIMEOUT_IN_MILLIS).atLeast(2)).readNextBatch(); - } - - @Test - public void shouldStopReadersPoolWhenLastShardReaderStopped() throws Exception { - when(firstIterator.readNextBatch()).thenThrow(KinesisShardClosedException.class); - when(firstIterator.findSuccessiveShardRecordIterators()).thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - - verify(firstIterator, timeout(TIMEOUT_IN_MILLIS).times(1)).readNextBatch(); - } - - @Test - public void shouldStopReadersPoolAlsoWhenExceptionsOccurDuringStopping() throws Exception { - when(firstIterator.readNextBatch()).thenThrow(KinesisShardClosedException.class); - when(firstIterator.findSuccessiveShardRecordIterators()) - .thenThrow(TransientKinesisException.class) - .thenReturn(Collections.emptyList()); - - shardReadersPool.start(); - - verify(firstIterator, timeout(TIMEOUT_IN_MILLIS).times(2)).readNextBatch(); - } - - @Test - public void shouldReturnAbsentOptionalWhenStartedWithNoIterators() throws Exception { - KinesisReaderCheckpoint checkpoint = new KinesisReaderCheckpoint(Collections.emptyList()); - WatermarkPolicyFactory watermarkPolicyFactory = WatermarkPolicyFactory.withArrivalTimePolicy(); - RateLimitPolicyFactory rateLimitPolicyFactory = RateLimitPolicyFactory.withoutLimiter(); - shardReadersPool = - Mockito.spy( - new ShardReadersPool( - kinesis, - checkpoint, - watermarkPolicyFactory, - rateLimitPolicyFactory, - ShardReadersPool.DEFAULT_CAPACITY_PER_SHARD)); - doReturn(firstIterator) - .when(shardReadersPool) - .createShardIterator(eq(kinesis), any(ShardCheckpoint.class)); - - shardReadersPool.start(); - - assertThat(shardReadersPool.nextRecord()).isEqualTo(CustomOptional.absent()); - } - - @Test - public void shouldForgetClosedShardIterator() throws Exception { - when(firstIterator.readNextBatch()).thenThrow(KinesisShardClosedException.class); - List emptyList = Collections.emptyList(); - when(firstIterator.findSuccessiveShardRecordIterators()).thenReturn(emptyList); - - shardReadersPool.start(); - verify(shardReadersPool) - .startReadingShards(ImmutableList.of(firstIterator, secondIterator), "testStream"); - verify(shardReadersPool, timeout(TIMEOUT_IN_MILLIS)) - .startReadingShards(emptyList, "testStream"); - - KinesisReaderCheckpoint checkpointMark = shardReadersPool.getCheckpointMark(); - assertThat(checkpointMark.iterator()) - .extracting("shardId", String.class) - .containsOnly("shard2") - .doesNotContain("shard1"); - } - - @Test - public void shouldReturnTheLeastWatermarkOfAllShards() throws TransientKinesisException { - Instant threeMin = now.minus(Duration.standardMinutes(3)); - Instant twoMin = now.minus(Duration.standardMinutes(2)); - - when(firstIterator.getShardWatermark()).thenReturn(threeMin).thenReturn(now); - when(secondIterator.getShardWatermark()).thenReturn(twoMin); - - shardReadersPool.start(); - - assertThat(shardReadersPool.getWatermark()).isEqualTo(threeMin); - assertThat(shardReadersPool.getWatermark()).isEqualTo(twoMin); - - verify(firstIterator, times(2)).getShardWatermark(); - verify(secondIterator, times(2)).getShardWatermark(); - } - - @Test - public void shouldReturnTheOldestFromLatestRecordTimestampOfAllShards() - throws TransientKinesisException { - Instant threeMin = now.minus(Duration.standardMinutes(3)); - Instant twoMin = now.minus(Duration.standardMinutes(2)); - - when(firstIterator.getLatestRecordTimestamp()).thenReturn(threeMin).thenReturn(now); - when(secondIterator.getLatestRecordTimestamp()).thenReturn(twoMin); - - shardReadersPool.start(); - - assertThat(shardReadersPool.getLatestRecordTimestamp()).isEqualTo(threeMin); - assertThat(shardReadersPool.getLatestRecordTimestamp()).isEqualTo(twoMin); - - verify(firstIterator, times(2)).getLatestRecordTimestamp(); - verify(secondIterator, times(2)).getLatestRecordTimestamp(); - } - - @Test - public void shouldCallRateLimitPolicy() - throws TransientKinesisException, KinesisShardClosedException, InterruptedException { - KinesisClientThrottledException e = new KinesisClientThrottledException("", null); - when(firstIterator.readNextBatch()) - .thenThrow(e) - .thenReturn(ImmutableList.of(a, b)) - .thenReturn(Collections.emptyList()); - when(secondIterator.readNextBatch()) - .thenReturn(singletonList(c)) - .thenReturn(singletonList(d)) - .thenReturn(Collections.emptyList()); - when(rateLimitPolicyFactory.getRateLimitPolicy()).thenReturn(customRateLimitPolicy); - - shardReadersPool.start(); - List fetchedRecords = new ArrayList<>(); - while (fetchedRecords.size() < 4) { - CustomOptional nextRecord = shardReadersPool.nextRecord(); - if (nextRecord.isPresent()) { - fetchedRecords.add(nextRecord.get()); - } - } - - verify(customRateLimitPolicy, timeout(TIMEOUT_IN_MILLIS)).onThrottle(same(e)); - verify(customRateLimitPolicy, timeout(TIMEOUT_IN_MILLIS)).onSuccess(eq(ImmutableList.of(a, b))); - verify(customRateLimitPolicy, timeout(TIMEOUT_IN_MILLIS)).onSuccess(eq(singletonList(c))); - verify(customRateLimitPolicy, timeout(TIMEOUT_IN_MILLIS)).onSuccess(eq(singletonList(d))); - verify(customRateLimitPolicy, timeout(TIMEOUT_IN_MILLIS).atLeastOnce()) - .onSuccess(eq(Collections.emptyList())); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java deleted file mode 100644 index 397dc9831a9a..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static java.util.Arrays.asList; -import static java.util.Collections.singletonList; -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.anyList; -import static org.mockito.Mockito.when; - -import com.amazonaws.services.kinesis.model.ExpiredIteratorException; -import java.io.IOException; -import java.util.Collections; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.Mock; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.junit.MockitoJUnitRunner; -import org.mockito.stubbing.Answer; - -/** Tests {@link ShardRecordsIterator}. */ -@RunWith(MockitoJUnitRunner.Silent.class) -public class ShardRecordsIteratorTest { - - private static final String INITIAL_ITERATOR = "INITIAL_ITERATOR"; - private static final String SECOND_ITERATOR = "SECOND_ITERATOR"; - private static final String SECOND_REFRESHED_ITERATOR = "SECOND_REFRESHED_ITERATOR"; - private static final String THIRD_ITERATOR = "THIRD_ITERATOR"; - private static final String STREAM_NAME = "STREAM_NAME"; - private static final String SHARD_ID = "SHARD_ID"; - private static final Instant NOW = Instant.now(); - - @Mock private SimplifiedKinesisClient kinesisClient; - @Mock private ShardCheckpoint firstCheckpoint, aCheckpoint, bCheckpoint, cCheckpoint, dCheckpoint; - @Mock private GetKinesisRecordsResult firstResult, secondResult, thirdResult; - @Mock private KinesisRecord a, b, c, d; - @Mock private RecordFilter recordFilter; - - private ShardRecordsIterator iterator; - - @Before - public void setUp() throws IOException, TransientKinesisException { - when(firstCheckpoint.getShardIterator(kinesisClient)).thenReturn(INITIAL_ITERATOR); - when(firstCheckpoint.getStreamName()).thenReturn(STREAM_NAME); - when(firstCheckpoint.getShardId()).thenReturn(SHARD_ID); - - when(firstCheckpoint.moveAfter(a)).thenReturn(aCheckpoint); - when(aCheckpoint.moveAfter(b)).thenReturn(bCheckpoint); - when(aCheckpoint.getStreamName()).thenReturn(STREAM_NAME); - when(aCheckpoint.getShardId()).thenReturn(SHARD_ID); - when(bCheckpoint.moveAfter(c)).thenReturn(cCheckpoint); - when(bCheckpoint.getStreamName()).thenReturn(STREAM_NAME); - when(bCheckpoint.getShardId()).thenReturn(SHARD_ID); - when(cCheckpoint.moveAfter(d)).thenReturn(dCheckpoint); - when(cCheckpoint.getStreamName()).thenReturn(STREAM_NAME); - when(cCheckpoint.getShardId()).thenReturn(SHARD_ID); - when(dCheckpoint.getStreamName()).thenReturn(STREAM_NAME); - when(dCheckpoint.getShardId()).thenReturn(SHARD_ID); - - when(kinesisClient.getRecords(INITIAL_ITERATOR, STREAM_NAME, SHARD_ID)).thenReturn(firstResult); - when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID)).thenReturn(secondResult); - when(kinesisClient.getRecords(THIRD_ITERATOR, STREAM_NAME, SHARD_ID)).thenReturn(thirdResult); - - when(firstResult.getNextShardIterator()).thenReturn(SECOND_ITERATOR); - when(secondResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR); - when(thirdResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR); - - when(firstResult.getRecords()).thenReturn(Collections.emptyList()); - when(secondResult.getRecords()).thenReturn(Collections.emptyList()); - when(thirdResult.getRecords()).thenReturn(Collections.emptyList()); - - when(recordFilter.apply(anyList(), any(ShardCheckpoint.class))) - .thenAnswer(new IdentityAnswer()); - - WatermarkPolicyFactory watermarkPolicyFactory = WatermarkPolicyFactory.withArrivalTimePolicy(); - iterator = - new ShardRecordsIterator( - firstCheckpoint, kinesisClient, watermarkPolicyFactory, recordFilter); - } - - @Test - public void goesThroughAvailableRecords() - throws IOException, TransientKinesisException, KinesisShardClosedException { - when(firstResult.getRecords()).thenReturn(asList(a, b, c)); - when(secondResult.getRecords()).thenReturn(singletonList(d)); - when(thirdResult.getRecords()).thenReturn(Collections.emptyList()); - - assertThat(iterator.getCheckpoint()).isEqualTo(firstCheckpoint); - assertThat(iterator.readNextBatch()).isEqualTo(asList(a, b, c)); - assertThat(iterator.readNextBatch()).isEqualTo(singletonList(d)); - assertThat(iterator.readNextBatch()).isEqualTo(Collections.emptyList()); - } - - @Test - public void conformingRecordsMovesCheckpoint() throws IOException, TransientKinesisException { - when(firstResult.getRecords()).thenReturn(asList(a, b, c)); - when(secondResult.getRecords()).thenReturn(singletonList(d)); - when(thirdResult.getRecords()).thenReturn(Collections.emptyList()); - - when(a.getApproximateArrivalTimestamp()).thenReturn(NOW); - when(b.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(1))); - when(c.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(2))); - when(d.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(3))); - - iterator.ackRecord(a); - assertThat(iterator.getCheckpoint()).isEqualTo(aCheckpoint); - iterator.ackRecord(b); - assertThat(iterator.getCheckpoint()).isEqualTo(bCheckpoint); - iterator.ackRecord(c); - assertThat(iterator.getCheckpoint()).isEqualTo(cCheckpoint); - iterator.ackRecord(d); - assertThat(iterator.getCheckpoint()).isEqualTo(dCheckpoint); - } - - @Test - public void refreshesExpiredIterator() - throws IOException, TransientKinesisException, KinesisShardClosedException { - when(firstResult.getRecords()).thenReturn(singletonList(a)); - when(secondResult.getRecords()).thenReturn(singletonList(b)); - - when(a.getApproximateArrivalTimestamp()).thenReturn(NOW); - when(b.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(1))); - - when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID)) - .thenThrow(ExpiredIteratorException.class); - when(aCheckpoint.getShardIterator(kinesisClient)).thenReturn(SECOND_REFRESHED_ITERATOR); - when(kinesisClient.getRecords(SECOND_REFRESHED_ITERATOR, STREAM_NAME, SHARD_ID)) - .thenReturn(secondResult); - - assertThat(iterator.readNextBatch()).isEqualTo(singletonList(a)); - iterator.ackRecord(a); - assertThat(iterator.readNextBatch()).isEqualTo(singletonList(b)); - assertThat(iterator.readNextBatch()).isEqualTo(Collections.emptyList()); - } - - @Test - public void tracksLatestRecordTimestamp() { - when(firstResult.getRecords()).thenReturn(singletonList(a)); - when(secondResult.getRecords()).thenReturn(asList(b, c)); - when(thirdResult.getRecords()).thenReturn(singletonList(c)); - - when(a.getApproximateArrivalTimestamp()).thenReturn(NOW); - when(b.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(4))); - when(c.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(2))); - when(d.getApproximateArrivalTimestamp()).thenReturn(NOW.plus(Duration.standardSeconds(6))); - - iterator.ackRecord(a); - assertThat(iterator.getLatestRecordTimestamp()).isEqualTo(NOW); - iterator.ackRecord(b); - assertThat(iterator.getLatestRecordTimestamp()) - .isEqualTo(NOW.plus(Duration.standardSeconds(4))); - iterator.ackRecord(c); - assertThat(iterator.getLatestRecordTimestamp()) - .isEqualTo(NOW.plus(Duration.standardSeconds(4))); - iterator.ackRecord(d); - assertThat(iterator.getLatestRecordTimestamp()) - .isEqualTo(NOW.plus(Duration.standardSeconds(6))); - } - - private static class IdentityAnswer implements Answer { - - @Override - public Object answer(InvocationOnMock invocation) throws Throwable { - return invocation.getArguments()[0]; - } - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java deleted file mode 100644 index 4a7fed20af98..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java +++ /dev/null @@ -1,614 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.assertj.core.api.Assertions.failBecauseExceptionWasNotThrown; -import static org.mockito.Matchers.any; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.reset; -import static org.mockito.Mockito.verifyZeroInteractions; -import static org.mockito.Mockito.when; - -import com.amazonaws.AmazonServiceException; -import com.amazonaws.AmazonServiceException.ErrorType; -import com.amazonaws.services.cloudwatch.AmazonCloudWatch; -import com.amazonaws.services.cloudwatch.model.Datapoint; -import com.amazonaws.services.cloudwatch.model.GetMetricStatisticsRequest; -import com.amazonaws.services.cloudwatch.model.GetMetricStatisticsResult; -import com.amazonaws.services.kinesis.AmazonKinesis; -import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream; -import com.amazonaws.services.kinesis.model.DescribeStreamSummaryRequest; -import com.amazonaws.services.kinesis.model.DescribeStreamSummaryResult; -import com.amazonaws.services.kinesis.model.ExpiredIteratorException; -import com.amazonaws.services.kinesis.model.GetRecordsRequest; -import com.amazonaws.services.kinesis.model.GetRecordsResult; -import com.amazonaws.services.kinesis.model.GetShardIteratorRequest; -import com.amazonaws.services.kinesis.model.GetShardIteratorResult; -import com.amazonaws.services.kinesis.model.LimitExceededException; -import com.amazonaws.services.kinesis.model.ListShardsRequest; -import com.amazonaws.services.kinesis.model.ListShardsResult; -import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededException; -import com.amazonaws.services.kinesis.model.Record; -import com.amazonaws.services.kinesis.model.Shard; -import com.amazonaws.services.kinesis.model.ShardFilter; -import com.amazonaws.services.kinesis.model.ShardFilterType; -import com.amazonaws.services.kinesis.model.ShardIteratorType; -import com.amazonaws.services.kinesis.model.StreamDescriptionSummary; -import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.function.Supplier; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.joda.time.Minutes; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.mockito.InjectMocks; -import org.mockito.Mock; -import org.mockito.junit.MockitoJUnitRunner; -import org.mockito.stubbing.Answer; - -/** * */ -@RunWith(MockitoJUnitRunner.class) -public class SimplifiedKinesisClientTest { - - private static final String STREAM = "stream"; - private static final String SHARD_1 = "shard-01"; - private static final String SHARD_2 = "shard-02"; - private static final String SHARD_3 = "shard-03"; - private static final String SHARD_ITERATOR = "iterator"; - private static final String SEQUENCE_NUMBER = "abc123"; - private static final Instant CURRENT_TIMESTAMP = Instant.parse("2000-01-01T15:00:00.000Z"); - - @Mock private AmazonKinesis kinesis; - @Mock private AmazonCloudWatch cloudWatch; - @Mock private Supplier currentInstantSupplier; - @InjectMocks private SimplifiedKinesisClient underTest; - - @Test - public void shouldReturnIteratorStartingWithSequenceNumber() throws Exception { - when(kinesis.getShardIterator( - new GetShardIteratorRequest() - .withStreamName(STREAM) - .withShardId(SHARD_1) - .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER) - .withStartingSequenceNumber(SEQUENCE_NUMBER))) - .thenReturn(new GetShardIteratorResult().withShardIterator(SHARD_ITERATOR)); - - String stream = - underTest.getShardIterator( - STREAM, SHARD_1, ShardIteratorType.AT_SEQUENCE_NUMBER, SEQUENCE_NUMBER, null); - - assertThat(stream).isEqualTo(SHARD_ITERATOR); - } - - @Test - public void shouldReturnIteratorStartingWithTimestamp() throws Exception { - Instant timestamp = Instant.now(); - when(kinesis.getShardIterator( - new GetShardIteratorRequest() - .withStreamName(STREAM) - .withShardId(SHARD_1) - .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER) - .withTimestamp(timestamp.toDate()))) - .thenReturn(new GetShardIteratorResult().withShardIterator(SHARD_ITERATOR)); - - String stream = - underTest.getShardIterator( - STREAM, SHARD_1, ShardIteratorType.AT_SEQUENCE_NUMBER, null, timestamp); - - assertThat(stream).isEqualTo(SHARD_ITERATOR); - } - - @Test - public void shouldHandleExpiredIterationExceptionForGetShardIterator() { - shouldHandleGetShardIteratorError( - new ExpiredIteratorException(""), ExpiredIteratorException.class); - } - - @Test - public void shouldHandleLimitExceededExceptionForGetShardIterator() { - shouldHandleGetShardIteratorError( - new LimitExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleProvisionedThroughputExceededExceptionForGetShardIterator() { - shouldHandleGetShardIteratorError( - new ProvisionedThroughputExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleServiceErrorForGetShardIterator() { - shouldHandleGetShardIteratorError( - newAmazonServiceException(ErrorType.Service), TransientKinesisException.class); - } - - @Test - public void shouldHandleClientErrorForGetShardIterator() { - shouldHandleGetShardIteratorError( - newAmazonServiceException(ErrorType.Client), RuntimeException.class); - } - - @Test - public void shouldHandleUnexpectedExceptionForGetShardIterator() { - shouldHandleGetShardIteratorError(new NullPointerException(), RuntimeException.class); - } - - private void shouldHandleGetShardIteratorError( - Exception thrownException, Class expectedExceptionClass) { - GetShardIteratorRequest request = - new GetShardIteratorRequest() - .withStreamName(STREAM) - .withShardId(SHARD_1) - .withShardIteratorType(ShardIteratorType.LATEST); - - when(kinesis.getShardIterator(request)).thenThrow(thrownException); - - try { - underTest.getShardIterator(STREAM, SHARD_1, ShardIteratorType.LATEST, null, null); - failBecauseExceptionWasNotThrown(expectedExceptionClass); - } catch (Exception e) { - assertThat(e).isExactlyInstanceOf(expectedExceptionClass); - } finally { - reset(kinesis); - } - } - - @Test - public void shouldListAllShardsForTrimHorizon() throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - ShardFilter shardFilter = new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint( - STREAM, new StartingPoint(InitialPositionInStream.TRIM_HORIZON)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForTrimHorizonWithPagedResults() throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - ShardFilter shardFilter = new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - - String nextListShardsToken = "testNextToken"; - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn( - new ListShardsResult().withShards(shard1, shard2).withNextToken(nextListShardsToken)); - - when(kinesis.listShards( - new ListShardsRequest() - .withMaxResults(1_000) - .withShardFilter(shardFilter) - .withNextToken(nextListShardsToken))) - .thenReturn(new ListShardsResult().withShards(shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint( - STREAM, new StartingPoint(InitialPositionInStream.TRIM_HORIZON)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForTimestampWithinStreamRetentionAfterStreamCreationTimestamp() - throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - int hoursDifference = 1; - int retentionPeriodHours = hoursDifference * 3; - Instant streamCreationTimestamp = - CURRENT_TIMESTAMP.minus(Duration.standardHours(retentionPeriodHours)); - Instant startingPointTimestamp = - streamCreationTimestamp.plus(Duration.standardHours(hoursDifference)); - - when(currentInstantSupplier.get()).thenReturn(CURRENT_TIMESTAMP); - - when(kinesis.describeStreamSummary(new DescribeStreamSummaryRequest().withStreamName(STREAM))) - .thenReturn( - new DescribeStreamSummaryResult() - .withStreamDescriptionSummary( - new StreamDescriptionSummary() - .withRetentionPeriodHours(retentionPeriodHours) - .withStreamCreationTimestamp(streamCreationTimestamp.toDate()))); - - ShardFilter shardFilter = - new ShardFilter() - .withType(ShardFilterType.AT_TIMESTAMP) - .withTimestamp(startingPointTimestamp.toDate()); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint(STREAM, new StartingPoint(startingPointTimestamp)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void - shouldListAllShardsForTimestampWithRetriedDescribeStreamSummaryCallAfterStreamCreationTimestamp() - throws TransientKinesisException { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - int hoursDifference = 1; - int retentionPeriodHours = hoursDifference * 3; - Instant streamCreationTimestamp = - CURRENT_TIMESTAMP.minus(Duration.standardHours(retentionPeriodHours)); - Instant startingPointTimestamp = - streamCreationTimestamp.plus(Duration.standardHours(hoursDifference)); - - when(currentInstantSupplier.get()).thenReturn(CURRENT_TIMESTAMP); - - when(kinesis.describeStreamSummary(new DescribeStreamSummaryRequest().withStreamName(STREAM))) - .thenThrow(new LimitExceededException("Fake Exception: Limit exceeded")) - .thenReturn( - new DescribeStreamSummaryResult() - .withStreamDescriptionSummary( - new StreamDescriptionSummary() - .withRetentionPeriodHours(retentionPeriodHours) - .withStreamCreationTimestamp(streamCreationTimestamp.toDate()))); - - ShardFilter shardFilter = - new ShardFilter() - .withType(ShardFilterType.AT_TIMESTAMP) - .withTimestamp(startingPointTimestamp.toDate()); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint(STREAM, new StartingPoint(startingPointTimestamp)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForTimestampOutsideStreamRetentionAfterStreamCreationTimestamp() - throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - int retentionPeriodHours = 3; - int startingPointHours = 5; - int hoursSinceStreamCreation = 6; - - Instant streamCreationTimestamp = - CURRENT_TIMESTAMP.minus(Duration.standardHours(hoursSinceStreamCreation)); - Instant startingPointTimestampAfterStreamRetentionTimestamp = - CURRENT_TIMESTAMP.minus(Duration.standardHours(startingPointHours)); - - when(currentInstantSupplier.get()).thenReturn(CURRENT_TIMESTAMP); - - DescribeStreamSummaryRequest describeStreamRequest = - new DescribeStreamSummaryRequest().withStreamName(STREAM); - when(kinesis.describeStreamSummary(describeStreamRequest)) - .thenReturn( - new DescribeStreamSummaryResult() - .withStreamDescriptionSummary( - new StreamDescriptionSummary() - .withRetentionPeriodHours(retentionPeriodHours) - .withStreamCreationTimestamp(streamCreationTimestamp.toDate()))); - - ShardFilter shardFilter = new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint( - STREAM, new StartingPoint(startingPointTimestampAfterStreamRetentionTimestamp)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForTimestampBeforeStreamCreationTimestamp() throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - Instant startingPointTimestamp = Instant.parse("2000-01-01T15:00:00.000Z"); - Instant streamCreationTimestamp = startingPointTimestamp.plus(Duration.standardHours(1)); - - DescribeStreamSummaryRequest describeStreamRequest = - new DescribeStreamSummaryRequest().withStreamName(STREAM); - when(kinesis.describeStreamSummary(describeStreamRequest)) - .thenReturn( - new DescribeStreamSummaryResult() - .withStreamDescriptionSummary( - new StreamDescriptionSummary() - .withStreamCreationTimestamp(streamCreationTimestamp.toDate()))); - - ShardFilter shardFilter = new ShardFilter().withType(ShardFilterType.AT_TRIM_HORIZON); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(shardFilter) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint(STREAM, new StartingPoint(startingPointTimestamp)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForLatest() throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withShardFilter(new ShardFilter().withType(ShardFilterType.AT_LATEST)) - .withMaxResults(1_000))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = - underTest.listShardsAtPoint(STREAM, new StartingPoint(InitialPositionInStream.LATEST)); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldListAllShardsForExclusiveStartShardId() throws Exception { - Shard shard1 = new Shard().withShardId(SHARD_1); - Shard shard2 = new Shard().withShardId(SHARD_2); - Shard shard3 = new Shard().withShardId(SHARD_3); - - String exclusiveStartShardId = "exclusiveStartShardId"; - - when(kinesis.listShards( - new ListShardsRequest() - .withStreamName(STREAM) - .withMaxResults(1_000) - .withShardFilter( - new ShardFilter() - .withType(ShardFilterType.AFTER_SHARD_ID) - .withShardId(exclusiveStartShardId)))) - .thenReturn(new ListShardsResult().withShards(shard1, shard2, shard3).withNextToken(null)); - - List shards = underTest.listShardsFollowingClosedShard(STREAM, exclusiveStartShardId); - - assertThat(shards).containsOnly(shard1, shard2, shard3); - } - - @Test - public void shouldHandleExpiredIterationExceptionForShardListing() { - shouldHandleShardListingError(new ExpiredIteratorException(""), ExpiredIteratorException.class); - } - - @Test - public void shouldHandleLimitExceededExceptionForShardListing() { - shouldHandleShardListingError( - new LimitExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleProvisionedThroughputExceededExceptionForShardListing() { - shouldHandleShardListingError( - new ProvisionedThroughputExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleServiceErrorForShardListing() { - shouldHandleShardListingError( - newAmazonServiceException(ErrorType.Service), TransientKinesisException.class); - } - - @Test - public void shouldHandleClientErrorForShardListing() { - shouldHandleShardListingError( - newAmazonServiceException(ErrorType.Client), RuntimeException.class); - } - - @Test - public void shouldHandleUnexpectedExceptionForShardListing() { - shouldHandleShardListingError(new NullPointerException(), RuntimeException.class); - } - - private void shouldHandleShardListingError( - Exception thrownException, Class expectedExceptionClass) { - when(kinesis.listShards(any(ListShardsRequest.class))).thenThrow(thrownException); - try { - underTest.listShardsAtPoint(STREAM, new StartingPoint(InitialPositionInStream.TRIM_HORIZON)); - failBecauseExceptionWasNotThrown(expectedExceptionClass); - } catch (Exception e) { - assertThat(e).isExactlyInstanceOf(expectedExceptionClass); - } finally { - reset(kinesis); - } - } - - @Test - public void shouldCountBytesWhenSingleDataPointReturned() throws Exception { - Instant countSince = new Instant("2017-04-06T10:00:00.000Z"); - Instant countTo = new Instant("2017-04-06T11:00:00.000Z"); - Minutes periodTime = Minutes.minutesBetween(countSince, countTo); - GetMetricStatisticsRequest metricStatisticsRequest = - underTest.createMetricStatisticsRequest(STREAM, countSince, countTo, periodTime); - GetMetricStatisticsResult result = - new GetMetricStatisticsResult().withDatapoints(new Datapoint().withSum(1.0)); - - when(cloudWatch.getMetricStatistics(metricStatisticsRequest)).thenReturn(result); - - long backlogBytes = underTest.getBacklogBytes(STREAM, countSince, countTo); - - assertThat(backlogBytes).isEqualTo(1L); - } - - @Test - public void shouldCountBytesWhenMultipleDataPointsReturned() throws Exception { - Instant countSince = new Instant("2017-04-06T10:00:00.000Z"); - Instant countTo = new Instant("2017-04-06T11:00:00.000Z"); - Minutes periodTime = Minutes.minutesBetween(countSince, countTo); - GetMetricStatisticsRequest metricStatisticsRequest = - underTest.createMetricStatisticsRequest(STREAM, countSince, countTo, periodTime); - GetMetricStatisticsResult result = - new GetMetricStatisticsResult() - .withDatapoints( - new Datapoint().withSum(1.0), - new Datapoint().withSum(3.0), - new Datapoint().withSum(2.0)); - - when(cloudWatch.getMetricStatistics(metricStatisticsRequest)).thenReturn(result); - - long backlogBytes = underTest.getBacklogBytes(STREAM, countSince, countTo); - - assertThat(backlogBytes).isEqualTo(6L); - } - - @Test - public void shouldNotCallCloudWatchWhenSpecifiedPeriodTooShort() throws Exception { - Instant countSince = new Instant("2017-04-06T10:00:00.000Z"); - Instant countTo = new Instant("2017-04-06T10:00:02.000Z"); - - long backlogBytes = underTest.getBacklogBytes(STREAM, countSince, countTo); - - assertThat(backlogBytes).isEqualTo(0L); - verifyZeroInteractions(cloudWatch); - } - - @Test - public void shouldHandleLimitExceededExceptionForGetBacklogBytes() { - shouldHandleGetBacklogBytesError( - new LimitExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleProvisionedThroughputExceededExceptionForGetBacklogBytes() { - shouldHandleGetBacklogBytesError( - new ProvisionedThroughputExceededException(""), KinesisClientThrottledException.class); - } - - @Test - public void shouldHandleServiceErrorForGetBacklogBytes() { - shouldHandleGetBacklogBytesError( - newAmazonServiceException(ErrorType.Service), TransientKinesisException.class); - } - - @Test - public void shouldHandleClientErrorForGetBacklogBytes() { - shouldHandleGetBacklogBytesError( - newAmazonServiceException(ErrorType.Client), RuntimeException.class); - } - - @Test - public void shouldHandleUnexpectedExceptionForGetBacklogBytes() { - shouldHandleGetBacklogBytesError(new NullPointerException(), RuntimeException.class); - } - - private void shouldHandleGetBacklogBytesError( - Exception thrownException, Class expectedExceptionClass) { - Instant countSince = new Instant("2017-04-06T10:00:00.000Z"); - Instant countTo = new Instant("2017-04-06T11:00:00.000Z"); - Minutes periodTime = Minutes.minutesBetween(countSince, countTo); - GetMetricStatisticsRequest metricStatisticsRequest = - underTest.createMetricStatisticsRequest(STREAM, countSince, countTo, periodTime); - - when(cloudWatch.getMetricStatistics(metricStatisticsRequest)).thenThrow(thrownException); - try { - underTest.getBacklogBytes(STREAM, countSince, countTo); - failBecauseExceptionWasNotThrown(expectedExceptionClass); - } catch (Exception e) { - assertThat(e).isExactlyInstanceOf(expectedExceptionClass); - } finally { - reset(kinesis); - } - } - - private AmazonServiceException newAmazonServiceException(ErrorType errorType) { - AmazonServiceException exception = new AmazonServiceException(""); - exception.setErrorType(errorType); - return exception; - } - - @Test - public void shouldReturnLimitedNumberOfRecords() throws Exception { - final Integer limit = 100; - - doAnswer( - (Answer) - invocation -> { - GetRecordsRequest request = (GetRecordsRequest) invocation.getArguments()[0]; - List records = generateRecords(request.getLimit()); - return new GetRecordsResult().withRecords(records).withMillisBehindLatest(1000L); - }) - .when(kinesis) - .getRecords(any(GetRecordsRequest.class)); - - GetKinesisRecordsResult result = underTest.getRecords(SHARD_ITERATOR, STREAM, SHARD_1, limit); - assertThat(result.getRecords().size()).isEqualTo(limit); - } - - private List generateRecords(int num) { - List records = new ArrayList<>(); - for (int i = 0; i < num; i++) { - byte[] value = new byte[1024]; - Arrays.fill(value, (byte) i); - records.add( - new Record() - .withSequenceNumber(String.valueOf(i)) - .withPartitionKey("key") - .withData(ByteBuffer.wrap(value))); - } - return records; - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyTest.java deleted file mode 100644 index ce5c555a4dfb..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/WatermarkPolicyTest.java +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis; - -import static org.assertj.core.api.Assertions.assertThat; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; -import static org.powermock.api.mockito.PowerMockito.mockStatic; - -import org.apache.beam.sdk.transforms.SerializableFunction; -import org.joda.time.Duration; -import org.joda.time.Instant; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.powermock.core.classloader.annotations.PrepareForTest; -import org.powermock.modules.junit4.PowerMockRunner; - -/** Tests {@link WatermarkPolicy}. */ -@RunWith(PowerMockRunner.class) -@PrepareForTest(Instant.class) -public class WatermarkPolicyTest { - private static final Instant NOW = Instant.now(); - - @Test - public void shouldAdvanceWatermarkWithTheArrivalTimeFromKinesisRecords() { - WatermarkPolicy policy = WatermarkPolicyFactory.withArrivalTimePolicy().createWatermarkPolicy(); - - KinesisRecord a = mock(KinesisRecord.class); - KinesisRecord b = mock(KinesisRecord.class); - - Instant time1 = NOW.minus(Duration.standardSeconds(30L)); - Instant time2 = NOW.minus(Duration.standardSeconds(20L)); - when(a.getApproximateArrivalTimestamp()).thenReturn(time1); - when(b.getApproximateArrivalTimestamp()).thenReturn(time2); - - policy.update(a); - assertThat(policy.getWatermark()).isEqualTo(time1); - policy.update(b); - assertThat(policy.getWatermark()).isEqualTo(time2); - } - - @Test - public void shouldOnlyAdvanceTheWatermark() { - WatermarkPolicy policy = WatermarkPolicyFactory.withArrivalTimePolicy().createWatermarkPolicy(); - - KinesisRecord a = mock(KinesisRecord.class); - KinesisRecord b = mock(KinesisRecord.class); - KinesisRecord c = mock(KinesisRecord.class); - - Instant time1 = NOW.minus(Duration.standardSeconds(30L)); - Instant time2 = NOW.minus(Duration.standardSeconds(20L)); - Instant time3 = NOW.minus(Duration.standardSeconds(40L)); - when(a.getApproximateArrivalTimestamp()).thenReturn(time1); - when(b.getApproximateArrivalTimestamp()).thenReturn(time2); - // time3 is before time2 - when(c.getApproximateArrivalTimestamp()).thenReturn(time3); - - policy.update(a); - assertThat(policy.getWatermark()).isEqualTo(time1); - policy.update(b); - assertThat(policy.getWatermark()).isEqualTo(time2); - policy.update(c); - // watermark doesn't go back in time - assertThat(policy.getWatermark()).isEqualTo(time2); - } - - @Test - public void shouldAdvanceWatermarkWhenThereAreNoIncomingRecords() { - WatermarkParameters standardWatermarkParams = WatermarkParameters.create(); - WatermarkPolicy policy = - WatermarkPolicyFactory.withCustomWatermarkPolicy(standardWatermarkParams) - .createWatermarkPolicy(); - - mockStatic(Instant.class); - - Instant time1 = NOW.minus(Duration.standardSeconds(500)); // returned when update is called - Instant time2 = - NOW.minus( - Duration.standardSeconds(498)); // returned when getWatermark is called the first time - Instant time3 = NOW; // returned when getWatermark is called the second time - Instant arrivalTime = NOW.minus(Duration.standardSeconds(510)); - Duration watermarkIdleTimeThreshold = - standardWatermarkParams.getWatermarkIdleDurationThreshold(); - - when(Instant.now()).thenReturn(time1).thenReturn(time2).thenReturn(time3); - - KinesisRecord a = mock(KinesisRecord.class); - when(a.getApproximateArrivalTimestamp()).thenReturn(arrivalTime); - - policy.update(a); - - // returns the latest event time when the watermark - assertThat(policy.getWatermark()).isEqualTo(arrivalTime); - // advance the watermark to [NOW - watermark idle time threshold] - assertThat(policy.getWatermark()).isEqualTo(time3.minus(watermarkIdleTimeThreshold)); - } - - @Test - public void shouldAdvanceWatermarkToNowWithProcessingTimePolicy() { - WatermarkPolicy policy = - WatermarkPolicyFactory.withProcessingTimePolicy().createWatermarkPolicy(); - - mockStatic(Instant.class); - - Instant time1 = NOW.minus(Duration.standardSeconds(5)); - Instant time2 = NOW.minus(Duration.standardSeconds(4)); - - when(Instant.now()).thenReturn(time1).thenReturn(time2); - - assertThat(policy.getWatermark()).isEqualTo(time1); - assertThat(policy.getWatermark()).isEqualTo(time2); - } - - @Test - public void shouldAdvanceWatermarkWithCustomTimePolicy() { - SerializableFunction timestampFn = - (record) -> record.getApproximateArrivalTimestamp().plus(Duration.standardMinutes(1)); - - WatermarkPolicy policy = - WatermarkPolicyFactory.withCustomWatermarkPolicy( - WatermarkParameters.create().withTimestampFn(timestampFn)) - .createWatermarkPolicy(); - - KinesisRecord a = mock(KinesisRecord.class); - KinesisRecord b = mock(KinesisRecord.class); - - Instant time1 = NOW.minus(Duration.standardSeconds(30L)); - Instant time2 = NOW.minus(Duration.standardSeconds(20L)); - when(a.getApproximateArrivalTimestamp()).thenReturn(time1); - when(b.getApproximateArrivalTimestamp()).thenReturn(time2); - - policy.update(a); - assertThat(policy.getWatermark()).isEqualTo(time1.plus(Duration.standardMinutes(1))); - policy.update(b); - assertThat(policy.getWatermark()).isEqualTo(time2.plus(Duration.standardMinutes(1))); - } - - @Test - public void shouldUpdateWatermarkParameters() { - SerializableFunction fn = input -> Instant.now(); - Duration idleDurationThreshold = Duration.standardSeconds(30); - - WatermarkParameters parameters = - WatermarkParameters.create() - .withTimestampFn(fn) - .withWatermarkIdleDurationThreshold(idleDurationThreshold); - - assertThat(parameters.getTimestampFn()).isEqualTo(fn); - assertThat(parameters.getWatermarkIdleDurationThreshold()).isEqualTo(idleDurationThreshold); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsModuleTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsModuleTest.java deleted file mode 100644 index e58825ec4b9d..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsModuleTest.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis.serde; - -import static org.apache.commons.lang3.reflect.FieldUtils.readField; -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.hasItem; -import static org.hamcrest.Matchers.not; -import static org.junit.Assert.assertEquals; - -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.PropertiesFileCredentialsProvider; -import com.amazonaws.auth.SystemPropertiesCredentialsProvider; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import com.fasterxml.jackson.databind.Module; -import com.fasterxml.jackson.databind.ObjectMapper; -import java.util.List; -import org.apache.beam.sdk.util.common.ReflectHelpers; -import org.hamcrest.Matchers; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -/** Tests {@link AwsModule}. */ -@RunWith(JUnit4.class) -public class AwsModuleTest { - private static final String ACCESS_KEY_ID = "ACCESS_KEY_ID"; - private static final String SECRET_ACCESS_KEY = "SECRET_ACCESS_KEY"; - private static final String SESSION_TOKEN = "SESSION_TOKEN"; - - private final ObjectMapper objectMapper = new ObjectMapper().registerModule(new AwsModule()); - - private String serialize(Object obj) throws Exception { - return objectMapper.writeValueAsString(obj); - } - - private T deserialize(String serializedObj, Class clazz) throws Exception { - return objectMapper.readValue(serializedObj, clazz); - } - - private AWSCredentialsProvider deserializeCredentialsProvider(String serializedProvider) - throws Exception { - return deserialize(serializedProvider, AWSCredentialsProvider.class); - } - - @Test - public void testObjectMapperCannotFindModule() { - // module shall not be discoverable to not conflict with the one in amazon-web-services - List modules = ObjectMapper.findModules(ReflectHelpers.findClassLoader()); - assertThat(modules, not(hasItem(Matchers.instanceOf(AwsModule.class)))); - } - - private void checkStaticBasicCredentials(AWSCredentialsProvider provider) { - assertEquals(AWSStaticCredentialsProvider.class, provider.getClass()); - assertEquals(ACCESS_KEY_ID, provider.getCredentials().getAWSAccessKeyId()); - assertEquals(SECRET_ACCESS_KEY, provider.getCredentials().getAWSSecretKey()); - } - - private void checkStaticSessionCredentials(AWSCredentialsProvider provider) { - checkStaticBasicCredentials(provider); - assertEquals( - SESSION_TOKEN, ((AWSSessionCredentials) provider.getCredentials()).getSessionToken()); - } - - @Test - public void testAWSStaticCredentialsProviderSerializationDeserialization() throws Exception { - AWSCredentialsProvider credentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)); - - String serializedCredentialsProvider = serialize(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - deserializeCredentialsProvider(serializedCredentialsProvider); - - checkStaticBasicCredentials(deserializedCredentialsProvider); - - credentialsProvider = - new AWSStaticCredentialsProvider( - new BasicSessionCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY, SESSION_TOKEN)); - - checkStaticSessionCredentials(credentialsProvider); - } - - @Test - public void testPropertiesFileCredentialsProviderSerializationDeserialization() throws Exception { - String credentialsFilePath = "/path/to/file"; - - AWSCredentialsProvider credentialsProvider = - new PropertiesFileCredentialsProvider(credentialsFilePath); - - String serializedCredentialsProvider = serialize(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - deserializeCredentialsProvider(serializedCredentialsProvider); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals( - credentialsFilePath, - readField(deserializedCredentialsProvider, "credentialsFilePath", true)); - } - - @Test - public void testClasspathPropertiesFileCredentialsProviderSerializationDeserialization() - throws Exception { - String credentialsFilePath = "/path/to/file"; - - AWSCredentialsProvider credentialsProvider = - new ClasspathPropertiesFileCredentialsProvider(credentialsFilePath); - - String serializedCredentialsProvider = serialize(credentialsProvider); - AWSCredentialsProvider deserializedCredentialsProvider = - deserializeCredentialsProvider(serializedCredentialsProvider); - - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - assertEquals( - credentialsFilePath, - readField(deserializedCredentialsProvider, "credentialsFilePath", true)); - } - - @Test - public void testSingletonAWSCredentialsProviderSerializationDeserialization() throws Exception { - AWSCredentialsProvider credentialsProvider; - String serializedCredentialsProvider; - AWSCredentialsProvider deserializedCredentialsProvider; - - credentialsProvider = new DefaultAWSCredentialsProviderChain(); - serializedCredentialsProvider = serialize(credentialsProvider); - deserializedCredentialsProvider = deserializeCredentialsProvider(serializedCredentialsProvider); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new EnvironmentVariableCredentialsProvider(); - serializedCredentialsProvider = serialize(credentialsProvider); - deserializedCredentialsProvider = deserializeCredentialsProvider(serializedCredentialsProvider); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new SystemPropertiesCredentialsProvider(); - serializedCredentialsProvider = serialize(credentialsProvider); - deserializedCredentialsProvider = deserializeCredentialsProvider(serializedCredentialsProvider); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new ProfileCredentialsProvider(); - serializedCredentialsProvider = serialize(credentialsProvider); - deserializedCredentialsProvider = deserializeCredentialsProvider(serializedCredentialsProvider); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - - credentialsProvider = new EC2ContainerCredentialsProviderWrapper(); - serializedCredentialsProvider = serialize(credentialsProvider); - deserializedCredentialsProvider = deserializeCredentialsProvider(serializedCredentialsProvider); - assertEquals(credentialsProvider.getClass(), deserializedCredentialsProvider.getClass()); - } -} diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtilsTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtilsTest.java deleted file mode 100644 index 972912be2a94..000000000000 --- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/serde/AwsSerializableUtilsTest.java +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.beam.sdk.io.kinesis.serde; - -import static org.apache.beam.sdk.io.kinesis.serde.AwsSerializableUtils.deserialize; -import static org.apache.beam.sdk.io.kinesis.serde.AwsSerializableUtils.serialize; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.AWSSessionCredentials; -import com.amazonaws.auth.AWSStaticCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; -import com.amazonaws.auth.ClasspathPropertiesFileCredentialsProvider; -import com.amazonaws.auth.DefaultAWSCredentialsProviderChain; -import com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper; -import com.amazonaws.auth.EnvironmentVariableCredentialsProvider; -import com.amazonaws.auth.PropertiesFileCredentialsProvider; -import com.amazonaws.auth.SystemPropertiesCredentialsProvider; -import com.amazonaws.auth.profile.ProfileCredentialsProvider; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.JUnit4; - -@RunWith(JUnit4.class) -public class AwsSerializableUtilsTest { - - private static final String ACCESS_KEY_ID = "ACCESS_KEY_ID"; - private static final String SECRET_ACCESS_KEY = "SECRET_ACCESS_KEY"; - private static final String SESSION_TOKEN = "SESSION_TOKEN"; - - private void checkStaticBasicCredentials(AWSCredentialsProvider provider) { - assertTrue(provider instanceof AWSStaticCredentialsProvider); - assertEquals(ACCESS_KEY_ID, provider.getCredentials().getAWSAccessKeyId()); - assertEquals(SECRET_ACCESS_KEY, provider.getCredentials().getAWSSecretKey()); - } - - private void checkStaticSessionCredentials(AWSCredentialsProvider provider) { - checkStaticBasicCredentials(provider); - assertEquals( - SESSION_TOKEN, ((AWSSessionCredentials) provider.getCredentials()).getSessionToken()); - } - - @Test - public void testBasicCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = - new AWSStaticCredentialsProvider(new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY)); - String serializedProvider = serialize(credentialsProvider); - - checkStaticBasicCredentials(deserialize(serializedProvider)); - } - - @Test - public void testStaticSessionCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = - new AWSStaticCredentialsProvider( - new BasicSessionCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY, SESSION_TOKEN)); - String serializedCredentials = serialize(credentialsProvider); - - checkStaticSessionCredentials(deserialize(serializedCredentials)); - } - - @Test - public void testDefaultAWSCredentialsProviderChainSerialization() { - AWSCredentialsProvider credentialsProvider = DefaultAWSCredentialsProviderChain.getInstance(); - String expectedSerializedProvider = "{\"@type\":\"DefaultAWSCredentialsProviderChain\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testPropertiesFileCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = - new PropertiesFileCredentialsProvider("AwsCredentials.properties"); - String expectedSerializedProvider = - "{\"@type\":\"PropertiesFileCredentialsProvider\",\"credentialsFilePath\":\"AwsCredentials.properties\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testClasspathPropertiesFileCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = - new ClasspathPropertiesFileCredentialsProvider("AwsCredentials.properties"); - String expectedSerializedProvider = - "{\"@type\":\"ClasspathPropertiesFileCredentialsProvider\",\"credentialsFilePath\":\"/AwsCredentials.properties\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testEnvironmentVariableCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = new EnvironmentVariableCredentialsProvider(); - String expectedSerializedProvider = "{\"@type\":\"EnvironmentVariableCredentialsProvider\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testSystemPropertiesCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = new SystemPropertiesCredentialsProvider(); - String expectedSerializedProvider = "{\"@type\":\"SystemPropertiesCredentialsProvider\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testProfileCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = new ProfileCredentialsProvider(); - String expectedSerializedProvider = "{\"@type\":\"ProfileCredentialsProvider\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - @Test - public void testEC2ContainerCredentialsProviderWrapperSerialization() { - AWSCredentialsProvider credentialsProvider = new EC2ContainerCredentialsProviderWrapper(); - String expectedSerializedProvider = "{\"@type\":\"EC2ContainerCredentialsProviderWrapper\"}"; - String serializedProvider = serialize(credentialsProvider); - - assertEquals(expectedSerializedProvider, serializedProvider); - assertEquals(expectedSerializedProvider, serialize(deserialize(serializedProvider))); - } - - static class UnknownAwsCredentialsProvider implements AWSCredentialsProvider { - @Override - public AWSCredentials getCredentials() { - return new BasicAWSCredentials(ACCESS_KEY_ID, SECRET_ACCESS_KEY); - } - - @Override - public void refresh() {} - } - - @Test(expected = IllegalArgumentException.class) - public void testFailOnAWSCredentialsProviderSerialization() { - AWSCredentialsProvider credentialsProvider = new UnknownAwsCredentialsProvider(); - serialize(credentialsProvider); - } - - @Test(expected = IllegalArgumentException.class) - public void testFailOnAWSCredentialsProviderDeserialization() { - deserialize("invalid string"); - } -} diff --git a/settings.gradle.kts b/settings.gradle.kts index a8bee45a05ac..3a0095b7b139 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -238,8 +238,6 @@ include(":sdks:java:io:jms") include(":sdks:java:io:json") include(":sdks:java:io:kafka") include(":sdks:java:io:kafka:upgrade") -include(":sdks:java:io:kinesis") -include(":sdks:java:io:kinesis:expansion-service") include(":sdks:java:io:kudu") include(":sdks:java:io:mongodb") include(":sdks:java:io:mqtt")