# Patch summary (leading commentary; patch tools skip text before the first
# "diff --git" header).
#
# CHANGELOG.md
#   Adds a 0.0.13 "Changed" entry: the harvest_products Lambda now harvests
#   data products via download and upload rather than S3 copy.
#
# harvest_products/cloudformation.yml
#   Replaces two S3 policy statements (s3:GetObject / s3:GetObjectTagging on
#   arn:aws:s3:::*/* and s3:PutObject / s3:PutObjectTagging on the product
#   bucket) with a single s3:PutObject statement on the product bucket, and
#   raises the Lambda MemorySize from 256 to 2048.
#
# harvest_products/src/harvest_products.py
#   harvest_image(image_url, destination_bucket, destination_prefix) becomes
#   harvest_file(file_url, destination_prefix); the bucket is now looked up
#   inside the function from environ['BUCKET_NAME']. harvest() no longer calls
#   destination_bucket.copy(); it calls harvest_file() for the browse image,
#   the thumbnail image and job.files[0]['url'].
#   NOTE(review): harvest_file wraps response.content in io.BytesIO, so the
#   whole download is held in memory before upload -- presumably the reason
#   for the MemorySize increase; confirm against expected product sizes.
#
# tests/test_harvest_products.py
#   Imports MagicMock, call and patch from unittest.mock, renames
#   test_harvest_image to test_harvest_file, and rewrites test_harvest to
#   patch harvest_products.harvest_file and assert on its mock_calls instead
#   of stubbing head_object / copy_object.
#
# NOTE(review): the original text of this patch had its line breaks collapsed.
# Line structure below was reconstructed from the hunk line counts and Python
# syntax; YAML indentation depth and the position of blank context lines are
# inferred -- verify against the target files before applying.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bfd5d2e..f601ea6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
 and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [0.0.13]
+### Changed
+- `harvest_products` Lambda function now harvests data products via download and upload, rather than S3 copy.
+
 ## [0.0.12]
 ### Fixed
 - Handle the case where a product has no temporal neighbors.
diff --git a/harvest_products/cloudformation.yml b/harvest_products/cloudformation.yml
index cb169d2..71fa6bf 100644
--- a/harvest_products/cloudformation.yml
+++ b/harvest_products/cloudformation.yml
@@ -44,14 +44,7 @@ Resources:
                   - dynamodb:Query
                 Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${ProductTable}*"
               - Effect: Allow
-                Action:
-                  - s3:GetObject
-                  - s3:GetObjectTagging
-                Resource: "arn:aws:s3:::*/*"
-              - Effect: Allow
-                Action:
-                  - s3:PutObject
-                  - s3:PutObjectTagging
+                Action: s3:PutObject
                 Resource: !Sub "arn:aws:s3:::${ProductBucket}/*"
 
   Lambda:
@@ -66,7 +59,7 @@ Resources:
           EDL_PASSWORD: !Ref EDLPassword
       Code: src/
       Handler: harvest_products.lambda_handler
-      MemorySize: 256
+      MemorySize: 2048
       Role: !GetAtt Role.Arn
       Runtime: python3.8
       Timeout: 900
diff --git a/harvest_products/src/harvest_products.py b/harvest_products/src/harvest_products.py
index a1ccfdb..8ef75eb 100644
--- a/harvest_products/src/harvest_products.py
+++ b/harvest_products/src/harvest_products.py
@@ -13,10 +13,11 @@
 S3 = boto3.resource('s3')
 
 
-def harvest_image(image_url, destination_bucket, destination_prefix):
-    filename = basename(urlparse(image_url).path)
+def harvest_file(file_url, destination_prefix):
+    destination_bucket = S3.Bucket(environ['BUCKET_NAME'])
+    filename = basename(urlparse(file_url).path)
     destination_key = f'{destination_prefix}/{filename}'
-    response = requests.get(image_url)
+    response = requests.get(file_url)
     response.raise_for_status()
     content_type = guess_type(filename)[0] if guess_type(filename)[0] else 'application/octet-stream'
     destination_bucket.put_object(Body=io.BytesIO(response.content), Key=destination_key, ContentType=content_type)
@@ -24,23 +25,15 @@ def harvest_image(image_url, destination_bucket, destination_prefix):
 
 
 def harvest(product, job):
-    destination_bucket = S3.Bucket(environ['BUCKET_NAME'])
-    copy_source = {
-        'Bucket': job.files[0]['s3']['bucket'],
-        'Key': job.files[0]['s3']['key'],
-    }
-    product_name = job.files[0]['filename']
     destination_prefix = f'{product["event_id"]}/{product["product_id"]}'
-    destination_key = f'{destination_prefix}/{product_name}'
-    print(f'copying {product_name} to s3://{destination_bucket.name}/{destination_key}')
-    destination_bucket.copy(copy_source, destination_key)
+    product_file = job.files[0]
 
     return {
-        'browse_url': harvest_image(job.browse_images[0], destination_bucket, destination_prefix),
-        'thumbnail_url': harvest_image(job.thumbnail_images[0], destination_bucket, destination_prefix),
-        'product_name': product_name,
-        'product_size': job.files[0]['size'],
-        'product_url': f'https://{destination_bucket.name}.s3.amazonaws.com/{destination_key}',
+        'browse_url': harvest_file(job.browse_images[0], destination_prefix),
+        'thumbnail_url': harvest_file(job.thumbnail_images[0], destination_prefix),
+        'product_name': product_file['filename'],
+        'product_size': product_file['size'],
+        'product_url': harvest_file(product_file['url'], destination_prefix),
     }
 
 
diff --git a/tests/test_harvest_products.py b/tests/test_harvest_products.py
index f6f3574..be8eda6 100644
--- a/tests/test_harvest_products.py
+++ b/tests/test_harvest_products.py
@@ -1,5 +1,5 @@
 from os import environ
-from unittest import mock
+from unittest.mock import MagicMock, call, patch
 
 import responses
 from botocore.stub import ANY
@@ -10,7 +10,7 @@
 
 
 @responses.activate
-def test_harvest_image(s3_stubber):
+def test_harvest_file(s3_stubber):
     responses.add(responses.GET, 'https://foo.com/file.png', body='image_content')
     params = {
         'Bucket': environ['BUCKET_NAME'],
@@ -19,16 +19,16 @@ def test_harvest_image(s3_stubber):
         'Body': ANY
     }
     s3_stubber.add_response(method='put_object', expected_params=params, service_response={})
-    bucket = harvest_products.S3.Bucket(environ['BUCKET_NAME'])
-    response = harvest_products.harvest_image('https://foo.com/file.png', bucket, 'prefix')
+    response = harvest_products.harvest_file('https://foo.com/file.png', 'prefix')
 
     assert response == f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/prefix/file.png'
 
 
-def test_harvest(s3_stubber):
+@patch('harvest_products.harvest_file')
+def test_harvest(mock_harvest_file: MagicMock):
     product = {
-        'event_id': '1',
-        'product_id': 'source_prefix',
+        'event_id': 'event_id',
+        'product_id': 'product_id',
         'granules': [],
         'status_code': 'PENDING',
         'processing_date': '2020-01-01T00:00:00+00:00'
@@ -39,10 +39,7 @@ class MockJob:
             {
                 'filename': 'product.zip',
                 'size': 123,
-                's3': {
-                    'bucket': 'sourceBucket',
-                    'key': 'source_prefix/product.zip',
-                },
+                'url': 'PRODUCT_URL',
             },
         ]
         browse_images = [
@@ -52,36 +49,23 @@ class MockJob:
             'THUMBNAIL_IMAGE_URL',
         ]
 
-    params = {
-        'Bucket': 'sourceBucket',
-        'Key': 'source_prefix/product.zip',
-    }
-    s3_response = {
-        'ContentLength': 123
-    }
-    s3_stubber.add_response(method='head_object', expected_params=params, service_response=s3_response)
-
-    params = {
-        'Bucket': environ['BUCKET_NAME'],
-        'Key': '1/source_prefix/product.zip',
-        'CopySource': {
-            'Bucket': 'sourceBucket',
-            'Key': 'source_prefix/product.zip'
-        },
-    }
-    s3_stubber.add_response(method='copy_object', expected_params=params, service_response={})
-
-    with mock.patch('harvest_products.harvest_image', lambda x, y, z: 'https://foo.com/file.png'):
-        files = harvest_products.harvest(product, MockJob())
+    mock_harvest_file.return_value = 'https://foo.com/file.png'
+    files = harvest_products.harvest(product, MockJob())
 
     assert files == {
         'browse_url': 'https://foo.com/file.png',
         'thumbnail_url': 'https://foo.com/file.png',
         'product_name': 'product.zip',
         'product_size': 123,
-        'product_url': f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/1/source_prefix/product.zip'
+        'product_url': 'https://foo.com/file.png'
     }
 
+    assert mock_harvest_file.mock_calls == [
+        call('BROWSE_IMAGE_URL', 'event_id/product_id'),
+        call('THUMBNAIL_IMAGE_URL', 'event_id/product_id'),
+        call('PRODUCT_URL', 'event_id/product_id'),
+    ]
+
 
 def test_update_product_succeeded(tables):
     product = {
@@ -118,7 +102,7 @@ def test_update_product_succeeded(tables):
         'product_url': f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/1/foo/product.zip'
     }
 
-    with mock.patch('harvest_products.harvest', lambda x, y: mock_harvest):
+    with patch('harvest_products.harvest', lambda x, y: mock_harvest):
         harvest_products.update_product(product, job)
 
     updated_product = tables.product_table.scan()['Items'][0]