Skip to content

Commit

Permalink
Merge pull request #67 from ASFHyP3/harvest-fix
Browse files Browse the repository at this point in the history
Harvest products using download/upload instead of S3 copy
  • Loading branch information
asjohnston-asf authored Jun 27, 2022
2 parents 8e3cfdf + e72808a commit 63213e0
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 60 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [PEP 440](https://www.python.org/dev/peps/pep-0440/)
and uses [Semantic Versioning](https://semver.org/spec/v2.0.0.html).


## [0.0.13]
### Changed
- `harvest_products` Lambda function now harvests data products via download and upload, rather than S3 copy.

## [0.0.12]
### Fixed
- Handle the case where a product has no temporal neighbors.
Expand Down
11 changes: 2 additions & 9 deletions harvest_products/cloudformation.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,7 @@ Resources:
- dynamodb:Query
Resource: !Sub "arn:aws:dynamodb:${AWS::Region}:${AWS::AccountId}:table/${ProductTable}*"
- Effect: Allow
Action:
- s3:GetObject
- s3:GetObjectTagging
Resource: "arn:aws:s3:::*/*"
- Effect: Allow
Action:
- s3:PutObject
- s3:PutObjectTagging
Action: s3:PutObject
Resource: !Sub "arn:aws:s3:::${ProductBucket}/*"

Lambda:
Expand All @@ -66,7 +59,7 @@ Resources:
EDL_PASSWORD: !Ref EDLPassword
Code: src/
Handler: harvest_products.lambda_handler
MemorySize: 256
MemorySize: 2048
Role: !GetAtt Role.Arn
Runtime: python3.8
Timeout: 900
Expand Down
27 changes: 10 additions & 17 deletions harvest_products/src/harvest_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,34 +13,27 @@
S3 = boto3.resource('s3')


def harvest_file(file_url, destination_prefix):
    """Download a file over HTTP and upload it to the harvest S3 bucket.

    Args:
        file_url: URL of the file to download.
        destination_prefix: S3 key prefix under which to store the file in
            the destination bucket; the final key is ``{prefix}/{filename}``.

    Returns:
        The ``https`` URL of the uploaded object in the destination bucket.

    Raises:
        requests.HTTPError: If the download request returns an error status.
    """
    # Bucket name comes from the environment so each deployment can configure it.
    destination_bucket = S3.Bucket(environ['BUCKET_NAME'])
    filename = basename(urlparse(file_url).path)
    destination_key = f'{destination_prefix}/{filename}'
    # NOTE: the whole file is buffered in memory (response.content); the
    # Lambda's MemorySize was raised to 2048 MB to accommodate this.
    response = requests.get(file_url)
    response.raise_for_status()
    # Fall back to a generic binary content type when the extension is unrecognized.
    content_type = guess_type(filename)[0] if guess_type(filename)[0] else 'application/octet-stream'
    destination_bucket.put_object(Body=io.BytesIO(response.content), Key=destination_key, ContentType=content_type)
    return f'https://{destination_bucket.name}.s3.amazonaws.com/{destination_key}'


def harvest(product, job):
    """Harvest a completed job's output files into the harvest bucket.

    Downloads the product file, browse image, and thumbnail image via HTTP
    and uploads each to the destination bucket under a per-product prefix,
    rather than copying between S3 buckets.

    Args:
        product: Product record; ``event_id`` and ``product_id`` form the
            destination key prefix.
        job: Job object exposing ``files`` (dicts with ``filename``, ``size``,
            and ``url``), ``browse_images``, and ``thumbnail_images``; only the
            first entry of each list is harvested.

    Returns:
        dict with the harvested ``browse_url``, ``thumbnail_url``, and
        ``product_url``, plus the product's ``product_name`` and
        ``product_size``.
    """
    product_file = job.files[0]
    destination_prefix = f'{product["event_id"]}/{product["product_id"]}'

    return {
        'browse_url': harvest_file(job.browse_images[0], destination_prefix),
        'thumbnail_url': harvest_file(job.thumbnail_images[0], destination_prefix),
        'product_name': product_file['filename'],
        'product_size': product_file['size'],
        'product_url': harvest_file(product_file['url'], destination_prefix),
    }


Expand Down
52 changes: 18 additions & 34 deletions tests/test_harvest_products.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from os import environ
from unittest import mock
from unittest.mock import MagicMock, call, patch

import responses
from botocore.stub import ANY
Expand All @@ -10,7 +10,7 @@


@responses.activate
def test_harvest_image(s3_stubber):
def test_harvest_file(s3_stubber):
responses.add(responses.GET, 'https://foo.com/file.png', body='image_content')
params = {
'Bucket': environ['BUCKET_NAME'],
Expand All @@ -19,16 +19,16 @@ def test_harvest_image(s3_stubber):
'Body': ANY
}
s3_stubber.add_response(method='put_object', expected_params=params, service_response={})
bucket = harvest_products.S3.Bucket(environ['BUCKET_NAME'])
response = harvest_products.harvest_image('https://foo.com/file.png', bucket, 'prefix')
response = harvest_products.harvest_file('https://foo.com/file.png', 'prefix')

assert response == f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/prefix/file.png'


def test_harvest(s3_stubber):
@patch('harvest_products.harvest_file')
def test_harvest(mock_harvest_file: MagicMock):
product = {
'event_id': '1',
'product_id': 'source_prefix',
'event_id': 'event_id',
'product_id': 'product_id',
'granules': [],
'status_code': 'PENDING',
'processing_date': '2020-01-01T00:00:00+00:00'
Expand All @@ -39,10 +39,7 @@ class MockJob:
{
'filename': 'product.zip',
'size': 123,
's3': {
'bucket': 'sourceBucket',
'key': 'source_prefix/product.zip',
},
'url': 'PRODUCT_URL',
},
]
browse_images = [
Expand All @@ -52,36 +49,23 @@ class MockJob:
'THUMBNAIL_IMAGE_URL',
]

params = {
'Bucket': 'sourceBucket',
'Key': 'source_prefix/product.zip',
}
s3_response = {
'ContentLength': 123
}
s3_stubber.add_response(method='head_object', expected_params=params, service_response=s3_response)

params = {
'Bucket': environ['BUCKET_NAME'],
'Key': '1/source_prefix/product.zip',
'CopySource': {
'Bucket': 'sourceBucket',
'Key': 'source_prefix/product.zip'
},
}
s3_stubber.add_response(method='copy_object', expected_params=params, service_response={})

with mock.patch('harvest_products.harvest_image', lambda x, y, z: 'https://foo.com/file.png'):
files = harvest_products.harvest(product, MockJob())
mock_harvest_file.return_value = 'https://foo.com/file.png'
files = harvest_products.harvest(product, MockJob())

assert files == {
'browse_url': 'https://foo.com/file.png',
'thumbnail_url': 'https://foo.com/file.png',
'product_name': 'product.zip',
'product_size': 123,
'product_url': f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/1/source_prefix/product.zip'
'product_url': 'https://foo.com/file.png'
}

assert mock_harvest_file.mock_calls == [
call('BROWSE_IMAGE_URL', 'event_id/product_id'),
call('THUMBNAIL_IMAGE_URL', 'event_id/product_id'),
call('PRODUCT_URL', 'event_id/product_id'),
]


def test_update_product_succeeded(tables):
product = {
Expand Down Expand Up @@ -118,7 +102,7 @@ def test_update_product_succeeded(tables):
'product_url': f'https://{environ["BUCKET_NAME"]}.s3.amazonaws.com/1/foo/product.zip'
}

with mock.patch('harvest_products.harvest', lambda x, y: mock_harvest):
with patch('harvest_products.harvest', lambda x, y: mock_harvest):
harvest_products.update_product(product, job)

updated_product = tables.product_table.scan()['Items'][0]
Expand Down

0 comments on commit 63213e0

Please sign in to comment.