Add Pipeline Utils Unit Tests #158

Merged
merged 6 commits into from Jun 20, 2024
1 change: 1 addition & 0 deletions dags/requirements-constraints.txt
@@ -509,6 +509,7 @@ requests-ntlm==1.1.0
requests-oauthlib==1.3.1
requests-toolbelt==0.10.1
requests==2.28.1
requests_mock==1.12.1
responses==0.22.0
rfc3986==1.5.0
rich-click==1.5.2
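The `requests_mock==1.12.1` pin above supports the HTTP mocking used in test_submit_stac.py further down. For context, a minimal sketch of the adapter registration those tests rely on (the URL is a placeholder, not part of this PR):

import requests
import requests_mock

with requests_mock.Mocker() as m:
    # Register a fake endpoint; no real network traffic occurs.
    m.post("http://test.com/oauth2/token", json={"access_token": "token"})
    resp = requests.post("http://test.com/oauth2/token")
    assert resp.json()["access_token"] == "token"
    assert m.call_count == 1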
72 changes: 72 additions & 0 deletions dags/veda_data_pipeline/utils/test_s3_discovery.py
@@ -0,0 +1,72 @@
import os

import boto3
import pytest
from moto import mock_s3

import s3_discovery

@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS credentials, to ensure we're not touching AWS directly."""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'
    # A default region keeps boto3 from raising NoRegionError when clients
    # are created without an explicit region_name.
    os.environ['AWS_DEFAULT_REGION'] = 'us-west-2'
    os.environ['EVENT_BUCKET'] = 'test'

@mock_s3
def test_s3_discovery_dry_run(aws_credentials, capsys):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket("test")
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )

    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-west-2')
    client.put_object(Bucket="test", Key="file1.cog", Body="stuff")
    client.put_object(Bucket="test", Key="file2.tif", Body="stuff")

    fake_event = {
        "dry_run": "dry run",
        "bucket": "test",
        "filename_regex": r"[\s\S]*"
    }

    res = s3_discovery.s3_discovery_handler(fake_event)

    captured = capsys.readouterr()
    assert "Running discovery in dry run mode" in captured.out
    assert "-DRYRUN- Example item" in captured.out

    assert isinstance(res, dict)
    assert res["discovered"] == 2

@mock_s3
def test_s3_discovery(aws_credentials, capsys):
    s3 = boto3.resource('s3')
    bucket = s3.Bucket("test")
    # Create the bucket first, as we're interacting with an empty mocked 'AWS account'
    bucket.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )

    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-west-2')
    client.put_object(Bucket="test", Key="file1.cog", Body="stuff")
    client.put_object(Bucket="test", Key="file2.tif", Body="stuff")
    client.put_object(Bucket="test", Key="file2.txt", Body="stuff")
    fake_event = {
        "bucket": "test",
        "filename_regex": r"^.*\.(cog|tif)$"
    }

    res = s3_discovery.s3_discovery_handler(fake_event)

    captured = capsys.readouterr()
    assert "Running discovery in dry run mode" not in captured.out

    assert isinstance(res, dict)
    assert res["discovered"] == 2
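These tests pin down the handler's observable behavior rather than its internals. For orientation, a hypothetical stub that would satisfy the assertions above; the real implementation in s3_discovery.py is the source of truth and the names here are inferred from the tests alone:

# Hypothetical stub only; see s3_discovery.py for the real handler.
import re
import boto3

def s3_discovery_handler(event):
    client = boto3.client("s3")
    pattern = re.compile(event["filename_regex"])
    listed = client.list_objects_v2(Bucket=event["bucket"])
    # Keep only keys that match the caller-supplied regex.
    keys = [o["Key"] for o in listed.get("Contents", []) if pattern.match(o["Key"])]
    if event.get("dry_run"):
        print("Running discovery in dry run mode")
        print(f"-DRYRUN- Example item {keys[0]}")
    return {"discovered": len(keys)}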
61 changes: 61 additions & 0 deletions dags/veda_data_pipeline/utils/test_submit_stac.py
@@ -0,0 +1,61 @@
import json
import os

import boto3
import pytest
import requests_mock
from moto import mock_secretsmanager

import submit_stac

@pytest.fixture(scope="function")
def aws_credentials():
    """Mocked AWS credentials for moto."""
    os.environ["AWS_ACCESS_KEY_ID"] = "testing"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "testing"
    os.environ["AWS_SECURITY_TOKEN"] = "testing"
    os.environ["AWS_SESSION_TOKEN"] = "testing"
    os.environ["AWS_DEFAULT_REGION"] = "us-west-2"
    os.environ["COGNITO_APP_SECRET"] = "app_secret"
    os.environ["STAC_INGESTOR_API_URL"] = "http://www.test.com"

@pytest.fixture(scope="function")
def aws(aws_credentials):
    with mock_secretsmanager():
        yield boto3.client("secretsmanager", region_name="us-west-2")

@pytest.fixture
def create_secret(aws):
    # Reuse the mocked Secrets Manager client yielded by the `aws` fixture.
    aws.create_secret(
        Name="app_secret",
        SecretString=json.dumps({
            "cognito_domain": "http://test.com",
            "client_id": "test_id",
            "client_secret": "test_secret",
            "scope": "test_scope",
        }),
    )

@requests_mock.Mocker(kw="mock")
def test_submission_handler_dry_run(create_secret, capsys, **kwargs):
    token_endpoint = kwargs["mock"].post(
        "http://test.com/oauth2/token",
        json={"token_type": "bearer", "access_token": "token"},
    )
    ingestions_endpoint = kwargs["mock"].post(
        "http://www.test.com/ingestions",
        json={"token_type": "bearer", "access_token": "token"},
    )
    fake_event = {
        "dry_run": "dry run",
        "stac_file_url": "http://www.test.com",
        "stac_item": 123
    }

    res = submit_stac.submission_handler(fake_event)

    assert res is None
    captured = capsys.readouterr()
    assert "Dry run, not inserting" in captured.out
    assert token_endpoint.call_count == 0
    assert ingestions_endpoint.call_count == 0

@requests_mock.Mocker(kw="mock")
def test_submission_handler(create_secret, capsys, **kwargs):
    token_endpoint = kwargs["mock"].post(
        "http://test.com/oauth2/token",
        json={"token_type": "bearer", "access_token": "token"},
    )
    ingestions_endpoint = kwargs["mock"].post(
        "http://www.test.com/ingestions",
        json={"token_type": "bearer", "access_token": "token"},
    )
    fake_event = {
        "stac_file_url": "http://www.test.com",
        "stac_item": 123
    }

    res = submit_stac.submission_handler(fake_event)

    assert res is None
    captured = capsys.readouterr()
    assert "Dry run, not inserting" not in captured.out
    assert token_endpoint.call_count == 1
    assert ingestions_endpoint.call_count == 1
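The two mocked endpoints stand in for a Cognito client-credentials token exchange followed by an ingestion POST. A rough sketch of that flow, with every name assumed from the fixtures above rather than taken from submit_stac.py:

# Assumed flow; consult submit_stac.py for the actual implementation.
import json
import os

import boto3
import requests

def get_token():
    sm = boto3.client("secretsmanager")
    secret = sm.get_secret_value(SecretId=os.environ["COGNITO_APP_SECRET"])
    creds = json.loads(secret["SecretString"])
    # Exchange client credentials for a bearer token at the Cognito domain.
    resp = requests.post(
        f"{creds['cognito_domain']}/oauth2/token",
        data={"grant_type": "client_credentials", "scope": creds["scope"]},
        auth=(creds["client_id"], creds["client_secret"]),
    )
    return resp.json()["access_token"]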
87 changes: 87 additions & 0 deletions dags/veda_data_pipeline/utils/test_transfer.py
@@ -0,0 +1,87 @@
import os

import boto3
import pytest
from moto import mock_s3

import transfer

@pytest.fixture(scope='function')
def aws_credentials():
    """Mocked AWS credentials, to ensure we're not touching AWS directly."""
    os.environ['AWS_ACCESS_KEY_ID'] = 'testing'
    os.environ['AWS_SECRET_ACCESS_KEY'] = 'testing'
    os.environ['AWS_SECURITY_TOKEN'] = 'testing'
    os.environ['AWS_SESSION_TOKEN'] = 'testing'
    # A default region keeps boto3 from raising NoRegionError when clients
    # are created without an explicit region_name.
    os.environ['AWS_DEFAULT_REGION'] = 'us-west-2'
    os.environ['EVENT_BUCKET'] = 'test'

@mock_s3
def test_transfer_dry_run(aws_credentials, capsys):
    s3 = boto3.resource('s3')
    bucket_source = s3.Bucket("test_source")
    bucket_target = s3.Bucket("test_target")

    # Create the buckets first, as we're interacting with an empty mocked 'AWS account'
    bucket_source.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )
    bucket_target.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )
    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-west-2')
    client.put_object(Bucket="test_source", Key="files/file1.cog", Body="stuff")
    client.put_object(Bucket="test_source", Key="files/file2.tif", Body="stuff")
    client.put_object(Bucket="test_source", Key="files/file3.txt", Body="stuff")

    fake_event = {
        "dry_run": "dry run",
        "origin_bucket": "test_source",
        "origin_prefix": "files",
        "filename_regex": r"[\s\S]*",
        "target_bucket": "test_target"
    }

    transfer.data_transfer_handler(fake_event)

    captured = capsys.readouterr()
    assert "Would have copied 3 files" in captured.out


@mock_s3
def test_transfer(aws_credentials, capsys):
    s3 = boto3.resource('s3')
    bucket_source = s3.Bucket("test_source")
    bucket_target = s3.Bucket("test_target")

    # Create the buckets first, as we're interacting with an empty mocked 'AWS account'
    bucket_source.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )
    bucket_target.create(
        CreateBucketConfiguration={'LocationConstraint': 'us-west-2'}
    )
    # Create some example files that are representative of what the S3 bucket would look like in production
    client = boto3.client('s3', region_name='us-west-2')
    client.put_object(Bucket="test_source", Key="files/file1.cog", Body="stuff")
    client.put_object(Bucket="test_source", Key="files/file2.tif", Body="stuff")
    client.put_object(Bucket="test_source", Key="files/file3.txt", Body="stuff")

    fake_event = {
        "origin_bucket": "test_source",
        "origin_prefix": "files",
        "filename_regex": r"^.*\.(cog|tif)$",
        "target_bucket": "test_target"
    }

    transfer.data_transfer_handler(fake_event)

    captured = capsys.readouterr()
    assert "Copying file: file1.cog" in captured.out
    assert "Copying file: file2.tif" in captured.out
    assert "Copying file: file3.txt" not in captured.out

    target_bucket_objects = sum(1 for _ in bucket_target.objects.all())

    assert target_bucket_objects == 2
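Together the two tests cover both branches of the copy loop: a dry run that only counts regex matches, and a real pass that copies only matching keys into the target bucket. A hypothetical shape of that handler, inferred from the assertions rather than from transfer.py, which remains authoritative:

# Inferred sketch; transfer.py is the real implementation.
import re
import boto3

def data_transfer_handler(event):
    client = boto3.client("s3")
    pattern = re.compile(event["filename_regex"])
    listed = client.list_objects_v2(
        Bucket=event["origin_bucket"], Prefix=event["origin_prefix"]
    )
    matched = [o["Key"] for o in listed.get("Contents", []) if pattern.match(o["Key"])]
    if event.get("dry_run"):
        print(f"Would have copied {len(matched)} files")
        return
    for key in matched:
        # Copy each matching object across buckets, dropping the prefix.
        filename = key.split("/")[-1]
        print(f"Copying file: {filename}")
        client.copy_object(
            Bucket=event["target_bucket"],
            Key=filename,
            CopySource={"Bucket": event["origin_bucket"], "Key": key},
        )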