Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DE-175: Create general cloudLibrary client #38

Merged
merged 11 commits into from
Nov 20, 2024
Merged
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
# Changelog
## v1.5.0 11/19/24
- Added cloudLibrary client

## v1.4.0 9/23/24
- Added SFTP client

Expand Down
5 changes: 3 additions & 2 deletions README.md
fatimarahman marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ This package contains common Python utility classes and functions.
* Connecting to and querying a PostgreSQL database using a connection pool
* Connecting to and querying Redshift
* Making requests to OAuth2-authenticated APIs such as the NYPL Platform API and Sierra
* Interacting with vendor APIs such as cloudLibrary

## Functions
* Reading a YAML config file and putting the contents in os.environ -- see `config/sample.yaml` for an example of how the config file should be formatted
Expand All @@ -37,7 +38,7 @@ kinesis_client = KinesisClient(...)
# Do not use any version below 1.0.0
# All available optional dependencies can be found in pyproject.toml.
# See the "Managing dependencies" section below for more details.
nypl-py-utils[kinesis-client,config-helper]==1.4.0
nypl-py-utils[kinesis-client,config-helper]==1.5.0
```

## Developing locally
Expand All @@ -63,7 +64,7 @@ The optional dependency sets also give the developer the option to manually list
### Using PostgreSQLClient in an AWS Lambda
Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. Instead, it must be packaged as follows:
```bash
pip install --target ./package nypl-py-utils[postgresql-client]==1.4.0
pip install --target ./package nypl-py-utils[postgresql-client]==1.5.0

pip install \
--platform manylinux2014_x86_64 \
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
fatimarahman marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "hatchling.build"

[project]
name = "nypl_py_utils"
version = "1.4.0"
version = "1.5.0"
authors = [
{ name="Aaron Friedman", email="[email protected]" },
]
Expand All @@ -27,6 +27,9 @@ avro-client = [
"avro>=1.11.1",
"requests>=2.28.1"
]
cloudlibrary-client = [
"requests>=2.28.1"
]
kinesis-client = [
"boto3>=1.26.5",
"botocore>=1.29.5"
Expand Down
148 changes: 148 additions & 0 deletions src/nypl_py_utils/classes/cloudlibrary_client.py
fatimarahman marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import base64
import hashlib
import hmac
import requests

from datetime import datetime, timedelta, timezone
from nypl_py_utils.functions.log_helper import create_log
from requests.adapters import HTTPAdapter, Retry
from xml.sax.saxutils import escape

# Base URL of the cloudLibrary partner API; request paths are appended to it
_API_URL = "https://partner.yourcloudlibrary.com"
# API version sent in the "3mcl-APIVersion" header of every request
_VERSION = "3.0.2"


class CloudLibraryClient:
    """Client for interacting with CloudLibrary API v3.0.2

    Every request carries a "3mcl-Authorization" header containing an
    HMAC-SHA256 signature computed with the account key, and is sent
    through a single requests.Session created in the constructor.
    """

    # Formats accepted for the start_date/end_date arguments of
    # get_library_events, tried in this order. The first one is also used
    # to render the default dates.
    _DATE_FORMATS = ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d")

    def __init__(self, library_id, account_id, account_key):
        """
        Stores the credentials and prepares the HTTP session. No request
        is made here; credentials are only used when signing each call.
        """
        self.logger = create_log("cloudlibrary_client")
        self.library_id = library_id
        self.account_id = account_id
        self.account_key = account_key

        # set up HTTP session; only GET requests that come back with one
        # of the listed 5xx statuses are retried
        retry_policy = Retry(total=3, backoff_factor=45,
                             status_forcelist=[500, 502, 503, 504],
                             allowed_methods=frozenset(["GET"]))
        self.session = requests.Session()
        self.session.mount("https://",
                           HTTPAdapter(max_retries=retry_policy))

    def get_library_events(self, start_date=None,
                           end_date=None) -> "requests.Response":
        """
        Retrieves all the events related to library-owned items within the
        optional timeframe. Pulls past 24 hours of events by default.

        start_date and end_date are optional parameters, and must be
        formatted either YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS. A missing
        start_date defaults to 24 hours ago and a missing end_date to the
        current time (both UTC).

        Raises ValueError if a supplied date matches neither format and
        CloudLibraryClientError if start_date is later than end_date or
        the request itself fails.
        """
        default_format = self._DATE_FORMATS[0]
        today = datetime.now(timezone.utc)
        yesterday = today - timedelta(1)
        if start_date is None:
            start_date = yesterday.strftime(default_format)
        if end_date is None:
            end_date = today.strftime(default_format)

        if self._parse_date(start_date) > self._parse_date(end_date):
            error_message = (f"Start date {start_date} greater than end date "
                             f"{end_date}, cannot retrieve library events")
            self.logger.error(error_message)
            raise CloudLibraryClientError(error_message)

        self.logger.info(
            (f"Fetching all library events in "
             f"time frame {start_date} to {end_date}..."))

        # The dates are passed through exactly as given by the caller
        path = f"data/cloudevents?startdate={start_date}&enddate={end_date}"
        response = self.request(path=path, method_type="GET")
        return response

    def create_request_body(self, request_type,
                            item_id, patron_id) -> str:
        """
        Helper function to generate request body when performing item
        and/or patron-specific functions (ex. checking out a title).

        item_id and patron_id are XML-escaped so that "&", "<" and ">" in
        an ID cannot produce a malformed document. request_type is used
        as the element name and is inserted unchanged.
        """
        request_template = "<%(request_type)s><ItemId>%(item_id)s</ItemId><PatronId>%(patron_id)s</PatronId></%(request_type)s>"  # noqa
        return request_template % {
            "request_type": request_type,
            "item_id": escape(str(item_id)),
            "patron_id": escape(str(patron_id)),
        }

    def request(self, path, method_type="GET",
                body=None) -> "requests.Response":
        """
        Use this method to call specific paths in the cloudLibrary API.
        This method is necessary for building headers/authorization.
        Example usage of this method is in the get_library_events function.

        path is relative to /cirrus/library/<library_id>/. method_type is
        case-insensitive and is sent as the actual HTTP verb.

        Returns Response object by default -- you will need to parse this
        object to retrieve response text, status codes, etc.

        Raises CloudLibraryClientError if the request fails or the
        response has an error status.
        """
        # Normalize before signing so the signature and headers are built
        # from the same verb that is sent over the wire
        method_type = method_type.upper()
        extended_path = f"/cirrus/library/{self.library_id}/{path}"
        headers = self._build_headers(method_type, extended_path)
        url = f"{_API_URL}{extended_path}"

        try:
            response = self.session.request(method=method_type,
                                            url=url,
                                            data=body,
                                            headers=headers,
                                            timeout=60)
            response.raise_for_status()
        except Exception as e:
            error_message = f"Failed to retrieve response from {url}: {e}"
            self.logger.error(error_message)
            raise CloudLibraryClientError(error_message) from e

        return response

    @classmethod
    def _parse_date(cls, date_string) -> datetime:
        """
        Parses a YYYY-MM-DDTHH:MM:SS or YYYY-MM-DD string into a datetime.
        Raises ValueError if the string matches neither format.
        """
        for date_format in cls._DATE_FORMATS:
            try:
                return datetime.strptime(date_string, date_format)
            except ValueError:
                continue
        raise ValueError(
            f"Date {date_string} must be formatted either YYYY-MM-DD or "
            "YYYY-MM-DDTHH:MM:SS")

    def _build_headers(self, method_type, path) -> dict:
        """
        Builds the headers for one request: the signing timestamp, the
        authorization value, the API version, and either "Accept" (GET)
        or "Content-Type" (any other method) set to application/xml.
        """
        method_type = method_type.upper()
        time, authorization = self._build_authorization(
            method_type, path)
        headers = {
            "3mcl-Datetime": time,
            "3mcl-Authorization": authorization,
            "3mcl-APIVersion": _VERSION,
        }

        if method_type == "GET":
            headers["Accept"] = "application/xml"
        else:
            headers["Content-Type"] = "application/xml"

        return headers

    def _build_authorization(self, method_type,
                             path) -> tuple[str, str]:
        """
        Returns the current UTC timestamp and the authorization value for
        a request. The signature is the base64-encoded HMAC-SHA256 of the
        timestamp, method and path joined by newlines, keyed with the
        account key.
        """
        now = datetime.now(timezone.utc).strftime(
            "%a, %d %b %Y %H:%M:%S GMT")
        message = "\n".join([now, method_type, path])
        digest = hmac.new(
            self.account_key.encode("utf-8"),
            msg=message.encode("utf-8"),
            digestmod=hashlib.sha256
        ).digest()
        signature = base64.standard_b64encode(digest).decode()

        return now, f"3MCLAUTH {self.account_id}:{signature}"


class CloudLibraryClientError(Exception):
    """Raised by CloudLibraryClient when a request is invalid or fails.

    The text is available both as the `message` attribute and through
    str(), whether it was passed positionally or by keyword.
    """

    def __init__(self, message=None):
        # Forward the message to Exception so that args/str() are filled
        # in even for CloudLibraryClientError(message="..."); with no
        # message, args stays empty and str() stays "".
        if message is None:
            super().__init__()
        else:
            super().__init__(message)
        self.message = message
196 changes: 196 additions & 0 deletions tests/test_cloudlibrary_client.py
fatimarahman marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
import pytest

from freezegun import freeze_time
from requests import ConnectTimeout
from nypl_py_utils.classes.cloudlibrary_client import (
CloudLibraryClient, CloudLibraryClientError)

# URL prefix the client is expected to call; each test appends the library
# ID and the request path to it
_API_URL = "https://partner.yourcloudlibrary.com/cirrus/library/"

# catch-all API response since we're not testing actual data
_TEST_LIBRARY_EVENTS_RESPONSE = """<LibraryEventBatch
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<PublishId>4302fcca-ef99-49bf-bd29-d673e990f765</PublishId>
<PublishDateTimeInUTC>2024-11-10T17:35:18</PublishDateTimeInUTC>
<LastEventDateTimeInUTC>2012-11-11T13:58:52.055</LastEventDateTimeInUTC>
<Events>
<CloudLibraryEvent>
<EventId>4302fcca-ef99-49bf-bd29-d673e990f4a7</EventId>
<EventType>CHECKIN</EventType>
<EventStartDateTimeInUTC>2024-11-10T05:07:56</EventStartDateTimeInUTC>
<EventEndDateTimeInUTC>2024-11-10T07:50:59</EventEndDateTimeInUTC>
<ItemId>edbz9</ItemId>
<ItemLibraryId>1234</ItemLibraryId>
<ISBN>9780307238405</ISBN>
<PatronId>TestUser1</PatronId>
<PatronLibraryId>1234</PatronLibraryId>
<EventPublishDateTimeInUTC>2024-11-10T17:35:18</EventPublishDateTimeInUTC>
</CloudLibraryEvent>
</Events>
</LibraryEventBatch>
"""


@freeze_time("2024-11-11 10:00:00")
class TestCloudLibraryClient:
    """Tests for CloudLibraryClient, run with the clock frozen at
    2024-11-11 10:00:00 so default dates and signed headers are fixed."""

    # Dotted path of the method patched out in the get_library_events tests
    _REQUEST_METHOD = "nypl_py_utils.classes.cloudlibrary_client.CloudLibraryClient.request"  # noqa

    # The 24 hours leading up to the frozen "now"
    _LAST_DAY = ("2024-11-10T10:00:00", "2024-11-11T10:00:00")

    @pytest.fixture
    def test_instance(self):
        return CloudLibraryClient(
            "library_id", "account_id", "account_key")

    def _events_path(self, start, end):
        """Relative path the client should request for a date range."""
        return f"data/cloudevents?startdate={start}&enddate={end}"

    def _events_url(self, client, start, end):
        """Full URL the client should call for a date range."""
        return f"{_API_URL}{client.library_id}/{self._events_path(start, end)}"  # noqa

    def _patch_request(self, mocker):
        """Replaces CloudLibraryClient.request with a canned response."""
        return mocker.patch(
            self._REQUEST_METHOD,
            return_value=_TEST_LIBRARY_EVENTS_RESPONSE)

    def _check_request_round_trip(self, client, requests_mock, method,
                                  authorization, content_header,
                                  body, sent_body):
        """
        Registers a mocked endpoint for `method`, calls client.request and
        checks the response text plus the method, URL, body and headers of
        the request that was actually sent.
        """
        start, end = self._LAST_DAY
        url = self._events_url(client, start, end)
        expected_headers = {"3mcl-Datetime": "Mon, 11 Nov 2024 10:00:00 GMT",
                            "3mcl-Authorization": authorization,
                            "3mcl-APIVersion": "3.0.2",
                            content_header: "application/xml"}
        register = getattr(requests_mock, method.lower())
        register(url=url, text=_TEST_LIBRARY_EVENTS_RESPONSE)

        response = client.request(
            path=self._events_path(start, end),
            method_type=method,
            body=body)

        sent = requests_mock.request_history[0]
        assert response.text == _TEST_LIBRARY_EVENTS_RESPONSE
        assert sent.method == method
        assert sent.url == url
        if sent_body is None:
            assert sent.body is None
        else:
            assert sent.body == sent_body
        assert expected_headers.items() <= dict(sent.headers).items()

    def test_get_library_events_success_no_args(
            self, test_instance, mocker):
        start, end = self._LAST_DAY
        patched = self._patch_request(mocker)

        result = test_instance.get_library_events()

        patched.assert_called_once_with(
            path=self._events_path(start, end),
            method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_success_with_start_and_end_date(
            self, test_instance, mocker):
        start, end = "2024-11-01T10:00:00", "2024-11-05T10:00:00"
        patched = self._patch_request(mocker)

        result = test_instance.get_library_events(start, end)

        patched.assert_called_once_with(
            path=self._events_path(start, end),
            method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_success_with_no_end_date(
            self, test_instance, mocker):
        # end date is expected to default to the frozen current time
        start, end = "2024-11-01T09:00:00", "2024-11-11T10:00:00"
        patched = self._patch_request(mocker)

        result = test_instance.get_library_events(start)

        patched.assert_called_once_with(
            path=self._events_path(start, end),
            method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_exception_when_start_date_greater_than_end(
            self, test_instance):
        later, earlier = "2024-11-11T09:00:00", "2024-11-01T10:00:00"

        with pytest.raises(CloudLibraryClientError):
            test_instance.get_library_events(later, earlier)

    def test_get_library_events_exception_when_connection_timeout(
            self, test_instance, requests_mock):
        start, end = self._LAST_DAY

        # We're making sure that a separate error during a sub-method will
        # still result in CloudLibraryClientError
        requests_mock.get(
            self._events_url(test_instance, start, end),
            exc=ConnectTimeout)

        with pytest.raises(CloudLibraryClientError):
            test_instance.get_library_events()

    def test_get_request_success(self, test_instance, requests_mock):
        self._check_request_round_trip(
            test_instance, requests_mock, "GET",
            "3MCLAUTH account_id:KipNmbVsmsT2xPjP4oHAaR3n00JgcszfF6mQRffBoRk=",  # noqa
            "Accept", body=None, sent_body=None)

    def test_put_request_success(self, test_instance, requests_mock):
        self._check_request_round_trip(
            test_instance, requests_mock, "PUT",
            "3MCLAUTH account_id:3M773C6ZVWmB/ISoSjQy9iBp48T4tUWhoNOwXaseMtE=",  # noqa
            "Content-Type", body={"test": "test"}, sent_body="test=test")

    def test_post_request_success(self, test_instance, requests_mock):
        self._check_request_round_trip(
            test_instance, requests_mock, "POST",
            "3MCLAUTH account_id:vF0zI6ee1w1PbTLQ9EVvtxRly2vpCRxdBdAHb8DZQ4E=",  # noqa
            "Content-Type", body={"test": "test"}, sent_body="test=test")

    def test_request_failure(self, test_instance, requests_mock):
        start, end = self._LAST_DAY
        requests_mock.get(
            self._events_url(test_instance, start, end),
            exc=ConnectTimeout)

        with pytest.raises(CloudLibraryClientError):
            test_instance.request(
                path=self._events_path(start, end),
                method_type="GET")

    def test_create_request_body_success(self, test_instance):
        request_type = "CheckoutRequest"
        item_id = "df45qw"
        patron_id = "215555602845"
        expected_body = "".join([
            f"<{request_type}>",
            f"<ItemId>{item_id}</ItemId>",
            f"<PatronId>{patron_id}</PatronId>",
            f"</{request_type}>",
        ])

        actual_body = test_instance.create_request_body(
            request_type, item_id, patron_id)

        assert actual_body == expected_body
fatimarahman marked this conversation as resolved.
Show resolved Hide resolved