DE-101: Re-platform application to Python #11

Merged: 20 commits, Jul 23, 2024
Changes from 4 commits
13 changes: 10 additions & 3 deletions .github/workflows/run-unit-tests.yml
@@ -1,23 +1,30 @@
 name: Run Python unit tests

-on: [ push ]
+on:
+  pull_request:
+    types: [ labeled, unlabeled, opened, reopened, synchronize ]

 jobs:
   changelog:
     name: Updates changelog
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v3
       - uses: dangoslen/changelog-enforcer@v3
   test:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
         uses: actions/checkout@v3

-      - name: Set up Python 3.9
+      - name: Set up Python 3.12
         uses: actions/setup-python@v4
         with:
-          python-version: '3.9'
+          python-version: '3.12'
           cache: 'pip'

       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install -r devel_requirements.txt
           pip install -r requirements.txt

       - name: Run linter and test suite
2 changes: 1 addition & 1 deletion .python-version
@@ -1 +1 @@
-3.9.16
+3.12.0
2 changes: 1 addition & 1 deletion README.md
@@ -16,7 +16,7 @@ sam local invoke --profile nypl-digital-dev -t config/sam.qa.yml -e sample/fireh

The [sample](./sample) folder contains sample Firehose events and their expected outcomes after Lambda event handling, so you can test the efficacy of your code with various schemas.

-With Python, you also have the option of using the [python-lambda-local](https://pypi.org/project/python-lambda-local/) package for local development!
+With Python, you also have the option of using the [python-lambda-local](https://pypi.org/project/python-lambda-local/) package for local development! You will need to create a JSON file with env variables to use said package.
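For reference, such an env-variable file is typically a flat JSON object. A hypothetical sketch, using the variable names that appear elsewhere in this PR (the file name and the exact flag for passing it to python-lambda-local are assumptions, not confirmed by this diff):

```json
{
  "ENVIRONMENT": "devel",
  "NYPL_DATA_API_BASE_URL": "https://qa-platform.nypl.org/api/v0.1/",
  "SCHEMA_PATH": "current-schemas/"
}
```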

## Contributing / Deployment

1 change: 0 additions & 1 deletion config/devel.yaml
@@ -2,5 +2,4 @@
PLAINTEXT_VARIABLES:
ENVIRONMENT: devel
NYPL_DATA_API_BASE_URL: https://qa-platform.nypl.org/api/v0.1/
SCHEMA_PATH: current-schemas/
...
6 changes: 0 additions & 6 deletions config/production.yaml

This file was deleted.

6 changes: 0 additions & 6 deletions config/qa.yaml

This file was deleted.

4 changes: 1 addition & 3 deletions config/sam.qa.yml
Contributor: Is this file used by the python lambda package you mentioned? Also remove SCHEMA_NAME var below

Contributor (Author): It can be used for sam local or the python lambda local usage! This is mainly a holdover from the NodeJS implementation, since we have plenty of sample events (in the sample folder) to use and verify when running local changes.

Contributor: So does sam local or the python lambda package actually run the QA Lambda function, or does it just mimic it? Like, when you run it, could I go on AWS and see the logs from the run?

Contributor (Author): sam local mimics the Lambda function locally and stores the logs locally, so this will not be visible on AWS. When you run the command, the output shows up in the terminal.
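To illustrate the distinction discussed above: local invocation just feeds a sample event to the handler in-process, so nothing reaches AWS or CloudWatch. A minimal sketch (the handler body here is a hypothetical stand-in, not the repo's actual lambda_function):

```python
import json

# Hypothetical stand-in for lambda_function.lambda_handler; the real module
# lives in the repo and is not importable here.
def lambda_handler(event, context):
    return {"records": event.get("records", [])}

# What `sam local invoke` does conceptually: load a sample Firehose event
# from disk, call the handler directly, and print the result to the
# terminal instead of sending logs to CloudWatch.
sample_event = {"records": []}  # in practice: json.load(open("sample/<event>.json"))
result = lambda_handler(sample_event, None)
print(json.dumps(result))  # {"records": []}
```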

@@ -8,11 +8,9 @@ Resources:
     Properties:
       CodeUri: .
       Handler: lambda_function.lambda_handler
-      Runtime: python3.9
+      Runtime: python3.12
       Timeout: 10
       Environment:
         Variables:
           NYPL_DATA_API_BASE_URL: https://qa-platform.nypl.org/api/v0.1/
-          SCHEMA_NAME: CircTrans
-          SCHEMA_PATH: current-schemas/
           LOG_LEVEL: debug
8 changes: 8 additions & 0 deletions devel_requirements.txt
@@ -0,0 +1,8 @@
+black
+boto3
+botocore
+nypl-py-utils[avro-client]==1.2.0
+pybase64
+python-csv
+python-io
+pyyaml
33 changes: 16 additions & 17 deletions lambda_function.py
@@ -1,28 +1,27 @@
 import os
 import re

-from nypl_py_utils.functions.log_helper import create_log
 from nypl_py_utils.functions.config_helper import load_env_file
+from nypl_py_utils.functions.log_helper import create_log
 from record_processor import RecordProcessor


 def lambda_handler(event, context):
     logger = create_log("lambda_function")
-    load_env_file(os.environ["ENVIRONMENT"], "config/{}.yaml")
+    if os.environ["ENVIRONMENT"] == "devel":
+        load_env_file(os.environ["ENVIRONMENT"], "config/{}.yaml")

     logger.info("Starting event processing...")

     if event is None:
         logger.error("Event is undefined.")
-        # TODO: raise exception here?
+        raise RecordParsingError("No event found.")
     else:
         # All records under one event will have the same schema
-        schema_name = _pull_schema_name(event)
-        os.environ["SCHEMA_NAME"] = schema_name
+        schema_name = _pull_schema_name(
+            event["sourceKinesisStreamArn"])
         schema_url = (
-            os.environ["NYPL_DATA_API_BASE_URL"]
-            + os.environ["SCHEMA_PATH"]
-            + f"{schema_name}"
+            os.environ["NYPL_DATA_API_BASE_URL"] + "current-schemas/" + f"{schema_name}"
         )
         output_format = "json" if schema_name != "LocationHours" else "csv"

@@ -38,13 +37,14 @@ def lambda_handler(event, context):
             if "data" in record:
                 result = processor.process_record(record, output_format)
                 if "ProcessingFailed" in result["result"]:
+                    logger.error(
+                        f"Error processing record data: {result}")
                     failures += 1
                 else:
                     successes += 1
                 processed_records.append(result)
     except Exception as e:
         # Catch any errors in the case the event has no records, etc
         logger.error(f"Error processing records: {repr(e)}")
         raise RecordParsingError(e)

     logger.info(
@@ -55,22 +55,21 @@ def lambda_handler(event, context):
     return {"records": processed_records}


-def _pull_schema_name(event):
-    """Given a Firehouse event, pulls encoded schema type from stream ARN.
+def _pull_schema_name(stream_arn):
+    """
+    Given a Firehose event's stream ARN, pulls encoded schema type.
     Example input -- "arn:aws:kinesis:us-east-1:946183545209:stream/PcReserve-production"
     Example output -- "PcReserve"
     """
-    filtered_for_stream_name = event["sourceKinesisStreamArn"].split(":").pop()
-    # Against convention, the "CircTransAnon" stream contains "CircTrans"
-    # encoded records, so ensure the correct schema name is chosen
+    filtered_for_stream_name = stream_arn.split(":").pop()
     replacements = [
         ("^stream/", ""),
-        ("-[a-z]+$", ""),
-        ("^CircTransAnon$", "CircTrans"),
+        ("-[a-z]+$", "")
     ]

     for old, new in replacements:
-        filtered_for_stream_name = re.sub(old, new, filtered_for_stream_name)
+        filtered_for_stream_name = re.sub(
+            old, new, filtered_for_stream_name)
     return filtered_for_stream_name
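As a sanity check on the new `_pull_schema_name` logic, the regex chain can be exercised standalone. This sketch simply restates the function above outside the Lambda context:

```python
import re

def pull_schema_name(stream_arn):
    # Take the final ARN segment, e.g. "stream/PcReserve-production"
    name = stream_arn.split(":").pop()
    # Strip the "stream/" prefix, then the "-production"/"-qa" style suffix
    for old, new in [("^stream/", ""), ("-[a-z]+$", "")]:
        name = re.sub(old, new, name)
    return name

print(pull_schema_name(
    "arn:aws:kinesis:us-east-1:946183545209:stream/PcReserve-production"))
# PcReserve
```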
8 changes: 4 additions & 4 deletions record_processor.py
@@ -3,8 +3,8 @@
 import io
 import json

-from nypl_py_utils.functions.log_helper import create_log
 from nypl_py_utils.classes.avro_client import AvroDecoder
+from nypl_py_utils.functions.log_helper import create_log


 class RecordProcessor:
@@ -19,7 +19,8 @@ def process_record(self, record, output_format):
         desired output format for Firehose (JSON or CSV string)
         """
         binary_record_data = base64.b64decode(record["data"])
-        decoded_record_data = self.avro_decoder.decode_record(binary_record_data)
+        decoded_record_data = self.avro_decoder.decode_record(
+            binary_record_data)

         if decoded_record_data is None:
             # Unable to decode Avro record
@@ -43,8 +44,7 @@ def _format_result_string(self, output_format, data):
             data = self._transform_dictionary_to_csv_string(data)
         else:
             data = json.dumps(data)
-        # After decoding, convert to base64. More often than not,
-        # the original data is either encoded or not in base64
+        # encode output data to base64
        to_bytes = data.encode("utf-8")
        return (base64.b64encode(to_bytes)).decode("utf-8")
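The `_format_result_string` flow above boils down to: serialize the decoded record, then base64-encode the result for Firehose. A self-contained sketch of that round trip, with a plain dict standing in for a real Avro-decoded record:

```python
import base64
import json

def format_result_string(data):
    # Serialize the decoded record to JSON, then base64-encode the
    # UTF-8 bytes, as Firehose expects for transformed record data
    as_json = json.dumps(data)
    return base64.b64encode(as_json.encode("utf-8")).decode("utf-8")

encoded = format_result_string({"recordId": "789", "result": "Ok"})
# Decoding the output recovers the original dict
print(json.loads(base64.b64decode(encoded)))
```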
7 changes: 2 additions & 5 deletions requirements.txt
@@ -1,8 +1,5 @@
-black
 boto3
 botocore
-nypl-py-utils[avro, requests]==1.2.0
+nypl-py-utils[avro-client]==1.2.0
 pybase64
-python-csv
-python-io
-pyyaml
+pyyaml
1 change: 0 additions & 1 deletion tests/conftest.py
@@ -6,7 +6,6 @@
TEST_ENV_VARS = {
"ENVIRONMENT": "test",
"NYPL_DATA_API_BASE_URL": "https://qa-platform.nypl.org/api/v0.1/",
"SCHEMA_PATH": "current-schemas/",
}


9 changes: 4 additions & 5 deletions tests/test_lambda_function.py
@@ -20,7 +20,7 @@
     },
 ]

-circ_trans_processed_records =[
+circ_trans_processed_records = [
     {
         "recordId": "789",
         "result": "Ok",
@@ -61,21 +61,20 @@ def test_instance_1_failure_2_success(self, mocker, test_data):
)

     def test_lambda_handler_no_event_error(self, test_instance_3_success, caplog):
         with caplog.at_level(logging.ERROR):
             with pytest.raises(lambda_function.RecordParsingError):
                 lambda_function.lambda_handler(None, None)
             assert "Event is undefined." in caplog.text

-    def test_lambda_handler_no_event_records_exception(
+    def test_lambda_handler_no_event_records_return_empty_array(
             self, test_instance_3_success, caplog):
         event = {
             "invocationId": "invocationIdExample",
             "deliveryStreamArn": "deliveryExample",
             "sourceKinesisStreamArn": "streamExample",
             "region": "us-east-1",
         }
-        with pytest.raises(lambda_function.RecordParsingError):
+        with pytest.raises(Exception):
             (lambda_function.lambda_handler(event, None))
         assert "Error processing records: KeyError('records')" in caplog.text

     def test_lambda_handler_success(
             self, test_instance_3_success, test_data, caplog):