Skip to content

Commit

Permalink
actually working, now needs tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
dragon-dxw committed Dec 4, 2024
1 parent 615991f commit 8c5199f
Showing 1 changed file with 23 additions and 16 deletions.
39 changes: 23 additions & 16 deletions ds-caselaw-ingester/lambda_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,10 @@
import os
import tarfile
import xml.etree.ElementTree as ET
from typing import Dict, List, Tuple
from urllib.parse import unquote_plus
from xml.sax.saxutils import escape
from caselawclient.models.documents import Document
from caselawclient.models.identifiers.neutral_citation import NeutralCitationNumber
from caselawclient.models.documents import DocumentURIString

import boto3
import rollbar
Expand All @@ -21,6 +20,7 @@
from dotenv import load_dotenv
from notifications_python_client.notifications import NotificationsAPIClient
import logging
from caselawclient.models.documents import Document

logger = logging.getLogger("ingester")
logger.setLevel(logging.DEBUG)
Expand All @@ -43,7 +43,7 @@ def __init__(self, metadata):
self.parameters = metadata.get("parameters", {})

@property
def is_tdr(self) -> bool:
    """Whether this metadata's ``parameters`` mapping has a ``"TDR"`` key."""
    # Membership on the mapping itself; no need to materialise ``.keys()``.
    return "TDR" in self.parameters

@property
Expand Down Expand Up @@ -92,7 +92,7 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

@property
def originator(self) -> str:
    """The ``originator`` value from the message's ``parameters`` mapping.

    NOTE(review): both lookups use ``.get``, so this evaluates to ``None``
    when ``parameters`` or ``originator`` is missing, even though the
    annotation says ``str``. Confirm whether callers rely on that.
    """
    parameters = self.message.get("parameters", {})
    return parameters.get("originator")

def get_consignment_reference(self):
Expand All @@ -106,7 +106,7 @@ def get_consignment_reference(self):

raise InvalidMessageException("Malformed v2 message, please supply a reference")

def save_s3_response(self, sqs_client, s3_client):
def save_s3_response(self, sqs_client, s3_client) -> str:
s3_bucket = self.message.get("parameters", {}).get("s3Bucket")
s3_key = self.message.get("parameters", {}).get("s3Key")
reference = self.get_consignment_reference()
Expand Down Expand Up @@ -201,7 +201,7 @@ def modify_filename(original: str, addition: str) -> str:
return os.path.join(path, new_basename)


def all_messages(event) -> List[Message]:
def all_messages(event) -> list[Message]:
"""All the messages in the SNS event, as Message subclasses"""
decoder = json.decoder.JSONDecoder()
messages_as_decoded_json = [decoder.decode(record["Sns"]["Message"]) for record in event["Records"]]
Expand Down Expand Up @@ -255,7 +255,7 @@ def extract_docx_filename(metadata: dict, consignment_reference: str) -> str:
)


def extract_lambda_versions(versions: List[Dict[str, str]]) -> List[Tuple[str, str]]:
def extract_lambda_versions(versions: list[dict[str, str]]) -> list[tuple[str, str]]:
version_tuples = []
for d in versions:
version_tuples += list(d.items())
Expand Down Expand Up @@ -442,16 +442,23 @@ def insert_document_xml(self) -> bool:
)
api_client.insert_document_xml(self.uri, self.xml, annotation)
return True

def set_document_identifiers(self) -> None:
    """Store the ingested document's neutral citation number as an identifier.

    Fetches the document for ``self.uri`` via ``api_client``. If the document
    exposes a truthy ``neutral_citation``, it is wrapped in
    ``NeutralCitationNumber``, added to ``doc.identifiers`` and saved.
    A document with no ``neutral_citation`` attribute is left untouched.
    """
    # Trace through the module logger at DEBUG; a CRITICAL entry on the root
    # logger for every ingest is noise.
    logger.debug("start set_document_identifiers")
    doc = api_client.get_document_by_uri(DocumentURIString(self.uri))

    if doc.identifiers:
        # Not treated as fatal: warn and carry on.
        msg = f"Ingesting, but identifiers already present for {self.uri}!"
        logger.warning(msg)

    try:
        ncn = doc.neutral_citation
    except AttributeError:
        # Presumably some document types carry no neutral citation -- TODO confirm.
        # Log the skip instead of returning silently.
        logger.debug("Document %s has no neutral_citation attribute; identifiers not set", self.uri)
        return

    if ncn:
        doc.identifiers.add(NeutralCitationNumber(ncn))
        doc.identifiers.save(doc)
        # Use the module logger (set to DEBUG) rather than the root logger.
        logger.info("Ingested document had NCN %s", ncn)

def send_updated_judgment_notification(self) -> None:
personalisation = personalise_email(self.uri, self.metadata)
Expand Down Expand Up @@ -517,7 +524,7 @@ def store_metadata(self) -> None:
value=tdr_metadata["Consignment-Completed-Datetime"],
)

def save_files_to_s3(self):
def save_files_to_s3(self) -> None:
sqs_client, s3_client = aws_clients()
# Determine if there's a word document -- we need to know before we save the tar.gz file
docx_filename = extract_docx_filename(self.metadata, self.consignment_reference)
Expand Down Expand Up @@ -571,7 +578,7 @@ def save_files_to_s3(self):
)

@property
def metadata_object(self) -> Metadata:
    """Wrap ``self.metadata`` in a ``Metadata`` instance (built anew on every access)."""
    return Metadata(self.metadata)

def will_publish(self) -> bool:
Expand All @@ -590,7 +597,7 @@ def will_publish(self) -> bool:

raise RuntimeError(f"Didn't recognise originator {originator!r}")

def send_email(self):
def send_email(self) -> None:
originator = self.message.originator
if originator == "FCL":
return None
Expand All @@ -603,10 +610,10 @@ def send_email(self):

raise RuntimeError(f"Didn't recognise originator {originator!r}")

def close_tar(self) -> None:
    """Close the tar archive held in ``self.tar``."""
    self.tar.close()

def upload_xml(self):
def upload_xml(self) -> None:
self.updated = self.update_document_xml()
self.inserted = False if self.updated else self.insert_document_xml()
if not self.updated and not self.inserted:
Expand All @@ -616,7 +623,7 @@ def upload_xml(self):
self.set_document_identifiers()

@property
def upload_state(self) -> str:
    """Describe the upload outcome: ``"updated"`` or ``"inserted"``.

    Only ``self.updated`` is consulted, so ``"inserted"`` is reported
    whenever ``self.updated`` is falsy.
    """
    if self.updated:
        return "updated"
    return "inserted"


Expand Down

0 comments on commit 8c5199f

Please sign in to comment.