From 0dbe49a1378bef305c9486d8e50fc131967d94b2 Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 20 Oct 2023 00:38:08 +0000 Subject: [PATCH 1/8] AWS Parser Update - Revision 1 --- circuit_maintenance_parser/parsers/aws.py | 86 ++++++++++--- tests/unit/data/aws/aws3.eml | 119 ++++++++++++++++++ tests/unit/data/aws/aws3_result.json | 37 ++++++ .../data/aws/aws3_subject_parser_result.json | 5 + .../data/aws/aws3_text_parser_result.json | 35 ++++++ tests/unit/test_e2e.py | 9 ++ tests/unit/test_parsers.py | 10 ++ 7 files changed, 283 insertions(+), 18 deletions(-) create mode 100644 tests/unit/data/aws/aws3.eml create mode 100644 tests/unit/data/aws/aws3_result.json create mode 100644 tests/unit/data/aws/aws3_subject_parser_result.json create mode 100644 tests/unit/data/aws/aws3_text_parser_result.json diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 00be0b8c..2fb4984f 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -1,4 +1,4 @@ -"""AquaComms parser.""" +"""AWS parser.""" import hashlib import logging import quopri @@ -13,6 +13,7 @@ # pylint: disable=too-many-nested-blocks, too-many-branches logger = logging.getLogger(__name__) +#logger.setLevel("DEBUG") class SubjectParserAWS1(EmailSubjectParser): @@ -23,10 +24,25 @@ def parse_subject(self, subject): Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001] """ - data = {} - search = re.search(r"\[AWS Account ?I?D?: ([0-9]+)\]", subject) - if search: - data["account"] = search.group(1) + data = {"account": ""} + # Common Subject strings for matching: + subject_map = { + "\[AWS Account ?I?D?: ([0-9]+)\]": "account", + } + + regex_keys = re.compile("|".join(subject_map), re.IGNORECASE) + + # in case of a multi-line subject + # match the subject map + for line in subject.splitlines(): + line_matched = re.search(regex_keys, line) + if not line_matched: + continue + for group_match in line_matched.groups(): + if group_match is not None: + for k, v in subject_map.items(): + if re.search(k, line, re.IGNORECASE): + data[v] = group_match return [data] @@ -60,29 +76,63 @@ def parse_text(self, text): This maintenance is scheduled to avoid disrupting redundant connections at = the same time. """ - data = {"circuits": []} + text_map = { + "^Account ?I?D?: ([0-9]+)": "account", + "^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start", + "^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "end", + "(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start_and_end", + } + + regex_keys = re.compile("|".join(text_map), re.IGNORECASE) + + data = {"circuits": [], "start": "", "end": ""} impact = Impact.OUTAGE maintenace_id = "" status = Status.CONFIRMED + for line in text.splitlines(): if "planned maintenance" in line.lower(): data["summary"] = line - search = re.search( - r"([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})", - line, - ) - if search: - data["start"] = self.dt2ts(parser.parse(search.group(1))) - data["end"] = self.dt2ts(parser.parse(search.group(2))) - maintenace_id += str(data["start"]) - maintenace_id += str(data["end"]) + # match against the regex strings + line_matched = re.search(regex_keys, line) + # if we have a string that's not in our text_map + # there may still be some strings with data to capture. + # otherwise, continue on. + if not line_matched: if "may become unavailable" in line.lower(): impact = Impact.OUTAGE elif "has been cancelled" in line.lower(): status = Status.CANCELLED - elif re.match(r"[a-z]{5}-[a-z0-9]{8}", line): - maintenace_id += line - data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + + if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): + maintenace_id += line + data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) + continue + + # for lines that do match our regex strings. + # grab the data and map the values to keys. + for group_match in line_matched.groups(): + if group_match is not None: + for k, v in text_map.items(): + if re.search(k, line_matched.string, re.IGNORECASE): + # Due to having a single line on some emails + # This causes multiple match groups + # However this needs to be split across keys. + # This could probably be cleaned up. + if v == "start_and_end" and data["start"] == "": + data["start"] = group_match + elif v == "start_and_end" and data["end"] == "": + data["end"] = group_match + else: + data[v] = group_match + + # Let's get our times in order. + if data["start"] and data["end"]: + data["start"] = self.dt2ts(parser.parse(data["start"])) + data["end"] = self.dt2ts(parser.parse(data["end"])) + maintenace_id += str(data["start"]) + maintenace_id += str(data["end"]) + # No maintenance ID found in emails, so a hash value is being generated using the start, # end and IDs of all circuits in the notification. data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec diff --git a/tests/unit/data/aws/aws3.eml b/tests/unit/data/aws/aws3.eml new file mode 100644 index 00000000..6c3690ae --- /dev/null +++ b/tests/unit/data/aws/aws3.eml @@ -0,0 +1,119 @@ +Delivered-To: bill.murray@testdomain.com +Received: by 2002:a05:7300:a498:b0:db:5402:54d2 with SMTP id ci24csp3189966dyb; + Wed, 27 Sep 2023 23:45:14 -0700 (PDT) +X-Received: by 2002:a05:6830:1b64:b0:6c4:ded2:44d0 with SMTP id d4-20020a0568301b6400b006c4ded244d0mr393870ote.27.1695883513842; + Wed, 27 Sep 2023 23:45:13 -0700 (PDT) +ARC-Seal: i=3; a=rsa-sha256; t=1695883513; cv=pass; + d=google.com; s=arc-20160816; + b=J2AUxyAHbkdx5YmP5xAbEeJM3elEoa14Iwv5t4wjz1RTKLla7KfqHxEZMD1LYoenxs + ZLpE/YDGT+ZSphagfW1mo+veHY27kppDSD00YOjDWdWqOLNUvH6KDvGlkMIEIKATDfI/ + 5lWcAOTP2h5x7kha2YFpModQRq/fL2727THiXX+BTTWi6r1kF0IAO8lcivXAuM8jaZtM + DYgJAzQW/hcpexKfXz5idvB7cM4TA6+EtsbntwuTILFG6QuY6l09nQOLSnaiDC9WpxXk + Nnveuzzzkx4XklLFCn6AaangILYMa/Ac9kfcOT6MBwOHngH27Pj06PjxF5x7Otk/VLtD + NTqA== +ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; + h=list-unsubscribe:list-archive:list-help:list-post:list-id + :mailing-list:precedence:feedback-id:content-transfer-encoding + :mime-version:subject:message-id:to:from:date:sender:dkim-signature; + bh=YXHwVUeIH05Eff5uu/QBh14WkQXPczhkl0K2x/xaxHI=; + fh=/BZp/CoWS56RYHJk/Nq7+rYFbsXKZMYT25P0V/4q4R8=; + b=El2IxnqZD03wQKR3T3OgYD9VZmQlP/4/0F/G9rkTXGnpqdk6LMM7wtFngfyND14s2/ + j49qP6rvxFdU2YNPbm6K0v6UplLq6sq57eE8kKVJ9qIu4hebNR3r75ohqPRwU7rzvoGB + mPjuE2UevG9YfswuQ8/gDeLbBFAP9gyj6BMwYnC9uQRwdy1fYLMizPxPP/WxV+RLbY6C + s7UzQudntDwdE+hFpKHx8mUMrx+4AN/PU4eYyuGjXSrT8qb5LWCC6xFmIIN5LluDMGfO + VNXZBnDMf6paqOUk6SdRDDGR60D5dTR+KMXvc5ilbgmErZDLGMDqlm93ku65R3nnFot2 + OmzA== +ARC-Authentication-Results: i=3; mx.google.com; + dkim=pass header.i=@testdomain.com header.s=testdomain header.b=GLHcVmBa; + arc=pass (i=2 spf=pass spfdomain=us-west-2.amazonses.com dkim=pass dkdomain=sns.amazonaws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=amazonaws.com); + spf=pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 209.85.220.69 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com; + dmarc=fail (p=QUARANTINE sp=NONE dis=NONE arc=pass) header.from=amazonaws.com +Return-Path: +Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [209.85.220.69]) + by mx.google.com with SMTPS id z195-20020a4a49cc000000b0057b8079d2f9sor1288663ooa.3.2023.09.27.23.45.13 + for + (Google Transport Security); + Wed, 27 Sep 2023 23:45:13 -0700 (PDT) +Received-SPF: pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 209.85.220.69 as permitted sender) client-ip=209.85.220.69; +Sender: rd-notices@testdomain.com +X-Gm-Message-State: AOJu0YwA1ncYCL1JhSr58XiTdslkwS2bbAyUG8XhiJs3xZZJ3Ccy9WF5 + b8y79QbLjF9OquocCHSQC0PxicdI +X-Google-Smtp-Source: AGHT+IHRVxhXNJLs7Sr7hKiGQj5axz7trO3ifhk17zVerbtpqBwzCR3N9tJiSMksqUUrB6MOLmrSLg== +X-Received: by 2002:a4a:d138:0:b0:57e:1618:e700 with SMTP id n24-20020a4ad138000000b0057e1618e700mr147723oor.7.1695883513473; + Wed, 27 Sep 2023 23:45:13 -0700 (PDT) +X-BeenThere: rd-notices@testdomain.com +Received: by 2002:a4a:554d:0:b0:573:f543:8c29 with SMTP id e74-20020a4a554d000000b00573f5438c29ls1795866oob.1.-pod-prod-01-us; + Wed, 27 Sep 2023 23:45:12 -0700 (PDT) +X-Received: by 2002:a54:4002:0:b0:3a7:8725:f37c with SMTP id x2-20020a544002000000b003a78725f37cmr391884oie.10.1695883512779; + Wed, 27 Sep 2023 23:45:12 -0700 (PDT) +Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [54.240.59.201]) + by mx.google.com with ESMTPS id f20-20020a637554000000b00578b785d46csi18216323pgn.193.2023.09.27.23.45.12 + for + (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); + Wed, 27 Sep 2023 23:45:12 -0700 (PDT) +Date: Thu, 28 Sep 2023 06:45:12 +0000 +From: DXMaintNotify-RealDirect +To: rd-notices@testdomain.com +Message-ID: <0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83442c6-000000@us-west-2.amazonses.com> +Subject: [rd-notices] AWS_DIRECTCONNECT_MAINTENANCE_SCHEDULED +MIME-Version: 1.0 +Content-Type: text/plain; charset="UTF-8" +Content-Transfer-Encoding: quoted-printable +x-amz-sns-message-id: c08baa17-4211-5fca-a32c-f79861293c18 +x-amz-sns-subscription-arn: arn:aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2b2e141 +Feedback-ID: 1.us-west-2.c55J8LO2Yl1R0Ht+ysI6VjzUH6Cvo3dHPF80AUVC/G8=:AmazonSES +X-SES-Outgoing: 2023.09.28-54.240.59.201 +X-Original-Sender: no-reply@sns.amazonaws.com +Precedence: list +Mailing-list: list rd-notices@testdomain.com; contact rd-notices+owners@testdomain.com +List-ID: +X-Spam-Checked-In-Group: rd-notices@testdomain.com +X-Google-Group-Id: 536184160288 +List-Post: , +List-Help: , + +List-Archive: +List-Unsubscribe: , + + +Planned maintenance has been scheduled on an AWS Direct Connect endpoint in= + Westin Building Exchange, Seattle, WA. During this maintenance window, you= +r AWS Direct Connect services associated with this event may become unavail= +able.\n\nThis maintenance is scheduled to avoid disrupting redundant connec= +tions at the same time.\n\nIf you encounter any problems with your connecti= +on after the end of this maintenance window, please contact AWS Support(1).= +\n\n(1) https://aws.amazon.com/support. For more details, please see https:= +//phd.aws.amazon.com/phd/home?region=3Dus-west-2#/dashboard/open-issues + +Region: us-west-2 +Account Id: 0000000000001 + +Affected Resources: +xxxxx-ffffffff +yyyyy-uuuuuuuu +mmmmm-iiiiiiii +rrrrr-pppppppp +fffff-qqqqqqqq + +Start Time: Thu, 12 Oct 2023 07:00:00 GMT +End Time: Thu, 12 Oct 2023 13:00:00 GMT + +-- +If you wish to stop receiving notifications from this topic, please click o= +r visit the link below to unsubscribe: +https://sns.us-west-2.amazonaws.com/unsubscribe.html?SubscriptionArn=3Darn:= +aws:sns:us-west-2:860000000000:DXMaintNotify:9e02f42f-b026-4bd7-bb9b-5d1eb2= +b2e141&Endpoint=3Drd-notices@testdomain.com + +Please do not reply directly to this email. If you have any questions or co= +mments regarding this email, please contact us at https://aws.amazon.com/su= +pport + +--=20 +You received this message because you are subscribed to the Google Groups "= +Real Direct Notices" group. +To unsubscribe from this group and stop receiving emails from it, send an e= +mail to rd-notices+unsubscribe@testdomain.com. +To view this discussion on the web visit https://groups.google.com/a/Realga= +mes.com/d/msgid/rd-notices/0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83= +442c6-000000%40us-west-2.amazonses.com. \ No newline at end of file diff --git a/tests/unit/data/aws/aws3_result.json b/tests/unit/data/aws/aws3_result.json new file mode 100644 index 00000000..9ee935e7 --- /dev/null +++ b/tests/unit/data/aws/aws3_result.json @@ -0,0 +1,37 @@ +[ + { + "account": "0000000000001", + "circuits": [ + { + "circuit_id": "xxxxx-ffffffff", + "impact": "OUTAGE" + }, + { + "circuit_id": "yyyyy-uuuuuuuu", + "impact": "OUTAGE" + }, + { + "circuit_id": "mmmmm-iiiiiiii", + "impact": "OUTAGE" + }, + { + "circuit_id": "rrrrr-pppppppp", + "impact": "OUTAGE" + }, + { + "circuit_id": "fffff-qqqqqqqq", + "impact": "OUTAGE" + } + ], + "end": 1697115600, + "maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8", + "organizer": "aws-account-notifications@amazon.com", + "provider": "aws", + "sequence": 1, + "stamp": 1695883512, + "start": 1697094000, + "status": "CONFIRMED", + "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues", + "uid": "0" + } +] \ No newline at end of file diff --git a/tests/unit/data/aws/aws3_subject_parser_result.json b/tests/unit/data/aws/aws3_subject_parser_result.json new file mode 100644 index 00000000..0e67e55b --- /dev/null +++ b/tests/unit/data/aws/aws3_subject_parser_result.json @@ -0,0 +1,5 @@ +[ + { + "account": "" + } +] \ No newline at end of file diff --git a/tests/unit/data/aws/aws3_text_parser_result.json b/tests/unit/data/aws/aws3_text_parser_result.json new file mode 100644 index 00000000..a9e8523d --- /dev/null +++ b/tests/unit/data/aws/aws3_text_parser_result.json @@ -0,0 +1,35 @@ +[ + { + "circuits": [ + { + "circuit_id": "aaaaa-00000001", + "impact": "OUTAGE" + }, + { + "circuit_id": "aaaaa-00000002", + "impact": "OUTAGE" + }, + { + "circuit_id": "aaaaa-00000003", + "impact": "OUTAGE" + }, + { + "circuit_id": "aaaaa-00000004", + "impact": "OUTAGE" + }, + { + "circuit_id": "aaaaa-00000005", + "impact": "OUTAGE" + }, + { + "circuit_id": "aaaaa-00000006", + "impact": "OUTAGE" + } + ], + "end": 1631584920, + "maintenance_id": "47876b7d5a5198643a1a9cb7f954487a", + "start": 1631559720, + "status": "CANCELLED", + "summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance." + } +] \ No newline at end of file diff --git a/tests/unit/test_e2e.py b/tests/unit/test_e2e.py index 61f1d24a..c5755ac0 100644 --- a/tests/unit/test_e2e.py +++ b/tests/unit/test_e2e.py @@ -102,6 +102,15 @@ Path(dir_path, "data", "aws", "aws2_result.json"), ], ), + ( + AWS, + [ + ("email", Path(dir_path, "data", "aws", "aws3.eml")), + ], + [ + Path(dir_path, "data", "aws", "aws3_result.json"), + ], + ), # BSO ( BSO, diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index b2a2a900..9dc06083 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -100,6 +100,16 @@ Path(dir_path, "data", "aws", "aws2.eml"), Path(dir_path, "data", "aws", "aws2_subject_parser_result.json"), ), + ( + TextParserAWS1, + Path(dir_path, "data", "aws", "aws3.eml"), + Path(dir_path, "data", "aws", "aws3_text_parser_result.json"), + ), + ( + SubjectParserAWS1, + Path(dir_path, "data", "aws", "aws3.eml"), + Path(dir_path, "data", "aws", "aws3_subject_parser_result.json"), + ), # BSO ( HtmlParserBSO1, From 7d7d21c6214672c507f2f33851643ec58ba16675 Mon Sep 17 00:00:00 2001 From: GarrettS <139811494+garretts-ntc@users.noreply.github.com> Date: Thu, 19 Oct 2023 20:43:56 -0400 Subject: [PATCH 2/8] Update aws3.eml --- tests/unit/data/aws/aws3.eml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/unit/data/aws/aws3.eml b/tests/unit/data/aws/aws3.eml index 6c3690ae..df8871f3 100644 --- a/tests/unit/data/aws/aws3.eml +++ b/tests/unit/data/aws/aws3.eml @@ -26,15 +26,15 @@ ARC-Message-Signature: i=3; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc ARC-Authentication-Results: i=3; mx.google.com; dkim=pass header.i=@testdomain.com header.s=testdomain header.b=GLHcVmBa; arc=pass (i=2 spf=pass spfdomain=us-west-2.amazonses.com dkim=pass dkdomain=sns.amazonaws.com dkim=pass dkdomain=amazonses.com dmarc=pass fromdomain=amazonaws.com); - spf=pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 209.85.220.69 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com; + spf=pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 7.7.7.7 as permitted sender) smtp.mailfrom=rd-notices+bncBCSJVSHO64CBB6OB2SUAMGQE3DQKCSQ@testdomain.com; dmarc=fail (p=QUARANTINE sp=NONE dis=NONE arc=pass) header.from=amazonaws.com Return-Path: -Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [209.85.220.69]) +Received: from mail-sor-f69.google.com (mail-sor-f69.google.com. [7.7.7.7) by mx.google.com with SMTPS id z195-20020a4a49cc000000b0057b8079d2f9sor1288663ooa.3.2023.09.27.23.45.13 for (Google Transport Security); Wed, 27 Sep 2023 23:45:13 -0700 (PDT) -Received-SPF: pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 209.85.220.69 as permitted sender) client-ip=209.85.220.69; +Received-SPF: pass (google.com: domain of rd-notices+bncbcsjvsho64cbb6ob2suamgqe3dqkcsq@testdomain.com designates 7.7.7.7 as permitted sender) client-ip=7.7.7.7; Sender: rd-notices@testdomain.com X-Gm-Message-State: AOJu0YwA1ncYCL1JhSr58XiTdslkwS2bbAyUG8XhiJs3xZZJ3Ccy9WF5 b8y79QbLjF9OquocCHSQC0PxicdI @@ -46,7 +46,7 @@ Received: by 2002:a4a:554d:0:b0:573:f543:8c29 with SMTP id e74-20020a4a554d00000 Wed, 27 Sep 2023 23:45:12 -0700 (PDT) X-Received: by 2002:a54:4002:0:b0:3a7:8725:f37c with SMTP id x2-20020a544002000000b003a78725f37cmr391884oie.10.1695883512779; Wed, 27 Sep 2023 23:45:12 -0700 (PDT) -Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [54.240.59.201]) +Received: from a59-201.smtp-out.us-west-2.amazonses.com (a59-201.smtp-out.us-west-2.amazonses.com. [7.7.7.7]) by mx.google.com with ESMTPS id f20-20020a637554000000b00578b785d46csi18216323pgn.193.2023.09.27.23.45.12 for (version=TLS1_2 cipher=ECDHE-ECDSA-AES128-GCM-SHA256 bits=128/128); @@ -116,4 +116,4 @@ To unsubscribe from this group and stop receiving emails from it, send an e= mail to rd-notices+unsubscribe@testdomain.com. To view this discussion on the web visit https://groups.google.com/a/Realga= mes.com/d/msgid/rd-notices/0101018ada88c9ab-7bb959a5-dfa6-4e9b-9fa1-787fe83= -442c6-000000%40us-west-2.amazonses.com. \ No newline at end of file +442c6-000000%40us-west-2.amazonses.com. From bc93e851ff3fcdb256a7e6a5be0de22e70d6850d Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 20 Oct 2023 01:55:40 +0000 Subject: [PATCH 3/8] AWS Parser Update #2 --- circuit_maintenance_parser/parsers/aws.py | 23 ++++++------ tests/unit/data/aws/aws1_result.json | 2 +- .../data/aws/aws1_text_parser_result.json | 2 +- tests/unit/data/aws/aws2_result.json | 2 +- .../data/aws/aws2_text_parser_result.json | 2 +- .../data/aws/aws3_text_parser_result.json | 37 +++++++++---------- 6 files changed, 33 insertions(+), 35 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 2fb4984f..18c11af4 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -13,7 +13,7 @@ # pylint: disable=too-many-nested-blocks, too-many-branches logger = logging.getLogger(__name__) -#logger.setLevel("DEBUG") +# logger.setLevel("DEBUG") class SubjectParserAWS1(EmailSubjectParser): @@ -87,7 +87,7 @@ def parse_text(self, text): data = {"circuits": [], "start": "", "end": ""} impact = Impact.OUTAGE - maintenace_id = "" + maintenance_id = "" status = Status.CONFIRMED for line in text.splitlines(): @@ -99,13 +99,8 @@ def parse_text(self, text): # there may still be some strings with data to capture. # otherwise, continue on. if not line_matched: - if "may become unavailable" in line.lower(): - impact = Impact.OUTAGE - elif "has been cancelled" in line.lower(): - status = Status.CANCELLED - if re.match(r"[a-z]{5}-[a-z0-9]{8}", line): - maintenace_id += line + maintenance_id += line data["circuits"].append(CircuitImpact(circuit_id=line, impact=impact)) continue @@ -126,15 +121,21 @@ def parse_text(self, text): else: data[v] = group_match + # Let's determine impact and status + if "may become unavailable" in line.lower(): + impact = Impact.OUTAGE + elif "has been cancelled" in line.lower(): + status = Status.CANCELLED + # Let's get our times in order. if data["start"] and data["end"]: data["start"] = self.dt2ts(parser.parse(data["start"])) data["end"] = self.dt2ts(parser.parse(data["end"])) - maintenace_id += str(data["start"]) - maintenace_id += str(data["end"]) + maintenance_id += str(data["start"]) + maintenance_id += str(data["end"]) # No maintenance ID found in emails, so a hash value is being generated using the start, # end and IDs of all circuits in the notification. - data["maintenance_id"] = hashlib.md5(maintenace_id.encode("utf-8")).hexdigest() # nosec + data["maintenance_id"] = hashlib.md5(maintenance_id.encode("utf-8")).hexdigest() # nosec data["status"] = status return [data] diff --git a/tests/unit/data/aws/aws1_result.json b/tests/unit/data/aws/aws1_result.json index 1ed78df7..f4369077 100644 --- a/tests/unit/data/aws/aws1_result.json +++ b/tests/unit/data/aws/aws1_result.json @@ -28,7 +28,7 @@ } ], "end": 1621519200, - "maintenance_id": "15faf02fcf2e999792668df97828bc76", + "maintenance_id": "1cae19787c16a01d90fd5937a94c6737", "organizer": "aws-account-notifications@amazon.com", "provider": "aws", "sequence": 1, diff --git a/tests/unit/data/aws/aws1_text_parser_result.json b/tests/unit/data/aws/aws1_text_parser_result.json index 81397864..7cf8e974 100644 --- a/tests/unit/data/aws/aws1_text_parser_result.json +++ b/tests/unit/data/aws/aws1_text_parser_result.json @@ -27,7 +27,7 @@ } ], "end": 1621519200, - "maintenance_id": "15faf02fcf2e999792668df97828bc76", + "maintenance_id": "1cae19787c16a01d90fd5937a94c6737", "start": 1621497600, "status": "CONFIRMED", "summary": "Planned maintenance has been scheduled on an AWS Direct Connect router in A Block, New York, NY from Thu, 20 May 2021 08:00:00 GMT to Thu, 20 May 2021 14:00:00 GMT for 6 hours. During this maintenance window, your AWS Direct Connect services listed below may become unavailable." diff --git a/tests/unit/data/aws/aws2_result.json b/tests/unit/data/aws/aws2_result.json index 4e6a5c1f..9599357a 100644 --- a/tests/unit/data/aws/aws2_result.json +++ b/tests/unit/data/aws/aws2_result.json @@ -28,7 +28,7 @@ } ], "end": 1631584920, - "maintenance_id": "47876b7d5a5198643a1a9cb7f954487a", + "maintenance_id": "303e7bb374f620bfcc9ad3644647fce1", "organizer": "aws-account-notifications@amazon.com", "provider": "aws", "sequence": 1, diff --git a/tests/unit/data/aws/aws2_text_parser_result.json b/tests/unit/data/aws/aws2_text_parser_result.json index a9e8523d..ef74790b 100644 --- a/tests/unit/data/aws/aws2_text_parser_result.json +++ b/tests/unit/data/aws/aws2_text_parser_result.json @@ -27,7 +27,7 @@ } ], "end": 1631584920, - "maintenance_id": "47876b7d5a5198643a1a9cb7f954487a", + "maintenance_id": "303e7bb374f620bfcc9ad3644647fce1", "start": 1631559720, "status": "CANCELLED", "summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance." diff --git a/tests/unit/data/aws/aws3_text_parser_result.json b/tests/unit/data/aws/aws3_text_parser_result.json index a9e8523d..d1079502 100644 --- a/tests/unit/data/aws/aws3_text_parser_result.json +++ b/tests/unit/data/aws/aws3_text_parser_result.json @@ -1,35 +1,32 @@ [ { + "account": "0000000000001", "circuits": [ { - "circuit_id": "aaaaa-00000001", - "impact": "OUTAGE" + "circuit_id": "xxxxx-ffffffff", + "impact": "OUTAGE" }, { - "circuit_id": "aaaaa-00000002", - "impact": "OUTAGE" + "circuit_id": "yyyyy-uuuuuuuu", + "impact": "OUTAGE" }, { - "circuit_id": "aaaaa-00000003", - "impact": "OUTAGE" + "circuit_id": "mmmmm-iiiiiiii", + "impact": "OUTAGE" }, { - "circuit_id": "aaaaa-00000004", - "impact": "OUTAGE" + "circuit_id": "rrrrr-pppppppp", + "impact": "OUTAGE" }, { - "circuit_id": "aaaaa-00000005", - "impact": "OUTAGE" - }, - { - "circuit_id": "aaaaa-00000006", - "impact": "OUTAGE" + "circuit_id": "fffff-qqqqqqqq", + "impact": "OUTAGE" } - ], - "end": 1631584920, - "maintenance_id": "47876b7d5a5198643a1a9cb7f954487a", - "start": 1631559720, - "status": "CANCELLED", - "summary": "We would like to inform you that the planned maintenance that was scheduled for AWS Direct Connect endpoint in Equinix SG2, Singapore, SGP from Mon, 13 Sep 2021 19:02:00 GMT to Tue, 14 Sep 2021 02:02:00 GMT has been cancelled. Please find below your AWS Direct Connect services that would have been affected by this planned maintenance." + ], + "end": 1697115600, + "maintenance_id": "b15bf3344836f5ad8ab6a6e16cf328f8", + "start": 1697094000, + "status": "CONFIRMED", + "summary": "Planned maintenance has been scheduled on an AWS Direct Connect endpoint in Westin Building Exchange, Seattle, WA. During this maintenance window, your AWS Direct Connect services associated with this event may become unavailable.\\n\\nThis maintenance is scheduled to avoid disrupting redundant connections at the same time.\\n\\nIf you encounter any problems with your connection after the end of this maintenance window, please contact AWS Support(1).\\n\\n(1) https://aws.amazon.com/support. For more details, please see https://phd.aws.amazon.com/phd/home?region=us-west-2#/dashboard/open-issues" } ] \ No newline at end of file From 38eeedceb050eb217c3fac88b0a00ad0486c55e6 Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 20 Oct 2023 02:06:18 +0000 Subject: [PATCH 4/8] incorporated KC recommendations --- circuit_maintenance_parser/parsers/aws.py | 39 ++++++++++++----------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 18c11af4..0fd26725 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -13,7 +13,6 @@ # pylint: disable=too-many-nested-blocks, too-many-branches logger = logging.getLogger(__name__) -# logger.setLevel("DEBUG") class SubjectParserAWS1(EmailSubjectParser): @@ -27,7 +26,7 @@ def parse_subject(self, subject): data = {"account": ""} # Common Subject strings for matching: subject_map = { - "\[AWS Account ?I?D?: ([0-9]+)\]": "account", + r"\[AWS Account ?I?D?: ([0-9]+)\]": "account", } regex_keys = re.compile("|".join(subject_map), re.IGNORECASE) @@ -39,10 +38,11 @@ def parse_subject(self, subject): if not line_matched: continue for group_match in line_matched.groups(): - if group_match is not None: - for k, v in subject_map.items(): - if re.search(k, line, re.IGNORECASE): - data[v] = group_match + if not group_match: + continue + for key, value in subject_map.items(): + if re.search(key, line, re.IGNORECASE): + data[value] = group_match return [data] @@ -107,19 +107,20 @@ def parse_text(self, text): # for lines that do match our regex strings. # grab the data and map the values to keys. for group_match in line_matched.groups(): - if group_match is not None: - for k, v in text_map.items(): - if re.search(k, line_matched.string, re.IGNORECASE): - # Due to having a single line on some emails - # This causes multiple match groups - # However this needs to be split across keys. - # This could probably be cleaned up. - if v == "start_and_end" and data["start"] == "": - data["start"] = group_match - elif v == "start_and_end" and data["end"] == "": - data["end"] = group_match - else: - data[v] = group_match + if not group_match: + continue + for key, value in text_map.items(): + if re.search(key, line_matched.string, re.IGNORECASE): + # Due to having a single line on some emails + # This causes multiple match groups + # However this needs to be split across keys. + # This could probably be cleaned up. + if value == "start_and_end" and data["start"] == "": + data["start"] = group_match + elif value == "start_and_end" and data["end"] == "": + data["end"] = group_match + else: + data[value] = group_match # Let's determine impact and status if "may become unavailable" in line.lower(): From 02547c010c0c3e292fc3e212b3bdd5a2557e0c22 Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 20 Oct 2023 14:33:02 +0000 Subject: [PATCH 5/8] incorporated CA recommendations on empty keys --- circuit_maintenance_parser/parsers/aws.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index 0fd26725..e7d44442 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -85,7 +85,7 @@ def parse_text(self, text): regex_keys = re.compile("|".join(text_map), re.IGNORECASE) - data = {"circuits": [], "start": "", "end": ""} + data = {"circuits": []} impact = Impact.OUTAGE maintenance_id = "" status = Status.CONFIRMED @@ -115,9 +115,9 @@ def parse_text(self, text): # This causes multiple match groups # However this needs to be split across keys. # This could probably be cleaned up. - if value == "start_and_end" and data["start"] == "": + if value == "start_and_end" and "start" not in data: data["start"] = group_match - elif value == "start_and_end" and data["end"] == "": + elif value == "start_and_end": data["end"] = group_match else: data[value] = group_match @@ -129,7 +129,7 @@ def parse_text(self, text): status = Status.CANCELLED # Let's get our times in order. - if data["start"] and data["end"]: + if all((key in data for key in ["start", "end"])): data["start"] = self.dt2ts(parser.parse(data["start"])) data["end"] = self.dt2ts(parser.parse(data["end"])) maintenance_id += str(data["start"]) From 5ef19d2efebfc633fe0e1684a9826a55f566affb Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 3 Nov 2023 12:10:18 +0000 Subject: [PATCH 6/8] updated to swap k/v pairs --- circuit_maintenance_parser/parsers/aws.py | 72 +++++++++++-------- circuit_maintenance_parser/provider.py | 1 + .../data/aws/aws3_subject_parser_result.json | 5 -- tests/unit/test_parsers.py | 5 -- 4 files changed, 45 insertions(+), 38 deletions(-) delete mode 100644 tests/unit/data/aws/aws3_subject_parser_result.json diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index e7d44442..bb1a84b4 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -23,13 +23,16 @@ def parse_subject(self, subject): Example: AWS Direct Connect Planned Maintenance Notification [AWS Account: 00000001] """ - data = {"account": ""} + data = {} # Common Subject strings for matching: - subject_map = { - r"\[AWS Account ?I?D?: ([0-9]+)\]": "account", - } + subject_map = [{"account": r"\[AWS Account ?I?D?: ([0-9]+)\]"}] - regex_keys = re.compile("|".join(subject_map), re.IGNORECASE) + subject_list = [] + for each_subject in subject_map: + for key, value in each_subject.items(): + subject_list.append(value) + + regex_keys = re.compile("|".join(subject_list), re.IGNORECASE) # in case of a multi-line subject # match the subject map @@ -40,9 +43,10 @@ def parse_subject(self, subject): for group_match in line_matched.groups(): if not group_match: continue - for key, value in subject_map.items(): - if re.search(key, line, re.IGNORECASE): - data[value] = group_match + for parser_dict in subject_map: + for key, value in parser_dict.items(): + if re.search(key, line, re.IGNORECASE): + data[key] = group_match return [data] @@ -76,14 +80,25 @@ def parse_text(self, text): This maintenance is scheduled to avoid disrupting redundant connections at = the same time. """ - text_map = { - "^Account ?I?D?: ([0-9]+)": "account", - "^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start", - "^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "end", - "(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})": "start_and_end", - } - - regex_keys = re.compile("|".join(text_map), re.IGNORECASE) + text_map = [ + {"account": "^Account ?I?D?: ([0-9]+)"}, + { + "start": "^Start Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" + }, + { + "end": "^End Time: ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" + }, + { + "start_and_end": "(?<=from )([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3}) to ([A-Z][a-z]{2}, [0-9]{1,2} [A-Z][a-z]{2,9} [0-9]{4} [0-9]{2}:[0-9]{2}:[0-9]{2} [A-Z]{2,3})" + }, + ] + + each_textmap = [] + for search_string in text_map: + for key, value in search_string.items(): + each_textmap.append(value) + + regex_keys = re.compile("|".join(each_textmap), re.IGNORECASE) data = {"circuits": []} impact = Impact.OUTAGE @@ -109,18 +124,19 @@ def parse_text(self, text): for group_match in line_matched.groups(): if not group_match: continue - for key, value in text_map.items(): - if re.search(key, line_matched.string, re.IGNORECASE): - # Due to having a single line on some emails - # This causes multiple match groups - # However this needs to be split across keys. - # This could probably be cleaned up. - if value == "start_and_end" and "start" not in data: - data["start"] = group_match - elif value == "start_and_end": - data["end"] = group_match - else: - data[value] = group_match + for parser_dict in text_map: + for key, value in parser_dict.items(): + if re.search(value, line_matched.string, re.IGNORECASE): + # Due to having a single line on some emails + # This causes multiple match groups + # However this needs to be split across keys. + # This could probably be cleaned up. + if key == "start_and_end" and "start" not in data: + data["start"] = group_match + elif key == "start_and_end": + data["end"] = group_match + else: + data[key] = group_match # Let's determine impact and status if "may become unavailable" in line.lower(): diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index 738431ad..062018de 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -186,6 +186,7 @@ class AWS(GenericProvider): """AWS provider custom class.""" _processors: List[GenericProcessor] = [ + CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1]), CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]), ] _default_organizer = "aws-account-notifications@amazon.com" diff --git a/tests/unit/data/aws/aws3_subject_parser_result.json b/tests/unit/data/aws/aws3_subject_parser_result.json deleted file mode 100644 index 0e67e55b..00000000 --- a/tests/unit/data/aws/aws3_subject_parser_result.json +++ /dev/null @@ -1,5 +0,0 @@ -[ - { - "account": "" - } -] \ No newline at end of file diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index 9dc06083..b720f778 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -105,11 +105,6 @@ Path(dir_path, "data", "aws", "aws3.eml"), Path(dir_path, "data", "aws", "aws3_text_parser_result.json"), ), - ( - SubjectParserAWS1, - Path(dir_path, "data", "aws", "aws3.eml"), - Path(dir_path, "data", "aws", "aws3_subject_parser_result.json"), - ), # BSO ( HtmlParserBSO1, From f67ae204405a43375b489be68dd17b09db09a383 Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 3 Nov 2023 14:20:23 +0000 Subject: [PATCH 7/8] linting fix --- circuit_maintenance_parser/parsers/aws.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/circuit_maintenance_parser/parsers/aws.py b/circuit_maintenance_parser/parsers/aws.py index bb1a84b4..75e7ca69 100644 --- a/circuit_maintenance_parser/parsers/aws.py +++ b/circuit_maintenance_parser/parsers/aws.py @@ -43,8 +43,8 @@ def parse_subject(self, subject): for group_match in line_matched.groups(): if not group_match: continue - for parser_dict in subject_map: - for key, value in parser_dict.items(): + for search_string in subject_map: + for key, value in search_string.items(): if re.search(key, line, re.IGNORECASE): data[key] = group_match return [data] @@ -124,8 +124,8 @@ def parse_text(self, text): for group_match in line_matched.groups(): if not group_match: continue - for parser_dict in text_map: - for key, value in parser_dict.items(): + for search_string in text_map: + for key, value in search_string.items(): if re.search(value, line_matched.string, re.IGNORECASE): # Due to having a single line on some emails # This causes multiple match groups From d1e36daba6e2c95385ad58949064bb7fd5a946e2 Mon Sep 17 00:00:00 2001 From: Garrett Skjelstad Date: Fri, 3 Nov 2023 15:16:01 +0000 Subject: [PATCH 8/8] swapped providers, so that the less specific one takes places AFTER the more specific --- circuit_maintenance_parser/provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/circuit_maintenance_parser/provider.py b/circuit_maintenance_parser/provider.py index 062018de..76449ce7 100644 --- a/circuit_maintenance_parser/provider.py +++ b/circuit_maintenance_parser/provider.py @@ -186,8 +186,8 @@ class AWS(GenericProvider): """AWS provider custom class.""" _processors: List[GenericProcessor] = [ - CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1]), CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1, SubjectParserAWS1]), + CombinedProcessor(data_parsers=[EmailDateParser, TextParserAWS1]), ] _default_organizer = "aws-account-notifications@amazon.com"