From ae46cc84568915f237e3f9d5a623a5bcf8df4571 Mon Sep 17 00:00:00 2001
From: Bobby Filar <29960025+bfilar@users.noreply.github.com>
Date: Mon, 16 Oct 2023 13:14:40 -0500
Subject: [PATCH] Signals: additional contextual signals (#848)

---
Review notes (this section sits after the "---" separator and is not part
of the commit message):

- link_count_distinct_display_text.yml: the signal is named "Count Unique
  Display Text" but counted distinct .href_url.url values; it now counts
  distinct .display_text values.
- link_ipfs.yml: the character class '[\.-/]' is a range from '.' to '/'
  and never matches '-'; it is now '[-./]' so all three delimiters named in
  the comment match. The comment now says "query params" because the code
  inspects .href_url.query_params. Confirm whether the URL path should be
  inspected as well.
- sender_display_name_contains_email.yml: regex.contains with lowercase-only
  character classes missed mixed-case addresses; it now uses regex.icontains.
- body_contains_shipping_language.yml, subject_suspicious_keywords.yml:
  '.{314}' demands exactly 314 characters; the added lines now use '.{3,14}'.
  Confirm the intended bounds.
- headers_urlhaus_domain.yml: the new file now ends with a newline.
- Every fix replaces a line in place, so hunk line counts and the diffstat
  are unchanged. The abbreviated blob ids on the "index" lines were not
  recomputed for the edited files.

 .../body_contains_shipping_language.yml       | 16 ++++++++++++
 signals/content/body_is_inline_image.yml      |  6 +++++
 .../content/subject_suspicious_keywords.yml   |  3 ++-
 signals/headers/headers_urlhaus_domain.yml    |  7 +++++
 .../links/link_contains_recipient_email.yml   |  2 +-
 .../link_contains_unicode_characters.yml      |  5 ++++
 .../link_count_distinct_display_text.yml      |  4 +++
 ...ink_domains_do_not_match_sender_domain.yml |  8 ++++++
 signals/links/link_ipfs.yml                   | 26 +++++++++++++++++++
 signals/links/link_same_through_msg.yml       |  6 +++++
 ...nder_display_contains_recipient_domain.yml |  4 +++
 .../sender_display_name_contains_email.yml    |  5 ++++
 12 files changed, 90 insertions(+), 2 deletions(-)
 create mode 100644 signals/content/body_contains_shipping_language.yml
 create mode 100644 signals/content/body_is_inline_image.yml
 create mode 100644 signals/headers/headers_urlhaus_domain.yml
 create mode 100644 signals/links/link_contains_unicode_characters.yml
 create mode 100644 signals/links/link_count_distinct_display_text.yml
 create mode 100644 signals/links/link_domains_do_not_match_sender_domain.yml
 create mode 100644 signals/links/link_ipfs.yml
 create mode 100644 signals/links/link_same_through_msg.yml
 create mode 100644 signals/sender/sender_display_contains_recipient_domain.yml
 create mode 100644 signals/sender/sender_display_name_contains_email.yml

diff --git a/signals/content/body_contains_shipping_language.yml b/signals/content/body_contains_shipping_language.yml
new file mode 100644
index 00000000000..4962537122e
--- /dev/null
+++ b/signals/content/body_contains_shipping_language.yml
@@ -0,0 +1,16 @@
+name: "Body: Contains Shipping Language"
+type: "query"
+source: |
+  regex.icontains(body.current_thread.text,
+    "abandon.*package",
+    "courier.*able",
+    "missed.*shipping.*notification",
+    "missed.shipment.notification",
+    "unable.*deliver",
+    "delivery.*attempt.*failed",
+    "signed.*delivery",
+    "status of your (.{3,14})? ?delivery",
+    "delivery attempt",
+    "delivery stopped for shipment",
+    "fedex tracking",
+  )
diff --git a/signals/content/body_is_inline_image.yml b/signals/content/body_is_inline_image.yml
new file mode 100644
index 00000000000..13fe0e0337b
--- /dev/null
+++ b/signals/content/body_is_inline_image.yml
@@ -0,0 +1,6 @@
+name: "Content: Body is an Inline Image"
+type: "query"
+source: |
+  length(body.html.raw) < 200
+  and length(body.links) > 0
+  and strings.ilike(body.html.raw, "*img*cid*")
diff --git a/signals/content/subject_suspicious_keywords.yml b/signals/content/subject_suspicious_keywords.yml
index 276d4d6234e..28dd8596fef 100644
--- a/signals/content/subject_suspicious_keywords.yml
+++ b/signals/content/subject_suspicious_keywords.yml
@@ -93,9 +93,10 @@ source: |
     "scanned.?invoice",
     "secured?.update",
     "security breach",
+    "security.notice",
     "securlty",
     "signed.*delivery",
-    "status of your .{314}? ?delivery",
+    "status of your (.{3,14})? ?delivery",
     "susp[il1]+c[il1]+ous.*act[il1]+v[il1]+ty",
     "suspicious.*sign.*[io]n",
     "suspicious.activit",
diff --git a/signals/headers/headers_urlhaus_domain.yml b/signals/headers/headers_urlhaus_domain.yml
new file mode 100644
index 00000000000..3816bed8601
--- /dev/null
+++ b/signals/headers/headers_urlhaus_domain.yml
@@ -0,0 +1,7 @@
+name: "Headers: URLHaus Domain"
+type: "query"
+source: |
+  any(headers.domains,
+    .root_domain in $abuse_ch_urlhaus_domains_trusted_reporters
+    and .root_domain not in $tranco_1m
+  )
diff --git a/signals/links/link_contains_recipient_email.yml b/signals/links/link_contains_recipient_email.yml
index f1ab8f78932..25b2b187055 100644
--- a/signals/links/link_contains_recipient_email.yml
+++ b/signals/links/link_contains_recipient_email.yml
@@ -1,5 +1,5 @@
 name: "Link: Contains Recipient Email"
 type: "query"
 source: |
-  any(body.links, any(recipients.to, strings.icontains(..href_url.query_params, .email.email)))
+  any(body.links, any(recipients.to, strings.icontains(..href_url.url, .email.email)))
 
diff --git a/signals/links/link_contains_unicode_characters.yml b/signals/links/link_contains_unicode_characters.yml
new file mode 100644
index 00000000000..109549ac9e2
--- /dev/null
+++ b/signals/links/link_contains_unicode_characters.yml
@@ -0,0 +1,5 @@
+name: "Link: URL with Unicode U+2044 (⁄) or U+2215 (∕) characters"
+type: "query"
+source: |
+  regex.icontains(body.plain.raw, 'https?:\/\/[^\s⁄∕]+(?:\/[^\s⁄∕]+)*[⁄∕][^\s⁄∕]+')
+  or any(body.links, regex.icontains(.href_url.url, 'https?:\/\/[^\s⁄∕]+(?:\/[^\s⁄∕]+)*[⁄∕][^\s⁄∕]+'))
diff --git a/signals/links/link_count_distinct_display_text.yml b/signals/links/link_count_distinct_display_text.yml
new file mode 100644
index 00000000000..a149a7d56bf
--- /dev/null
+++ b/signals/links/link_count_distinct_display_text.yml
@@ -0,0 +1,4 @@
+name: "Link: Count Unique Display Text"
+type: "query"
+source: |
+  length(distinct(body.links, .display_text))
diff --git a/signals/links/link_domains_do_not_match_sender_domain.yml b/signals/links/link_domains_do_not_match_sender_domain.yml
new file mode 100644
index 00000000000..85d2dac7395
--- /dev/null
+++ b/signals/links/link_domains_do_not_match_sender_domain.yml
@@ -0,0 +1,8 @@
+name: "Link: Sender domain does not match any body links"
+type: "query"
+source: |
+  length(body.links) > 0
+  and all(body.links,
+    .href_url.domain.root_domain != sender.email.domain.root_domain
+    and .href_url.domain.root_domain not in $org_domains
+  )
diff --git a/signals/links/link_ipfs.yml b/signals/links/link_ipfs.yml
new file mode 100644
index 00000000000..7c72380158d
--- /dev/null
+++ b/signals/links/link_ipfs.yml
@@ -0,0 +1,26 @@
+name: "Link: IPFS"
+type: "query"
+source: |
+  any(body.links,
+    // Any body link domains contain "ipfs"
+    strings.icontains(.href_url.domain.domain, "ipfs")
+
+    // Or the query params contain ipfs anchored to a leading and trailing '-', '/', '.'
+    or (
+      regex.icontains(.href_url.query_params, '[-./]ipfs[-./]')
+      and .href_url.domain.domain not in $org_domains
+      and (
+        (
+          // don't include high rep domains
+          .href_url.domain.domain not in $tranco_1m
+          and .href_url.domain.domain not in $umbrella_1m
+        )
+        // if it's in Tranco or Umbrella, still include it if it's one of these
+        or .href_url.domain.domain in $free_file_hosts
+        or .href_url.domain.root_domain in $free_subdomain_hosts
+      )
+    )
+  )
+
+  // adding negation block for legitimate domains with ipfs in their name
+  and not sender.email.domain.domain in ("shipfsl.com")
diff --git a/signals/links/link_same_through_msg.yml b/signals/links/link_same_through_msg.yml
new file mode 100644
index 00000000000..cdea808029d
--- /dev/null
+++ b/signals/links/link_same_through_msg.yml
@@ -0,0 +1,6 @@
+name: "Link: Same URL with different Display Texts"
+type: "query"
+source: |
+  length(body.links) > 1
+  and length(distinct(body.links, .href_url.url)) == 1
+  and length(distinct(body.links, .display_text)) > 1
diff --git a/signals/sender/sender_display_contains_recipient_domain.yml b/signals/sender/sender_display_contains_recipient_domain.yml
new file mode 100644
index 00000000000..832d5a76014
--- /dev/null
+++ b/signals/sender/sender_display_contains_recipient_domain.yml
@@ -0,0 +1,4 @@
+name: "Sender: Display Name Contains Recipient Root Domain"
+type: "query"
+source: |
+  any(recipients.to, strings.icontains(sender.display_name, .email.domain.root_domain))
diff --git a/signals/sender/sender_display_name_contains_email.yml b/signals/sender/sender_display_name_contains_email.yml
new file mode 100644
index 00000000000..3727c526200
--- /dev/null
+++ b/signals/sender/sender_display_name_contains_email.yml
@@ -0,0 +1,5 @@
+
+name: "Sender: Display Name Contains Email Address"
+type: "query"
+source: |
+  regex.icontains(sender.display_name, '[a-z0-9]+@[a-z]+')