From 4788c8b3863eedaa61f8f049c700b5cb9219ded8 Mon Sep 17 00:00:00 2001 From: Bobby Filar <29960025+bfilar@users.noreply.github.com> Date: Tue, 19 Sep 2023 15:56:59 +0200 Subject: [PATCH] Adding to distinct counts for better accounting (#799) --- signals/links/link_appspot_in_url_path_distinct.yml | 8 ++++---- signals/links/link_free_file_host_distinct.yml | 8 ++++---- signals/links/link_free_subdomain_host_distinct.yml | 10 +++++----- signals/links/link_freenom_tld_distinct.yml | 11 +++++++---- signals/links/link_google_open_redirect_distinct.yml | 9 ++++----- signals/links/link_low_reputation_distinct.yml | 8 ++++---- signals/links/link_mimatched_distinct.yml | 8 ++++---- signals/links/link_suspicious_tld_distinct.yml | 8 ++++---- signals/links/link_url_shortener_distinct.yml | 8 ++++---- 9 files changed, 40 insertions(+), 38 deletions(-) diff --git a/signals/links/link_appspot_in_url_path_distinct.yml b/signals/links/link_appspot_in_url_path_distinct.yml index 9259f5420c3..d00ffa94e69 100644 --- a/signals/links/link_appspot_in_url_path_distinct.yml +++ b/signals/links/link_appspot_in_url_path_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: Appspot in URL Path Unique Count" -'type': "query" -'source': | - length(distinct(body.links, strings.ilike(.href_url.path, "*appspot.com*"))) +name: "Link: Appspot in URL Path Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, strings.ilike(.href_url.path, "*appspot.com*")), .href_url.url)) diff --git a/signals/links/link_free_file_host_distinct.yml b/signals/links/link_free_file_host_distinct.yml index d2c06ca5aff..9bcf4195b1e 100644 --- a/signals/links/link_free_file_host_distinct.yml +++ b/signals/links/link_free_file_host_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: Free File Host Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .href_url.domain.domain in $free_file_hosts)) +name: "Link: Free File Host Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.domain in $free_file_hosts), .href_url.url)) diff --git a/signals/links/link_free_subdomain_host_distinct.yml b/signals/links/link_free_subdomain_host_distinct.yml index f76c1871058..d59d6345997 100644 --- a/signals/links/link_free_subdomain_host_distinct.yml +++ b/signals/links/link_free_subdomain_host_distinct.yml @@ -1,8 +1,8 @@ -'name': "Link: Free Subdomain Host Unique Count" -'type': "query" -'source': | - length(distinct(body.links, +name: "Link: Free Subdomain Host Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.root_domain in $free_subdomain_hosts and .href_url.domain.subdomain is not null and .href_url.domain.subdomain != "www" - )) + ), .href_url.url)) diff --git a/signals/links/link_freenom_tld_distinct.yml b/signals/links/link_freenom_tld_distinct.yml index 96be991225b..3610f00c3e4 100644 --- a/signals/links/link_freenom_tld_distinct.yml +++ b/signals/links/link_freenom_tld_distinct.yml @@ -1,4 +1,7 @@ -'name': "Link: Freenom TLD Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .href_url.domain.tld in ("tk", "ml", "ga", "cf", "gq"))) +name: "Link: Freenom TLD Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.tld in ("tk", "ml", "ga", "cf", "gq")), + .href_url.url + ) + ) diff --git a/signals/links/link_google_open_redirect_distinct.yml b/signals/links/link_google_open_redirect_distinct.yml index 40330e11f95..3f22fe8b759 100644 --- a/signals/links/link_google_open_redirect_distinct.yml +++ b/signals/links/link_google_open_redirect_distinct.yml @@ -1,5 +1,4 @@ -'name': "Link: Google Open Redirect Unique Count" -'type': "query" -'source': | - length(distinct(body.links, - regex.icontains(.href_url.url, "https?://(www.)?google.[a-zA-Z]{2,}/url\\?q=https?://.+"))) +name: "Link: Google Open Redirect Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, regex.icontains(.href_url.url, "https?://(www.)?google.[a-zA-Z]{2,}/url\\?q=https?://.+")), .href_url.url)) \ No newline at end of file diff --git a/signals/links/link_low_reputation_distinct.yml b/signals/links/link_low_reputation_distinct.yml index 43959030b5d..2a862d6fb2b 100644 --- a/signals/links/link_low_reputation_distinct.yml +++ b/signals/links/link_low_reputation_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: Low Reputation Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .href_url.domain.root_domain not in $tranco_1m)) +name: "Link: Low Reputation Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.root_domain not in $tranco_1m), .href_url.url)) diff --git a/signals/links/link_mimatched_distinct.yml b/signals/links/link_mimatched_distinct.yml index 5505135f9f2..c54815c5f21 100644 --- a/signals/links/link_mimatched_distinct.yml +++ b/signals/links/link_mimatched_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: Mismatch Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .mismatched)) +name: "Link: Mismatch Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .mismatched), .href_url.url)) diff --git a/signals/links/link_suspicious_tld_distinct.yml b/signals/links/link_suspicious_tld_distinct.yml index c32122e5107..a58ea167640 100644 --- a/signals/links/link_suspicious_tld_distinct.yml +++ b/signals/links/link_suspicious_tld_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: Suspicious TLD Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .href_url.domain.tld in $suspicious_tlds)) +name: "Link: Suspicious TLD Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.tld in $suspicious_tlds), .href_url.url)) diff --git a/signals/links/link_url_shortener_distinct.yml b/signals/links/link_url_shortener_distinct.yml index 07ed3b2e666..130f7d81145 100644 --- a/signals/links/link_url_shortener_distinct.yml +++ b/signals/links/link_url_shortener_distinct.yml @@ -1,4 +1,4 @@ -'name': "Link: URL Shortener Unique Count" -'type': "query" -'source': | - length(distinct(body.links, .href_url.domain.root_domain in $url_shorteners)) +name: "Link: URL Shortener Unique Count" +type: "query" +source: | + length(distinct(filter(body.links, .href_url.domain.root_domain in $url_shorteners), .href_url.url))