-
Notifications
You must be signed in to change notification settings - Fork 322
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement NCMEC Upvote/Downvote in reference API #1624
Changes from all commits
965cfa8
90aa0ed
1da801d
be4cea2
a82dd7b
6dee071
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,7 +19,7 @@ | |
import urllib.parse | ||
|
||
import requests | ||
from requests.packages.urllib3.util.retry import Retry | ||
from urllib3.util.retry import Retry | ||
from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter | ||
|
||
|
||
|
@@ -123,6 +123,19 @@ class NCMECEntryType(Enum): | |
video = "video" | ||
|
||
|
||
@unique | ||
class NCMECFeedbackType(Enum): | ||
md5 = "MD5" | ||
sha1 = "SHA1" | ||
pdna = "PDNA" | ||
pdq = "PDQ" | ||
netclean = "NETCLEAN" | ||
videntifier = "VIDENTIFIER" | ||
tmk_pdqf = "TMK_PDQF" | ||
ssvh_pdna = "SSVH_PDNA" | ||
ssvh_safer_hash = "SSVH_SAFER_HASH" | ||
|
||
|
||
@dataclass | ||
class NCMECEntryUpdate: | ||
id: str | ||
|
@@ -131,6 +144,7 @@ class NCMECEntryUpdate: | |
deleted: bool | ||
classification: t.Optional[str] | ||
fingerprints: t.Dict[str, str] | ||
feedback: t.List[t.Dict[str, t.Any]] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (note to self): Consider whether it makes sense to use the enum here, or whether to leave as string in all cases |
||
|
||
@classmethod | ||
def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": | ||
|
@@ -148,6 +162,36 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate": | |
fingerprints={ | ||
x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text | ||
}, | ||
feedback=( | ||
[ | ||
{ | ||
"sentiment": x.tag, # "affirmativeFeedback" or "negativeFeedback" | ||
"type": x.str("type"), | ||
"latest_feedback_time": x.str("lastUpdateTimestamp"), | ||
"members": [ | ||
{"id": m.str("id"), "name": m.text} | ||
for m in x.maybe("members") | ||
if m.has_text | ||
], | ||
Comment on lines
+171
to
+175
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (note to self): Should this be dict instead of synthesizing a a dict? What about a dedicated class? |
||
"reasons": [ | ||
{ | ||
"guid": r.maybe("reason").str("guid"), | ||
"name": r.maybe("reason").str("name"), | ||
"type": r.maybe("reason").str("type"), | ||
"members": [ | ||
{"id": m.str("id"), "name": m.text} | ||
for m in x.maybe("members") | ||
], | ||
} | ||
for r in x.maybe("reasons") | ||
if r.maybe("reason") | ||
], | ||
} | ||
for x in xml.maybe("feedback") | ||
] | ||
if xml.maybe("feedback").has_text | ||
else [] | ||
), | ||
) | ||
|
||
|
||
|
@@ -215,11 +259,49 @@ def estimated_entries_in_range(self) -> int: | |
) | ||
|
||
|
||
# TODO: once we know the shape of response, finish this class | ||
@dataclass | ||
class UpdateEntryResponse: | ||
updates: t.List[NCMECEntryUpdate] | ||
|
||
@classmethod | ||
def from_xml( | ||
cls, xml: _XMLWrapper, fallback_max_time: int | ||
) -> "UpdateEntryResponse": | ||
updates: t.List[NCMECEntryUpdate] = [] | ||
|
||
for content_xml in (xml.maybe("images"), xml.maybe("videos")): | ||
if not content_xml or not len(content_xml): | ||
continue | ||
updates.extend(NCMECEntryUpdate.from_xml(c) for c in content_xml) | ||
|
||
return cls(updates) | ||
|
||
|
||
@dataclass | ||
class GetFeedbackReasonsResponse: | ||
reasons: t.List[t.Dict[str, str]] | ||
|
||
@classmethod | ||
def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse": | ||
reasons = [] | ||
for reason in xml.maybe("availableFeedbackReasons"): | ||
reasons.append( | ||
{ | ||
"guid": reason.str("guid"), | ||
"name": reason.str("name"), | ||
"type": reason.str("type"), | ||
} | ||
Comment on lines
+290
to
+294
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (note to self): Use a dedicated class here instead of dict |
||
) | ||
return cls(reasons) | ||
|
||
|
||
@unique | ||
class NCMECEndpoint(Enum): | ||
status = "status" | ||
entries = "entries" | ||
members = "members" | ||
feedback = "feedback" | ||
|
||
|
||
class NCMECEnvironment(Enum): | ||
|
@@ -261,15 +343,19 @@ def __init__( | |
username: str, | ||
password: str, | ||
environment: NCMECEnvironment, | ||
member_id: t.Optional[str] = None, | ||
reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {}, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. blocking: We won't know this at client construction time, and can poll it afterwards. |
||
) -> None: | ||
assert is_valid_user_pass(username, password) | ||
self.username = username | ||
self.password = password | ||
self._base_url = environment.value | ||
self.member_id = member_id | ||
self.reasons_map = reasons_map or {} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm this |
||
|
||
def _get_session(self) -> requests.Session: | ||
""" | ||
Custom requests sesson | ||
Custom requests session | ||
|
||
Ideally, should be used within a context manager: | ||
``` | ||
|
@@ -295,14 +381,18 @@ def _get_session(self) -> requests.Session: | |
) | ||
return session | ||
|
||
def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Element: | ||
def _get( | ||
self, endpoint: NCMECEndpoint, *, path: str = "", next_: str = "", **params | ||
) -> ET.Element: | ||
""" | ||
Perform an HTTP GET request, and return the XML response payload. | ||
|
||
Same timeouts and retry strategy as `_get_session` above. | ||
""" | ||
|
||
url = "/".join((self._base_url, self.VERSION, endpoint.value)) | ||
if path: | ||
url = "/".join((url, path)) | ||
if next_: | ||
url = self._base_url + next_ | ||
params = {} | ||
|
@@ -328,16 +418,49 @@ def _post(self, endpoint: NCMECEndpoint, *, data=None) -> t.Any: | |
No timeout or retry strategy. | ||
""" | ||
|
||
url = "/".join((self._base_url, endpoint.value)) | ||
url = "/".join((self._base_url, self.VERSION, endpoint.value)) | ||
with self._get_session() as session: | ||
response = session.post(url, data=data) | ||
response.raise_for_status() | ||
return response | ||
|
||
def _put( | ||
self, | ||
endpoint: NCMECEndpoint, | ||
*, | ||
member_id: t.Optional[str] = None, | ||
entry_id: t.Optional[str] = None, | ||
feedback_type: t.Optional[NCMECFeedbackType] = None, | ||
data=None, | ||
) -> t.Any: | ||
""" | ||
Perform an HTTP PUT request, and return the XML response payload. | ||
|
||
No timeout or retry strategy. | ||
""" | ||
|
||
url = "/".join((self._base_url, self.VERSION, endpoint.value)) | ||
if feedback_type and member_id and entry_id: | ||
url = "/".join( | ||
( | ||
self._base_url, | ||
endpoint.value, | ||
member_id, | ||
entry_id, | ||
feedback_type.value, | ||
NCMECEndpoint.feedback.value, | ||
) | ||
) | ||
with self._get_session() as session: | ||
response = session.put(url, data=data) | ||
response.raise_for_status() | ||
return response | ||
|
||
def status(self) -> StatusResult: | ||
"""Query the status endpoint, which tells you who you are.""" | ||
response = self._get(NCMECEndpoint.status) | ||
member = _XMLWrapper(response)["member"] | ||
self.member_id = member.str("id") | ||
return StatusResult(member.int("id"), member.text) | ||
|
||
def members(self) -> t.List[StatusResult]: | ||
|
@@ -348,6 +471,17 @@ def members(self) -> t.List[StatusResult]: | |
for member in _XMLWrapper(response) | ||
] | ||
|
||
def feedback_reasons(self) -> GetFeedbackReasonsResponse: | ||
"""Get the possible negative feedback reasons for each feedback type""" | ||
for feedbackType in NCMECFeedbackType: | ||
resp = self._get( | ||
NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons" | ||
) | ||
reasonsResp = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp)) | ||
self.reasons_map[feedbackType.value] = reasonsResp.reasons | ||
|
||
return reasonsResp | ||
|
||
def get_entries( | ||
self, | ||
*, | ||
|
@@ -401,6 +535,55 @@ def get_entries_iter( | |
has_more = bool(next_) | ||
yield result | ||
|
||
def submit_feedback( | ||
self, | ||
entry_id: str, | ||
feedback_type: NCMECFeedbackType, | ||
affirmative: bool, | ||
reason_id: t.Optional[str] = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (note to self): Is a reason string better to use here than the guid? |
||
) -> GetEntriesResponse: | ||
if not affirmative and not reason_id: | ||
raise ValueError("Negative feedback must have a reason_id") | ||
Comment on lines
+545
to
+546
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (note to self): If reason map only has one item in it, choose that item. Also test NCMEC API to make sure it won't accept empty reason, which should be our pref otherwise. |
||
|
||
# need member_id to submit feedback | ||
if not self.member_id: | ||
self.status() | ||
|
||
# need valid reasons to submit negative feedback | ||
if not affirmative and not self.reasons_map: | ||
self.feedback_reasons() | ||
|
||
# Prepare the XML payload | ||
root = ET.Element("feedbackSubmission") | ||
root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2") | ||
vote = ET.SubElement(root, "affirmative" if affirmative else "negative") | ||
|
||
if not affirmative: | ||
valid_reason_ids = [ | ||
reason["guid"] for reason in self.reasons_map[feedback_type.value] | ||
] | ||
if reason_id not in valid_reason_ids: | ||
print( | ||
"must choose from the following reasons: ", | ||
self.reasons_map[feedback_type.value], | ||
) | ||
raise ValueError("Invalid reason_id") | ||
reasons = ET.SubElement(vote, "reasonIds") | ||
guid = ET.SubElement(reasons, "guid") | ||
guid.text = reason_id | ||
# ET.dump(root) | ||
|
||
resp = self._put( | ||
NCMECEndpoint.entries, | ||
member_id=self.member_id, | ||
entry_id=entry_id, | ||
feedback_type=feedback_type, | ||
data=ET.tostring(root), | ||
) | ||
|
||
# TODO: parse response here once we know the shape using UpdateEntryResponse | ||
return resp | ||
|
||
|
||
def _date_format(timestamp: int) -> str: | ||
"""ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(note to self): Add note that these are also the entry types