facebook · aryzle · Sep 18, 2024 · Sep 26, 2024 · Sep 28, 2024 · Sep 28, 2024
@@ -19,7 +19,7 @@
 import urllib.parse
 
 import requests
-from requests.packages.urllib3.util.retry import Retry
+from urllib3.util.retry import Retry
 from threatexchange.exchanges.clients.utils.common import TimeoutHTTPAdapter
 
 
@@ -123,6 +123,19 @@ class NCMECEntryType(Enum):
     video = "video"
 
 
+@unique
+class NCMECFeedbackType(Enum):
+    md5 = "MD5"
+    sha1 = "SHA1"
+    pdna = "PDNA"
+    pdq = "PDQ"
+    netclean = "NETCLEAN"
+    videntifier = "VIDENTIFIER"
+    tmk_pdqf = "TMK_PDQF"
+    ssvh_pdna = "SSVH_PDNA"
+    ssvh_safer_hash = "SSVH_SAFER_HASH"
+
+
 @dataclass
 class NCMECEntryUpdate:
     id: str
@@ -131,6 +144,7 @@ class NCMECEntryUpdate:
     deleted: bool
     classification: t.Optional[str]
     fingerprints: t.Dict[str, str]
+    feedback: t.List[t.Dict[str, t.Any]]
 
     @classmethod
     def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate":
@@ -148,6 +162,36 @@ def from_xml(cls, xml: _XMLWrapper) -> "NCMECEntryUpdate":
             fingerprints={
                 x.tag: x.text for x in xml.maybe("fingerprints") if x.has_text
             },
+            feedback=(
+                [
+                    {
+                        "sentiment": x.tag,  # "affirmativeFeedback" or "negativeFeedback"
+                        "type": x.str("type"),
+                        "latest_feedback_time": x.str("lastUpdateTimestamp"),
+                        "members": [
+                            {"id": m.str("id"), "name": m.text}
+                            for m in x.maybe("members")
+                            if m.has_text
+                        ],
+                        "reasons": [
+                            {
+                                "guid": r.maybe("reason").str("guid"),
+                                "name": r.maybe("reason").str("name"),
+                                "type": r.maybe("reason").str("type"),
+                                "members": [
+                                    {"id": m.str("id"), "name": m.text}
+                                    for m in x.maybe("members")
+                                ],
+                            }
+                            for r in x.maybe("reasons")
+                            if r.maybe("reason")
+                        ],
+                    }
+                    for x in xml.maybe("feedback")
+                ]
+                if xml.maybe("feedback").has_text
+                else []
+            ),
         )
 
 
@@ -215,11 +259,49 @@ def estimated_entries_in_range(self) -> int:
         )
 
 
+# TODO: once we know the shape of response, finish this class
+@dataclass
+class UpdateEntryResponse:
+    updates: t.List[NCMECEntryUpdate]
+
+    @classmethod
+    def from_xml(
+        cls, xml: _XMLWrapper, fallback_max_time: int
+    ) -> "UpdateEntryResponse":
+        updates: t.List[NCMECEntryUpdate] = []
+
+        for content_xml in (xml.maybe("images"), xml.maybe("videos")):
+            if not content_xml or not len(content_xml):
+                continue
+            updates.extend(NCMECEntryUpdate.from_xml(c) for c in content_xml)
+
+        return cls(updates)
+
+
+@dataclass
+class GetFeedbackReasonsResponse:
+    reasons: t.List[t.Dict[str, str]]
+
+    @classmethod
+    def from_xml(cls, xml: _XMLWrapper) -> "GetFeedbackReasonsResponse":
+        reasons = []
+        for reason in xml.maybe("availableFeedbackReasons"):
+            reasons.append(
+                {
+                    "guid": reason.str("guid"),
+                    "name": reason.str("name"),
+                    "type": reason.str("type"),
+                }
+            )
+        return cls(reasons)
+
+
 @unique
 class NCMECEndpoint(Enum):
     status = "status"
     entries = "entries"
     members = "members"
+    feedback = "feedback"
 
 
 class NCMECEnvironment(Enum):
@@ -261,15 +343,19 @@ def __init__(
         username: str,
         password: str,
         environment: NCMECEnvironment,
+        member_id: t.Optional[str] = None,
+        reasons_map: t.Dict[str, t.List[t.Dict[str, str]]] = {},
     ) -> None:
         assert is_valid_user_pass(username, password)
         self.username = username
         self.password = password
         self._base_url = environment.value
+        self.member_id = member_id
+        self.reasons_map = reasons_map or {}
 
     def _get_session(self) -> requests.Session:
         """
-        Custom requests sesson
+        Custom requests session
 
         Ideally, should be used within a context manager:
         ```
@@ -295,14 +381,18 @@ def _get_session(self) -> requests.Session:
         )
         return session
 
-    def _get(self, endpoint: NCMECEndpoint, *, next_: str = "", **params) -> ET.Element:
+    def _get(
+        self, endpoint: NCMECEndpoint, *, path: str = "", next_: str = "", **params
+    ) -> ET.Element:
         """
         Perform an HTTP GET request, and return the XML response payload.
 
         Same timeouts and retry strategy as `_get_session` above.
         """
 
         url = "/".join((self._base_url, self.VERSION, endpoint.value))
+        if path:
+            url = "/".join((url, path))
         if next_:
             url = self._base_url + next_
             params = {}
@@ -328,16 +418,49 @@ def _post(self, endpoint: NCMECEndpoint, *, data=None) -> t.Any:
         No timeout or retry strategy.
         """
 
-        url = "/".join((self._base_url, endpoint.value))
+        url = "/".join((self._base_url, self.VERSION, endpoint.value))
         with self._get_session() as session:
             response = session.post(url, data=data)
             response.raise_for_status()
             return response
 
+    def _put(
+        self,
+        endpoint: NCMECEndpoint,
+        *,
+        member_id: t.Optional[str] = None,
+        entry_id: t.Optional[str] = None,
+        feedback_type: t.Optional[NCMECFeedbackType] = None,
+        data=None,
+    ) -> t.Any:
+        """
+        Perform an HTTP PUT request, and return the XML response payload.
+
+        No timeout or retry strategy.
+        """
+
+        url = "/".join((self._base_url, self.VERSION, endpoint.value))
+        if feedback_type and member_id and entry_id:
+            url = "/".join(
+                (
+                    self._base_url,
+                    endpoint.value,
+                    member_id,
+                    entry_id,
+                    feedback_type.value,
+                    NCMECEndpoint.feedback.value,
+                )
+            )
+        with self._get_session() as session:
+            response = session.put(url, data=data)
+            response.raise_for_status()
+            return response
+
     def status(self) -> StatusResult:
         """Query the status endpoint, which tells you who you are."""
         response = self._get(NCMECEndpoint.status)
         member = _XMLWrapper(response)["member"]
+        self.member_id = member.str("id")
         return StatusResult(member.int("id"), member.text)
 
     def members(self) -> t.List[StatusResult]:
@@ -348,6 +471,17 @@ def members(self) -> t.List[StatusResult]:
             for member in _XMLWrapper(response)
         ]
 
+    def feedback_reasons(self) -> GetFeedbackReasonsResponse:
+        """Get the possible negative feedback reasons for each feedback type"""
+        for feedbackType in NCMECFeedbackType:
+            resp = self._get(
+                NCMECEndpoint.feedback, path=f"{feedbackType.value}/reasons"
+            )
+            reasonsResp = GetFeedbackReasonsResponse.from_xml(_XMLWrapper(resp))
+            self.reasons_map[feedbackType.value] = reasonsResp.reasons
+
+        return reasonsResp
+
     def get_entries(
         self,
         *,
@@ -401,6 +535,55 @@ def get_entries_iter(
             has_more = bool(next_)
             yield result
 
+    def submit_feedback(
+        self,
+        entry_id: str,
+        feedback_type: NCMECFeedbackType,
+        affirmative: bool,
+        reason_id: t.Optional[str] = None,
+    ) -> GetEntriesResponse:
+        if not affirmative and not reason_id:
+            raise ValueError("Negative feedback must have a reason_id")
+
+        # need member_id to submit feedback
+        if not self.member_id:
+            self.status()
+
+        # need valid reasons to submit negative feedback
+        if not affirmative and not self.reasons_map:
+            self.feedback_reasons()
+
+        # Prepare the XML payload
+        root = ET.Element("feedbackSubmission")
+        root.set("xmlns", "https://hashsharing.ncmec.org/hashsharing/v2")
+        vote = ET.SubElement(root, "affirmative" if affirmative else "negative")
+
+        if not affirmative:
+            valid_reason_ids = [
+                reason["guid"] for reason in self.reasons_map[feedback_type.value]
+            ]
+            if reason_id not in valid_reason_ids:
+                print(
+                    "must choose from the following reasons: ",
+                    self.reasons_map[feedback_type.value],
+                )
+                raise ValueError("Invalid reason_id")
+            reasons = ET.SubElement(vote, "reasonIds")
+            guid = ET.SubElement(reasons, "guid")
+            guid.text = reason_id
+        # ET.dump(root)
+
+        resp = self._put(
+            NCMECEndpoint.entries,
+            member_id=self.member_id,
+            entry_id=entry_id,
+            feedback_type=feedback_type,
+            data=ET.tostring(root),
+        )
+
+        # TODO: parse response here once we know the shape using UpdateEntryResponse
+        return resp
+
 
 def _date_format(timestamp: int) -> str:
     """ISO 8601 format yyyy-MM-dd'T'HH:mm:ss.SSSZ"""