From 78a9cc585ddbe74539782326bb744847d10af664 Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Mon, 11 Nov 2024 15:44:07 +0700 Subject: [PATCH 1/8] forward requester id to check username for spam callbacks --- synapse/handlers/user_directory.py | 2 +- synapse/module_api/callbacks/spamchecker_callbacks.py | 11 ++++++++--- tests/handlers/test_user_directory.py | 4 ++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index a343637b82a..1281929d384 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -161,7 +161,7 @@ async def search_users( non_spammy_users = [] for user in results["results"]: if not await self._spam_checker_module_callbacks.check_username_for_spam( - user + user, user_id ): non_spammy_users.append(user) results["results"] = non_spammy_users diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py index 17079ff781c..272476a890f 100644 --- a/synapse/module_api/callbacks/spamchecker_callbacks.py +++ b/synapse/module_api/callbacks/spamchecker_callbacks.py @@ -168,7 +168,7 @@ ] ], ] -CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile], Awaitable[bool]] +CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile, str], Awaitable[bool]] LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ [ Optional[dict], @@ -716,7 +716,9 @@ async def user_may_publish_room( return self.NOT_SPAM - async def check_username_for_spam(self, user_profile: UserProfile) -> bool: + async def check_username_for_spam( + self, user_profile: UserProfile, requester_id: str + ) -> bool: """Checks if a user ID or display name are considered "spammy" by this server. If the server considers a username spammy, then it will not be included in @@ -727,6 +729,7 @@ async def check_username_for_spam(self, user_profile: UserProfile) -> bool: * user_id * display_name * avatar_url + requester_id: The user ID of the user making the user directory search request. Returns: True if the user is spammy. @@ -735,7 +738,9 @@ async def check_username_for_spam(self, user_profile: UserProfile) -> bool: with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): # Make a copy of the user profile object to ensure the spam checker cannot # modify it. - res = await delay_cancellation(callback(user_profile.copy())) + res = await delay_cancellation( + callback(user_profile.copy(), requester_id) + ) if res: return True diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 878d9683b6a..1807b5eaf7e 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -796,7 +796,7 @@ def test_spam_checker(self) -> None: s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 1) - async def allow_all(user_profile: UserProfile) -> bool: + async def allow_all(user_profile: UserProfile, requester_id: str) -> bool: # Allow all users. return False @@ -810,7 +810,7 @@ async def allow_all(user_profile: UserProfile) -> bool: self.assertEqual(len(s["results"]), 1) # Configure a spam checker that filters all users. - async def block_all(user_profile: UserProfile) -> bool: + async def block_all(user_profile: UserProfile, requester_id: str) -> bool: # All users are spammy. return True From 38e6eaa990bd4b72454760bda6ed8728a7a5dfa4 Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Mon, 11 Nov 2024 15:51:32 +0700 Subject: [PATCH 2/8] add changelog --- changelog.d/17916.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/17916.feature diff --git a/changelog.d/17916.feature b/changelog.d/17916.feature new file mode 100644 index 00000000000..c0060779d09 --- /dev/null +++ b/changelog.d/17916.feature @@ -0,0 +1 @@ +Module developers will have access to user id of requester when adding `check_username_for_spam` callbacks to `spam_checker_module_callbacks`. \ No newline at end of file From 81a36d97b53a064ec09fbfec4a63cd2c513ad25a Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Tue, 12 Nov 2024 22:48:51 +0700 Subject: [PATCH 3/8] add backward compat with callbacks without requester_id defined --- .../callbacks/spamchecker_callbacks.py | 27 +++++++++++--- tests/handlers/test_user_directory.py | 36 +++++++++++++++++-- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/synapse/module_api/callbacks/spamchecker_callbacks.py b/synapse/module_api/callbacks/spamchecker_callbacks.py index 272476a890f..a2f328cafed 100644 --- a/synapse/module_api/callbacks/spamchecker_callbacks.py +++ b/synapse/module_api/callbacks/spamchecker_callbacks.py @@ -31,6 +31,7 @@ Optional, Tuple, Union, + cast, ) # `Literal` appears with Python 3.8. @@ -168,7 +169,10 @@ ] ], ] -CHECK_USERNAME_FOR_SPAM_CALLBACK = Callable[[UserProfile, str], Awaitable[bool]] +CHECK_USERNAME_FOR_SPAM_CALLBACK = Union[ + Callable[[UserProfile], Awaitable[bool]], + Callable[[UserProfile, str], Awaitable[bool]], +] LEGACY_CHECK_REGISTRATION_FOR_SPAM_CALLBACK = Callable[ [ Optional[dict], @@ -736,11 +740,26 @@ async def check_username_for_spam( """ for callback in self._check_username_for_spam_callbacks: with Measure(self.clock, f"{callback.__module__}.{callback.__qualname__}"): + checker_args = inspect.signature(callback) # Make a copy of the user profile object to ensure the spam checker cannot # modify it. - res = await delay_cancellation( - callback(user_profile.copy(), requester_id) - ) + # Also ensure backwards compatibility with spam checker callbacks + # that don't expect the requester_id argument. + if len(checker_args.parameters) == 2: + callback_with_requester_id = cast( + Callable[[UserProfile, str], Awaitable[bool]], callback + ) + res = await delay_cancellation( + callback_with_requester_id(user_profile.copy(), requester_id) + ) + else: + callback_without_requester_id = cast( + Callable[[UserProfile], Awaitable[bool]], callback + ) + res = await delay_cancellation( + callback_without_requester_id(user_profile.copy()) + ) + if res: return True diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 1807b5eaf7e..ea59e50ca3f 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -796,7 +796,7 @@ def test_spam_checker(self) -> None: s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 1) - async def allow_all(user_profile: UserProfile, requester_id: str) -> bool: + async def allow_all(user_profile: UserProfile) -> bool: # Allow all users. return False @@ -810,7 +810,7 @@ async def allow_all(user_profile: UserProfile, requester_id: str) -> bool: self.assertEqual(len(s["results"]), 1) # Configure a spam checker that filters all users. - async def block_all(user_profile: UserProfile, requester_id: str) -> bool: + async def block_all(user_profile: UserProfile) -> bool: # All users are spammy. return True @@ -820,6 +820,38 @@ async def block_all(user_profile: UserProfile, requester_id: str) -> bool: s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 0) + async def allow_all_expects_requester_id( + user_profile: UserProfile, requester_id: str + ) -> bool: + # Allow all users. + return False + + # Configure a spam checker that does not filter any users. + spam_checker = self.hs.get_module_api_callbacks().spam_checker + spam_checker._check_username_for_spam_callbacks = [ + allow_all_expects_requester_id + ] + + # The results do not change: + # We get one search result when searching for user2 by user1. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 1) + + # Configure a spam checker that filters all users. + async def block_all_expects_requester_id( + user_profile: UserProfile, requester_id: str + ) -> bool: + # All users are spammy. + return True + + spam_checker._check_username_for_spam_callbacks = [ + block_all_expects_requester_id + ] + + # User1 now gets no search results for any of the other users. + s = self.get_success(self.handler.search_users(u1, "user2", 10)) + self.assertEqual(len(s["results"]), 0) + @override_config( { "spam_checker": { From f9a1254f30ca75f16985384e466be73ea24bf21f Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Tue, 12 Nov 2024 22:55:43 +0700 Subject: [PATCH 4/8] update docs --- docs/modules/spam_checker_callbacks.md | 2 +- docs/spam_checker.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index ec306d81abf..a95db58832d 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -245,7 +245,7 @@ this callback. _First introduced in Synapse v1.37.0_ ```python -async def check_username_for_spam(user_profile: synapse.module_api.UserProfile) -> bool +async def check_username_for_spam(user_profile: synapse.module_api.UserProfile, requester_id: str) -> bool ``` Called when computing search results in the user directory. The module must return a diff --git a/docs/spam_checker.md b/docs/spam_checker.md index 1b6d814937c..4ace3512b33 100644 --- a/docs/spam_checker.md +++ b/docs/spam_checker.md @@ -72,8 +72,8 @@ class ExampleSpamChecker: async def user_may_publish_room(self, userid, room_id): return True # allow publishing of all rooms - async def check_username_for_spam(self, user_profile): - return False # allow all usernames + async def check_username_for_spam(self, user_profile, requester_id): + return False # allow all usernames regardless of requester async def check_registration_for_spam( self, From c6966205a5745d4cb644fe47a37dc950e6e7083d Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Thu, 14 Nov 2024 12:44:34 +0700 Subject: [PATCH 5/8] add contribution credits --- changelog.d/17916.feature | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/changelog.d/17916.feature b/changelog.d/17916.feature index c0060779d09..7eec99fd0dc 100644 --- a/changelog.d/17916.feature +++ b/changelog.d/17916.feature @@ -1 +1,3 @@ -Module developers will have access to user id of requester when adding `check_username_for_spam` callbacks to `spam_checker_module_callbacks`. \ No newline at end of file +Module developers will have access to user id of requester when adding `check_username_for_spam` callbacks to `spam_checker_module_callbacks`. + +Contributed by Wilson@Pangea.chat \ No newline at end of file From b8d94e7fe78ed8923cea08438c5441559bec8325 Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Thu, 14 Nov 2024 16:00:03 +0700 Subject: [PATCH 6/8] add stricter tests to check for requester id is correctly passed, and add comments on old tests on why keeping them is important --- tests/handlers/test_user_directory.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index ea59e50ca3f..eb9ab78c27d 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -796,6 +796,7 @@ def test_spam_checker(self) -> None: s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 1) + # Kept old spam checker without `requester_id` tests for backwards compatibility. async def allow_all(user_profile: UserProfile) -> bool: # Allow all users. return False @@ -809,6 +810,7 @@ async def allow_all(user_profile: UserProfile) -> bool: s = self.get_success(self.handler.search_users(u1, "user2", 10)) self.assertEqual(len(s["results"]), 1) + # Kept old spam checker without `requester_id` tests for backwards compatibility. # Configure a spam checker that filters all users. async def block_all(user_profile: UserProfile) -> bool: # All users are spammy. @@ -823,6 +825,7 @@ async def block_all(user_profile: UserProfile) -> bool: async def allow_all_expects_requester_id( user_profile: UserProfile, requester_id: str ) -> bool: + self.assertIsInstance(requester_id, str) # Allow all users. return False @@ -841,6 +844,7 @@ async def allow_all_expects_requester_id( async def block_all_expects_requester_id( user_profile: UserProfile, requester_id: str ) -> bool: + self.assertIsInstance(requester_id, str) # All users are spammy. return True From 97dcd01da922c3085d140bd7c6a583c1f50b6b1e Mon Sep 17 00:00:00 2001 From: Wilson Date: Fri, 15 Nov 2024 02:45:06 -0500 Subject: [PATCH 7/8] Update changelog.d/17916.feature Co-authored-by: Patrick Cloke --- changelog.d/17916.feature | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/changelog.d/17916.feature b/changelog.d/17916.feature index 7eec99fd0dc..118997c5e59 100644 --- a/changelog.d/17916.feature +++ b/changelog.d/17916.feature @@ -1,3 +1 @@ -Module developers will have access to user id of requester when adding `check_username_for_spam` callbacks to `spam_checker_module_callbacks`. - -Contributed by Wilson@Pangea.chat \ No newline at end of file +Module developers will have access to user id of requester when adding `check_username_for_spam` callbacks to `spam_checker_module_callbacks`. Contributed by Wilson@Pangea.chat. \ No newline at end of file From fe20d061f3034345c7ba8983677bd3a2f298e26a Mon Sep 17 00:00:00 2001 From: WilsonLe Date: Mon, 18 Nov 2024 13:15:31 -0500 Subject: [PATCH 8/8] adjust unit tests for stricter tests, add documentation on what requester id is --- docs/modules/spam_checker_callbacks.md | 2 ++ tests/handlers/test_user_directory.py | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/modules/spam_checker_callbacks.md b/docs/modules/spam_checker_callbacks.md index a95db58832d..c7f8606fd07 100644 --- a/docs/modules/spam_checker_callbacks.md +++ b/docs/modules/spam_checker_callbacks.md @@ -264,6 +264,8 @@ The profile is represented as a dictionary with the following keys: The module is given a copy of the original dictionary, so modifying it from within the module cannot modify a user's profile when included in user directory search results. +The requester_id parameter is the ID of the user that called the user directory API. + If multiple modules implement this callback, they will be considered in order. If a callback returns `False`, Synapse falls through to the next one. The value of the first callback that does not return `False` will be used. If this happens, Synapse will not call diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index eb9ab78c27d..a75095a79f1 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -825,7 +825,7 @@ async def block_all(user_profile: UserProfile) -> bool: async def allow_all_expects_requester_id( user_profile: UserProfile, requester_id: str ) -> bool: - self.assertIsInstance(requester_id, str) + self.assertEqual(requester_id, u1) # Allow all users. return False @@ -844,7 +844,7 @@ async def allow_all_expects_requester_id( async def block_all_expects_requester_id( user_profile: UserProfile, requester_id: str ) -> bool: - self.assertIsInstance(requester_id, str) + self.assertEqual(requester_id, u1) # All users are spammy. return True