From d443ef3baa2502f2d39d58b529d3596a8308a827 Mon Sep 17 00:00:00 2001 From: yashlamba Date: Mon, 11 Nov 2024 10:37:26 +0100 Subject: [PATCH] moderation: add rule results to dict and eval --- invenio.cfg | 8 +++++--- site/zenodo_rdm/moderation/config.py | 26 +++++++++++++------------- site/zenodo_rdm/moderation/handlers.py | 26 ++++++++++++++++---------- 3 files changed, 34 insertions(+), 26 deletions(-) diff --git a/invenio.cfg b/invenio.cfg index d211678a..4af9daa8 100644 --- a/invenio.cfg +++ b/invenio.cfg @@ -76,7 +76,7 @@ from zenodo_rdm.github.schemas import CitationMetadataSchema from zenodo_rdm.legacy.resources import record_serializers from zenodo_rdm.metrics.config import METRICS_CACHE_UPDATE_INTERVAL from zenodo_rdm.moderation.errors import UserBlockedException -from zenodo_rdm.moderation.handlers import CommunityScoreHandler, RecordScoreHandler +from zenodo_rdm.moderation.handlers import CommunityModerationHandler, RecordModerationHandler from zenodo_rdm.openaire.records.components import OpenAIREComponent from zenodo_rdm.permissions import ( ZenodoCommunityPermissionPolicy, @@ -817,11 +817,11 @@ RDM_RECORDS_SERVICE_COMPONENTS = DefaultRecordsComponents + [ """Addd OpenAIRE component to records service.""" RDM_CONTENT_MODERATION_HANDLERS = [ - RecordScoreHandler(), + RecordModerationHandler(), ] """Records content moderation handlers.""" RDM_COMMUNITY_CONTENT_MODERATION_HANDLERS = [ - CommunityScoreHandler(), + CommunityModerationHandler(), ] """Community content moderation handlers.""" @@ -1062,3 +1062,5 @@ COMMUNITIES_SHOW_BROWSE_MENU_ENTRY = True JOBS_ADMINISTRATION_ENABLED = True """Enable Jobs administration view.""" + +SPAM_DETECTOR_MODEL="spam-scikit:1.0.0" diff --git a/site/zenodo_rdm/moderation/config.py b/site/zenodo_rdm/moderation/config.py index 3ce2f997..a1be92b6 100644 --- a/site/zenodo_rdm/moderation/config.py +++ b/site/zenodo_rdm/moderation/config.py @@ -41,21 +41,21 @@ MODERATION_SPAM_FILE_EXTS = {"jpg", "jpeg", "pdf", "png", "jfif", "docx", "webp"} """Frequest spam file extensions.""" -MODERATION_RECORD_SCORE_RULES = [ - verified_user_rule, - links_rule, - files_rule, - text_sanitization_rule, - match_query_rule, -] +MODERATION_RECORD_SCORE_RULES = { + "verified-user-rule": verified_user_rule, + "links-rule": links_rule, + "files-rule": files_rule, + "text-sanitization-rule": text_sanitization_rule, + "match-query-rule": match_query_rule, +} """Scoring rules for record moderation.""" -MODERATION_COMMUNITY_SCORE_RULES = [ - links_rule, - text_sanitization_rule, - verified_user_rule, - match_query_rule, -] +MODERATION_COMMUNITY_SCORE_RULES = { + "links-rule": links_rule, + "text-sanitization-rule": text_sanitization_rule, + "verified-user-rule": verified_user_rule, + "match-query-rule": match_query_rule, +} """Scoring rules for communtiy moderation.""" MODERATION_PERCOLATOR_INDEX_PREFIX = "moderation-queries" diff --git a/site/zenodo_rdm/moderation/handlers.py b/site/zenodo_rdm/moderation/handlers.py index 2c041e3b..b7addb85 100644 --- a/site/zenodo_rdm/moderation/handlers.py +++ b/site/zenodo_rdm/moderation/handlers.py @@ -61,7 +61,7 @@ def on_post_rollback(self, uow): self.commit_op.on_post_commit(uow) -class BaseScoreHandler: +class BaseModerationHandler: """Base handler to calculate moderation scores based on rules.""" def __init__(self, rules=None): @@ -73,14 +73,17 @@ def rules(self): """Get scoring rules.""" if isinstance(self._rules, str): return current_app.config[self._rules] - return self._rules or [] + return self._rules or {} + + def evaluate_result(self, params): + return sum(params.values()) def run(self, identity, draft=None, record=None, uow=None): """Calculate the moderation score for a given record or draft.""" try: - score = 0 - for rule in self.rules: - score += rule(identity, draft=draft, record=record) + results = {} + for name, rule in self.rules.items(): + results[name] = rule(identity, draft=draft, record=record) # TODO: Move to caller handler method (i.e. publish, create, update, etc.) user_id = None @@ -100,10 +103,11 @@ def run(self, identity, draft=None, record=None, uow=None): action_ctx = { "user_id": user_id, "record_pid": record.pid.pid_value, - "score": score, + "results": results, } apply_actions = current_app.config.get("MODERATION_APPLY_ACTIONS", False) - if score > current_scores.spam_threshold: + + if self.evaluate_result(results) > current_scores.spam_threshold: action_ctx["action"] = "block" if apply_actions: uow.register( @@ -132,7 +136,7 @@ def run(self, identity, draft=None, record=None, uow=None): "Block moderation action triggered", extra=action_ctx, ) - elif score < current_scores.ham_threshold: + elif self.evaluate_result(results) < current_scores.ham_threshold: # If the user is already verified, we don't need to verify again if user.verified: return @@ -176,7 +180,7 @@ def run(self, identity, draft=None, record=None, uow=None): current_app.logger.exception("Error calculating moderation score") -class RecordScoreHandler(BaseHandler, BaseScoreHandler): +class RecordModerationHandler(BaseHandler, BaseModerationHandler): """Handler for calculating scores for records.""" def __init__(self): @@ -188,7 +192,9 @@ def publish(self, identity, draft=None, record=None, uow=None, **kwargs): self.run(identity, record=record, uow=uow) -class CommunityScoreHandler(community_moderation.BaseHandler, BaseScoreHandler): +class CommunityModerationHandler( + community_moderation.BaseHandler, BaseModerationHandler +): """Handler for calculating scores for communities.""" def __init__(self):