Skip to content

Commit

Permalink
moderation: add rule results to dict and eval
Browse files Browse the repository at this point in the history
  • Loading branch information
yashlamba committed Nov 11, 2024
1 parent 6210c61 commit d443ef3
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 26 deletions.
8 changes: 5 additions & 3 deletions invenio.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ from zenodo_rdm.github.schemas import CitationMetadataSchema
from zenodo_rdm.legacy.resources import record_serializers
from zenodo_rdm.metrics.config import METRICS_CACHE_UPDATE_INTERVAL
from zenodo_rdm.moderation.errors import UserBlockedException
from zenodo_rdm.moderation.handlers import CommunityScoreHandler, RecordScoreHandler
from zenodo_rdm.moderation.handlers import CommunityModerationHandler, RecordModerationHandler
from zenodo_rdm.openaire.records.components import OpenAIREComponent
from zenodo_rdm.permissions import (
ZenodoCommunityPermissionPolicy,
Expand Down Expand Up @@ -817,11 +817,11 @@ RDM_RECORDS_SERVICE_COMPONENTS = DefaultRecordsComponents + [
"""Add OpenAIRE component to records service."""

RDM_CONTENT_MODERATION_HANDLERS = [
RecordScoreHandler(),
RecordModerationHandler(),
]
"""Records content moderation handlers."""
RDM_COMMUNITY_CONTENT_MODERATION_HANDLERS = [
CommunityScoreHandler(),
CommunityModerationHandler(),
]
"""Community content moderation handlers."""

Expand Down Expand Up @@ -1062,3 +1062,5 @@ COMMUNITIES_SHOW_BROWSE_MENU_ENTRY = True

JOBS_ADMINISTRATION_ENABLED = True
"""Enable Jobs administration view."""

SPAM_DETECTOR_MODEL="spam-scikit:1.0.0"
26 changes: 13 additions & 13 deletions site/zenodo_rdm/moderation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,21 +41,21 @@
MODERATION_SPAM_FILE_EXTS = {"jpg", "jpeg", "pdf", "png", "jfif", "docx", "webp"}
"""Frequent spam file extensions."""

MODERATION_RECORD_SCORE_RULES = [
verified_user_rule,
links_rule,
files_rule,
text_sanitization_rule,
match_query_rule,
]
MODERATION_RECORD_SCORE_RULES = {
"verified-user-rule": verified_user_rule,
"links-rule": links_rule,
"files-rule": files_rule,
"text-sanitization-rule": text_sanitization_rule,
"match-query-rule": match_query_rule,
}
"""Scoring rules for record moderation."""

MODERATION_COMMUNITY_SCORE_RULES = [
links_rule,
text_sanitization_rule,
verified_user_rule,
match_query_rule,
]
MODERATION_COMMUNITY_SCORE_RULES = {
"links-rule": links_rule,
"text-sanitization-rule": text_sanitization_rule,
"verified-user-rule": verified_user_rule,
"match-query-rule": match_query_rule,
}
"""Scoring rules for community moderation."""

MODERATION_PERCOLATOR_INDEX_PREFIX = "moderation-queries"
Expand Down
26 changes: 16 additions & 10 deletions site/zenodo_rdm/moderation/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def on_post_rollback(self, uow):
self.commit_op.on_post_commit(uow)


class BaseScoreHandler:
class BaseModerationHandler:
"""Base handler to calculate moderation scores based on rules."""

def __init__(self, rules=None):
Expand All @@ -73,14 +73,17 @@ def rules(self):
"""Get scoring rules."""
if isinstance(self._rules, str):
return current_app.config[self._rules]
return self._rules or []
return self._rules or {}

def evaluate_result(self, params):
    """Combine the per-rule results into a single moderation score.

    :param params: Mapping whose values are the individual rule results.
    :returns: The sum of all values in ``params`` (``0`` for an empty mapping).
    """
    rule_scores = params.values()
    return sum(rule_scores)

def run(self, identity, draft=None, record=None, uow=None):
"""Calculate the moderation score for a given record or draft."""
try:
score = 0
for rule in self.rules:
score += rule(identity, draft=draft, record=record)
results = {}
for name, rule in self.rules.items():
results[name] = rule(identity, draft=draft, record=record)

# TODO: Move to caller handler method (i.e. publish, create, update, etc.)
user_id = None
Expand All @@ -100,10 +103,11 @@ def run(self, identity, draft=None, record=None, uow=None):
action_ctx = {
"user_id": user_id,
"record_pid": record.pid.pid_value,
"score": score,
"results": results,
}
apply_actions = current_app.config.get("MODERATION_APPLY_ACTIONS", False)
if score > current_scores.spam_threshold:

if self.evaluate_result(results) > current_scores.spam_threshold:
action_ctx["action"] = "block"
if apply_actions:
uow.register(
Expand Down Expand Up @@ -132,7 +136,7 @@ def run(self, identity, draft=None, record=None, uow=None):
"Block moderation action triggered",
extra=action_ctx,
)
elif score < current_scores.ham_threshold:
elif self.evaluate_result(results) < current_scores.ham_threshold:
# If the user is already verified, we don't need to verify again
if user.verified:
return
Expand Down Expand Up @@ -176,7 +180,7 @@ def run(self, identity, draft=None, record=None, uow=None):
current_app.logger.exception("Error calculating moderation score")


class RecordScoreHandler(BaseHandler, BaseScoreHandler):
class RecordModerationHandler(BaseHandler, BaseModerationHandler):
"""Handler for calculating scores for records."""

def __init__(self):
Expand All @@ -188,7 +192,9 @@ def publish(self, identity, draft=None, record=None, uow=None, **kwargs):
self.run(identity, record=record, uow=uow)


class CommunityScoreHandler(community_moderation.BaseHandler, BaseScoreHandler):
class CommunityModerationHandler(
community_moderation.BaseHandler, BaseModerationHandler
):
"""Handler for calculating scores for communities."""

def __init__(self):
Expand Down

0 comments on commit d443ef3

Please sign in to comment.