Skip to content

Commit

Permalink
moderation: added query match rule
Browse files Browse the repository at this point in the history
  • Loading branch information
0einstein0 committed Nov 6, 2024
1 parent a6e1e5b commit b2983dc
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 3 deletions.
5 changes: 4 additions & 1 deletion site/zenodo_rdm/moderation/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

"""Moderation config."""

from .rules import files_rule, links_rule, text_sanitization_rule, verified_user_rule
from .rules import (files_rule, links_rule, match_query_rule,
text_sanitization_rule, verified_user_rule)

MODERATION_BANNED_LINK_DOMAINS = []
"""Banned domains for links."""
Expand Down Expand Up @@ -46,12 +47,14 @@
links_rule,
files_rule,
text_sanitization_rule,
match_query_rule,
]
"""Scoring rules for record moderation."""

MODERATION_COMMUNITY_SCORE_RULES = [
links_rule,
text_sanitization_rule,
verified_user_rule,
match_query_rule,
]
"""Scoring rules for community moderation."""
3 changes: 2 additions & 1 deletion site/zenodo_rdm/moderation/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from invenio_requests.tasks import request_moderation
from invenio_users_resources.proxies import current_users_service
from invenio_users_resources.records.api import UserAggregate
from invenio_users_resources.services.users.tasks import execute_moderation_actions
from invenio_users_resources.services.users.tasks import \
execute_moderation_actions
from werkzeug.utils import cached_property

from .errors import UserBlockedException
Expand Down
68 changes: 68 additions & 0 deletions site/zenodo_rdm/moderation/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2024 CERN.
#
# ZenodoRDM is free software; you can redistribute it and/or modify
# it under the terms of the MIT License; see LICENSE file for more details.

"""Moderation models."""

from invenio_db import db
from invenio_search import current_search_client


class ModerationQuery(db.Model):
    """Moderation queries model.

    Each row holds a search query string plus the score, notes and active
    flag associated with it. ``create`` also pushes a matching document to
    the ``moderation-queries`` search index.
    """

    __tablename__ = "moderation_queries"

    id = db.Column(db.Integer, primary_key=True, autoincrement=True)
    """Primary key identifier for the moderation query."""

    score = db.Column(db.Integer, default=0)
    """Score associated with the query."""

    query_string = db.Column(db.Text, nullable=False)
    """Query string containing the filter criteria."""

    notes = db.Column(db.Text, nullable=True)
    """Additional notes or comments regarding the moderation query."""

    active = db.Column(db.Boolean, default=True)
    """Indicates whether the moderation query is currently active."""

    @classmethod
    def create(cls, query_string, notes=None, score=0, active=True):
        """Create a new moderation query and index it for search.

        The new row is added to the current DB session (it is neither
        flushed nor committed here) and a document wrapping the query string
        in a ``query_string`` query is sent to the ``moderation-queries``
        index.

        Indexing is best-effort: a failure is logged with its traceback and
        the model instance is still returned.

        :param query_string: Query string containing the filter criteria.
        :param notes: Optional free-text notes about the query.
        :param score: Score associated with the query.
        :param active: Whether the query is active.
        :returns: The newly created ``ModerationQuery`` instance.
        """
        # Local import keeps this block self-contained; the module-level
        # imports only provide ``db`` and ``current_search_client``.
        import logging

        query = cls(query_string=query_string, notes=notes, score=score, active=active)
        db.session.add(query)

        try:
            # NOTE(review): no explicit document id is passed. ``query.id``
            # is presumably still unset here because the session has not
            # been flushed; confirm whether the search document should be
            # keyed by the DB id.
            current_search_client.index(
                index="moderation-queries",
                body={
                    "query": {"query_string": {"query": query_string}},
                    "active": active,
                    "score": score,
                    "notes": notes,
                },
            )
        except Exception:
            # Deliberately broad: an indexing failure must not prevent the
            # DB row from being created. Log with traceback instead of
            # printing to stdout.
            logging.getLogger(__name__).exception(
                "Failed to index moderation query %r", query_string
            )

        return query

    @classmethod
    def get(cls, query_id=None):
        """Retrieve moderation queries.

        :param query_id: Optional primary key of the query to fetch.
        :returns: The matching query or ``None`` when ``query_id`` is given,
            otherwise a list of all moderation queries.
        """
        if query_id is not None:
            return cls.query.filter_by(id=query_id).one_or_none()
        return cls.query.all()

    def __repr__(self):
        """Get a string representation of the moderation query."""
        return (
            f"<ModerationQuery id={self.id}, score={self.score}, active={self.active}>"
        )
19 changes: 18 additions & 1 deletion site/zenodo_rdm/moderation/rules.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re

from flask import current_app

from invenio_search import current_search_client
from zenodo_rdm.moderation.proxies import current_domain_tree

from .proxies import current_domain_tree, current_scores
Expand Down Expand Up @@ -123,3 +123,20 @@ def files_rule(identity, draft=None, record=None):
score += current_scores.ham_files

return score


def match_query_rule(identity, draft=None, record=None):
    """Calculate a score based on matched percolate queries against the record.

    The dumped record is sent as the document of a ``percolate`` query
    against the ``moderation-queries`` index, and the ``score`` stored on
    every returned hit is summed.

    :param identity: Identity triggering the moderation check (unused here).
    :param draft: Draft being moderated (unused here).
    :param record: Record whose ``dumps()`` output is percolated.
    :returns: Sum of the ``score`` values of the matched queries; hits
        without a ``score`` contribute ``0``.
    """
    # NOTE(review): no ``size`` is passed, so only the search backend's
    # default page of hits is summed; confirm this is enough once many
    # queries can match a single record.
    matched_queries = current_search_client.search(
        index="moderation-queries",
        body={"query": {"percolate": {"field": "query", "document": record.dumps()}}},
    )

    return sum(
        hit["_source"].get("score", 0) for hit in matched_queries["hits"]["hits"]
    )

0 comments on commit b2983dc

Please sign in to comment.