Skip to content

Commit

Permalink
fix: set is_marked_spam to True for SpamModeration.Status SPAM or SPAM_LIKELY
Browse files Browse the repository at this point in the history

- fix tests
- add asdf & direnv to .gitignore and .dockerignore
  • Loading branch information
asuworks committed Nov 21, 2024
1 parent 0b8d8db commit 5b1ce46
Show file tree
Hide file tree
Showing 9 changed files with 167 additions and 111 deletions.
3 changes: 3 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
.direnv/
.tool-versions
.envrc
.git
.yarn/cache
.yarn/install-state.gz
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,7 @@ vignettes/*.pdf

# End of https://www.toptal.com/developers/gitignore/api/r

# asdf & direnv
.direnv/
.tool-versions
.envrc
49 changes: 27 additions & 22 deletions django/core/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
from django.shortcuts import redirect
from django.utils import timezone
from rest_framework import serializers
from rest_framework.decorators import action
from rest_framework.exceptions import NotFound
from rest_framework.response import Response
from rest_framework.decorators import action

from .models import SpamModeration
from .permissions import ViewRestrictedObjectPermissions, ModeratorPermissions
from .permissions import ModeratorPermissions, ViewRestrictedObjectPermissions

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -249,11 +249,11 @@ class SpamCatcherViewSetMixin:

def perform_create(self, serializer: serializers.Serializer):
super().perform_create(serializer)
self.handle_spam_detection(serializer)
self.create_or_update_spam_moderation_object(serializer)

def perform_update(self, serializer):
super().perform_update(serializer)
self.handle_spam_detection(serializer)
self.create_or_update_spam_moderation_object(serializer)

def _validate_content_object(self, instance):
# make sure that the instance has a spam_moderation attribute as well as the
Expand Down Expand Up @@ -294,10 +294,12 @@ def mark_spam(self, request, **kwargs):
spam_moderation.save()
return redirect(instance.get_list_url())

def handle_spam_detection(self, serializer: serializers.Serializer):
def create_or_update_spam_moderation_object(
self, serializer: serializers.Serializer
):
try:
self._validate_content_object(serializer.instance)
self._record_spam(
self._create_or_update_spam_moderation_object(
serializer.instance,
(
serializer.context["spam_context"]
Expand All @@ -308,24 +310,27 @@ def handle_spam_detection(self, serializer: serializers.Serializer):
except ValueError as e:
logger.warning("Cannot flag %s as spam: %s", serializer.instance, e)

def _record_spam(self, instance, spam_context: dict = None):
def _create_or_update_spam_moderation_object(
self, instance, spam_context: dict = None
):
content_type = ContentType.objects.get_for_model(type(instance))
default_status = (
SpamModeration.Status.SPAM_LIKELY
if spam_context
else SpamModeration.Status.SCHEDULED_FOR_CHECK
)
default_spam_moderation = {
"status": default_status,
"detection_method": (
spam_context.get("detection_method", "") if spam_context else ""
),
"detection_details": (
spam_context.get("detection_details", "") if spam_context else ""
),
}

# SpamModeration updates the content instance on save
spam_moderation, created = SpamModeration.objects.get_or_create(
SpamModeration.objects.update_or_create(
content_type=content_type,
object_id=instance.id,
defaults={
"status": SpamModeration.Status.SCHEDULED_FOR_CHECK,
"detection_method": (
spam_context["detection_method"] if spam_context else ""
),
"detection_details": (
spam_context["detection_details"] if spam_context else ""
),
},
defaults=default_spam_moderation,
)

if not created:
spam_moderation.status = SpamModeration.Status.SCHEDULED_FOR_CHECK
spam_moderation.save()
18 changes: 10 additions & 8 deletions django/core/models.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import logging
import pathlib
from datetime import timedelta
from enum import Enum
import logging
import pathlib

from allauth.account.models import EmailAddress
from django import forms
from django.conf import settings
from django.contrib.auth import get_user_model
from django.contrib.auth.models import Group, User
from django.contrib.postgres.fields import ArrayField
from django.contrib.contenttypes.fields import GenericForeignKey
from django.contrib.contenttypes.models import ContentType
from django.contrib.postgres.fields import ArrayField
from django.db import models, transaction
from django.urls import reverse
from django.utils import timezone
Expand Down Expand Up @@ -134,16 +134,15 @@ def deploy_environment(self):

class SpamModeration(models.Model):
class Status(models.TextChoices):
UNREVIEWED = "unreviewed", _("Unreviewed")
SPAM = "spam", _("Confirmed spam")
NOT_SPAM = "not_spam", _("Confirmed not spam")
SCHEDULED_FOR_CHECK = "scheduled_for_check", _("Scheduled for check by LLM")
SPAM_LIKELY = "spam_likely", _("Marked spam by LLM")
NOT_SPAM_LIKELY = "not_spam_likely", _("Marked as not spam by LLM")
SPAM_LIKELY = "spam_likely", _("Automatically marked as spam")
NOT_SPAM_LIKELY = "not_spam_likely", _("Automatically marked as not spam")

status = models.CharField(
choices=Status.choices,
default=Status.UNREVIEWED,
default=Status.SCHEDULED_FOR_CHECK,
max_length=32,
)
content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
Expand Down Expand Up @@ -195,7 +194,10 @@ def update_related_object(self):
related_object = self.content_object
if hasattr(related_object, "is_marked_spam"):
related_object.spam_moderation = self
related_object.is_marked_spam = self.status == self.Status.SPAM
related_object.is_marked_spam = self.status in {
self.Status.SPAM,
self.Status.SPAM_LIKELY,
}
related_object.save()

def __str__(self):
Expand Down
4 changes: 3 additions & 1 deletion django/core/settings/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,7 +527,9 @@ def is_test(self):
DISCOURSE_API_KEY = read_secret("discourse_api_key", "unconfigured")
DISCOURSE_API_USERNAME = os.getenv("DISCOURSE_API_USERNAME", "unconfigured")

LLM_SPAM_CHECK_API_KEY = read_secret("llm_spam_check_api_key", "unconfigured")
LLM_SPAM_CHECK_API_KEY = (
read_secret("llm_spam_check_api_key", "unconfigured") or "unconfigured"
)

# https://docs.djangoproject.com/en/4.2/ref/settings/#templates
TEMPLATES = [
Expand Down
64 changes: 48 additions & 16 deletions django/core/tests/test_views.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import logging

from django.conf import settings
from rest_framework.test import APIClient

from django.urls import reverse
from django.test import TestCase
from django.urls import reverse
from rest_framework.test import APIClient

from core.models import ComsesGroups, Event, Job, SpamModeration
from core.tests.base import UserFactory
from core.tests.permissions_base import BaseViewSetTestCase
from core.views import EventViewSet, JobViewSet
from core.models import Job, Event, SpamModeration, ComsesGroups
from .base import JobFactory, EventFactory

from .base import EventFactory, JobFactory

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -162,9 +160,13 @@ def test_event_creation_with_honeypot_spam(self):
)
self.assertResponseCreated(response)
event = Event.objects.get(title=data["title"])
self.assertTrue(event.is_marked_spam)

self.assertIsNotNone(event.spam_moderation)
self.assertEqual(
event.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY
)
self.assertEqual(event.spam_moderation.detection_method, "honeypot")
self.assertTrue(event.is_marked_spam)

def test_job_creation_with_timer_spam(self):
# FIXME: should incorporate how long a typical request takes to resolve
Expand All @@ -179,9 +181,11 @@ def test_job_creation_with_timer_spam(self):
)
self.assertResponseCreated(response)
job = Job.objects.get(title=data["title"])
self.assertTrue(job.is_marked_spam)

self.assertIsNotNone(job.spam_moderation)
self.assertEqual(job.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY)
self.assertEqual(job.spam_moderation.detection_method, "form_submit_time")
self.assertTrue(job.is_marked_spam)

def test_mark_spam(self):
data = self.event_factory.get_request_data()
Expand All @@ -193,18 +197,29 @@ def test_mark_spam(self):
format="json",
)
event = Event.objects.get(title=data["title"])
self.assertIsNotNone(event.spam_moderation)
self.assertEqual(
event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK
)
# by default, all created objects will have is_marked_spam = False unless spam_moderation.status is explicitly SPAM or SPAM_LIKELY
self.assertFalse(event.is_marked_spam)
self.assertIsNone(event.spam_moderation)

response = self.client.post(
reverse("core:event-mark-spam", kwargs={"pk": event.id}),
data,
HTTP_ACCEPT="application/json",
format="json",
)

event.refresh_from_db()
# non-moderators cannot mark content as spam
# non-moderators cannot mark content as spam (set status to SPAM)
self.assertEquals(response.status_code, 403)
self.assertIsNotNone(event.spam_moderation)
self.assertEqual(
event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK
)
self.assertFalse(event.is_marked_spam)

# check moderator
self.client.login(
username=self.moderator.username, password=self.user_factory.password
Expand All @@ -217,12 +232,17 @@ def test_mark_spam(self):
format="json",
)
event.refresh_from_db()
self.assertTrue(event.is_marked_spam)
self.assertIsNotNone(event.spam_moderation)
self.assertEqual(event.spam_moderation.status, SpamModeration.Status.SPAM)
self.assertTrue(event.is_marked_spam)

event.mark_not_spam(self.moderator)
event.refresh_from_db()
self.assertFalse(event.is_marked_spam)

self.assertIsNotNone(event.spam_moderation)
self.assertEqual(event.spam_moderation.status, SpamModeration.Status.NOT_SPAM)
self.assertFalse(event.is_marked_spam)

# check superuser
self.client.login(
username=self.superuser.username, password=self.user_factory.password
Expand All @@ -234,9 +254,10 @@ def test_mark_spam(self):
format="json",
)
event.refresh_from_db()
self.assertTrue(event.is_marked_spam)

self.assertIsNotNone(event.spam_moderation)
self.assertEqual(event.spam_moderation.status, SpamModeration.Status.SPAM)
self.assertTrue(event.is_marked_spam)

def test_event_creation_without_spam(self):
data = self.event_factory.get_request_data()
Expand All @@ -248,8 +269,12 @@ def test_event_creation_without_spam(self):
)
self.assertResponseCreated(response)
event = Event.objects.get(title=data["title"])

self.assertIsNotNone(event.spam_moderation)
self.assertEqual(
event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK
)
self.assertFalse(event.is_marked_spam)
self.assertIsNone(event.spam_moderation)

def test_job_update_with_spam(self):
data = self.job_factory.get_request_data()
Expand All @@ -261,8 +286,13 @@ def test_job_update_with_spam(self):
)
self.assertResponseCreated(response)
job = Job.objects.get(title=data["title"])

self.assertIsNotNone(job.spam_moderation)
self.assertEqual(
job.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK
)
self.assertFalse(job.is_marked_spam)
self.assertIsNone(job.spam_moderation)

data = self.job_factory.get_request_data(
honeypot_value="spammy content",
elapsed_time=settings.SPAM_LIKELY_SECONDS_THRESHOLD + 1,
Expand All @@ -274,9 +304,11 @@ def test_job_update_with_spam(self):
format="json",
)
job.refresh_from_db()
self.assertTrue(job.is_marked_spam)

self.assertIsNotNone(job.spam_moderation)
self.assertEqual(job.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY)
self.assertEqual(job.spam_moderation.detection_method, "honeypot")
self.assertTrue(job.is_marked_spam)

def test_exclude_spam_from_public_views(self):
data = self.event_factory.get_request_data(honeypot_value="spammy content")
Expand Down
8 changes: 2 additions & 6 deletions django/curator/serializers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
from rest_framework import serializers
from core.models import SpamModeration
from django.contrib.contenttypes.models import ContentType
from rest_framework import serializers
from core.models import Event, Job, SpamModeration
from django.contrib.contenttypes.models import ContentType

from core.models import Event, Job, SpamModeration
from library.models import Codebase


Expand Down Expand Up @@ -61,7 +57,7 @@ class Meta:


class SpamUpdateSerializer(serializers.Serializer):
object_id = serializers.IntegerField()
id = serializers.IntegerField()
is_spam = serializers.BooleanField()
spam_indicators = serializers.ListField(
child=serializers.CharField(), required=False
Expand Down
Loading

0 comments on commit 5b1ce46

Please sign in to comment.