From 5b1ce46b3503e534d41e36232dc0de561a67e619 Mon Sep 17 00:00:00 2001 From: Anton Suharev Date: Tue, 21 May 2024 16:12:54 -0700 Subject: [PATCH] fix: set `is_marked_spam` to True for SpamModeration.Status SPAM or SPAM_LIKELY - fix tests - add asdf & direnv to .gitignore and .dockerignore --- .dockerignore | 3 + .gitignore | 4 + django/core/mixins.py | 49 +++++----- django/core/models.py | 18 ++-- django/core/settings/defaults.py | 4 +- django/core/tests/test_views.py | 64 +++++++++---- django/curator/serializers.py | 8 +- .../curator/tests/test_llm_spam_moderation.py | 91 +++++++++++-------- django/curator/views.py | 37 +++----- 9 files changed, 167 insertions(+), 111 deletions(-) diff --git a/.dockerignore b/.dockerignore index 877b7bb5d..967d7240e 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,6 @@ +.direnv/ +.tool-versions +.envrc .git .yarn/cache .yarn/install-state.gz diff --git a/.gitignore b/.gitignore index 848556075..f8101620f 100644 --- a/.gitignore +++ b/.gitignore @@ -180,3 +180,7 @@ vignettes/*.pdf # End of https://www.toptal.com/developers/gitignore/api/r +# asdf & direnv +.direnv/ +.tool-versions +.envrc \ No newline at end of file diff --git a/django/core/mixins.py b/django/core/mixins.py index 8738ddb55..19ed9a49b 100644 --- a/django/core/mixins.py +++ b/django/core/mixins.py @@ -7,12 +7,12 @@ from django.shortcuts import redirect from django.utils import timezone from rest_framework import serializers +from rest_framework.decorators import action from rest_framework.exceptions import NotFound from rest_framework.response import Response -from rest_framework.decorators import action from .models import SpamModeration -from .permissions import ViewRestrictedObjectPermissions, ModeratorPermissions +from .permissions import ModeratorPermissions, ViewRestrictedObjectPermissions logger = logging.getLogger(__name__) @@ -249,11 +249,11 @@ class SpamCatcherViewSetMixin: def perform_create(self, serializer: serializers.Serializer): super().perform_create(serializer) - self.handle_spam_detection(serializer) + self.create_or_update_spam_moderation_object(serializer) def perform_update(self, serializer): super().perform_update(serializer) - self.handle_spam_detection(serializer) + self.create_or_update_spam_moderation_object(serializer) def _validate_content_object(self, instance): # make sure that the instance has a spam_moderation attribute as well as the @@ -294,10 +294,12 @@ def mark_spam(self, request, **kwargs): spam_moderation.save() return redirect(instance.get_list_url()) - def handle_spam_detection(self, serializer: serializers.Serializer): + def create_or_update_spam_moderation_object( + self, serializer: serializers.Serializer + ): try: self._validate_content_object(serializer.instance) - self._record_spam( + self._create_or_update_spam_moderation_object( serializer.instance, ( serializer.context["spam_context"] @@ -308,24 +310,27 @@ def handle_spam_detection(self, serializer: serializers.Serializer): except ValueError as e: logger.warning("Cannot flag %s as spam: %s", serializer.instance, e) - def _record_spam(self, instance, spam_context: dict = None): + def _create_or_update_spam_moderation_object( + self, instance, spam_context: dict = None + ): content_type = ContentType.objects.get_for_model(type(instance)) + default_status = ( + SpamModeration.Status.SPAM_LIKELY + if spam_context + else SpamModeration.Status.SCHEDULED_FOR_CHECK + ) + default_spam_moderation = { + "status": default_status, + "detection_method": ( + spam_context.get("detection_method", "") if spam_context else "" + ), + "detection_details": ( + spam_context.get("detection_details", "") if spam_context else "" + ), + } - # SpamModeration updates the content instance on save - spam_moderation, created = SpamModeration.objects.get_or_create( + SpamModeration.objects.update_or_create( content_type=content_type, object_id=instance.id, - defaults={ - "status": SpamModeration.Status.SCHEDULED_FOR_CHECK, - "detection_method": ( - spam_context["detection_method"] if spam_context else "" - ), - "detection_details": ( - spam_context["detection_details"] if spam_context else "" - ), - }, + defaults=default_spam_moderation, ) - - if not created: - spam_moderation.status = SpamModeration.Status.SCHEDULED_FOR_CHECK - spam_moderation.save() diff --git a/django/core/models.py b/django/core/models.py index fa1cb2858..8ddb3d305 100644 --- a/django/core/models.py +++ b/django/core/models.py @@ -1,16 +1,16 @@ -import logging -import pathlib from datetime import timedelta from enum import Enum +import logging +import pathlib from allauth.account.models import EmailAddress from django import forms from django.conf import settings from django.contrib.auth import get_user_model from django.contrib.auth.models import Group, User -from django.contrib.postgres.fields import ArrayField from django.contrib.contenttypes.fields import GenericForeignKey from django.contrib.contenttypes.models import ContentType +from django.contrib.postgres.fields import ArrayField from django.db import models, transaction from django.urls import reverse from django.utils import timezone @@ -134,16 +134,15 @@ def deploy_environment(self): class SpamModeration(models.Model): class Status(models.TextChoices): - UNREVIEWED = "unreviewed", _("Unreviewed") SPAM = "spam", _("Confirmed spam") NOT_SPAM = "not_spam", _("Confirmed not spam") SCHEDULED_FOR_CHECK = "scheduled_for_check", _("Scheduled for check by LLM") - SPAM_LIKELY = "spam_likely", _("Marked spam by LLM") - NOT_SPAM_LIKELY = "not_spam_likely", _("Marked as not spam by LLM") + SPAM_LIKELY = "spam_likely", _("Automatically marked as spam") + NOT_SPAM_LIKELY = "not_spam_likely", _("Automatically marked as not spam") status = models.CharField( choices=Status.choices, - default=Status.UNREVIEWED, + default=Status.SCHEDULED_FOR_CHECK, max_length=32, ) content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE) @@ -195,7 +194,10 @@ def update_related_object(self): related_object = self.content_object if hasattr(related_object, "is_marked_spam"): related_object.spam_moderation = self - related_object.is_marked_spam = self.status == self.Status.SPAM + related_object.is_marked_spam = self.status in { + self.Status.SPAM, + self.Status.SPAM_LIKELY, + } related_object.save() def __str__(self): diff --git a/django/core/settings/defaults.py b/django/core/settings/defaults.py index ecdcfe84c..405f65baa 100644 --- a/django/core/settings/defaults.py +++ b/django/core/settings/defaults.py @@ -527,7 +527,9 @@ def is_test(self): DISCOURSE_API_KEY = read_secret("discourse_api_key", "unconfigured") DISCOURSE_API_USERNAME = os.getenv("DISCOURSE_API_USERNAME", "unconfigured") -LLM_SPAM_CHECK_API_KEY = read_secret("llm_spam_check_api_key", "unconfigured") +LLM_SPAM_CHECK_API_KEY = ( + read_secret("llm_spam_check_api_key", "unconfigured") or "unconfigured" +) # https://docs.djangoproject.com/en/4.2/ref/settings/#templates TEMPLATES = [ diff --git a/django/core/tests/test_views.py b/django/core/tests/test_views.py index 2cb6eacb3..d267e98f2 100644 --- a/django/core/tests/test_views.py +++ b/django/core/tests/test_views.py @@ -1,17 +1,15 @@ import logging from django.conf import settings -from rest_framework.test import APIClient - -from django.urls import reverse from django.test import TestCase +from django.urls import reverse +from rest_framework.test import APIClient +from core.models import ComsesGroups, Event, Job, SpamModeration from core.tests.base import UserFactory from core.tests.permissions_base import BaseViewSetTestCase from core.views import EventViewSet, JobViewSet -from core.models import Job, Event, SpamModeration, ComsesGroups -from .base import JobFactory, EventFactory - +from .base import EventFactory, JobFactory logger = logging.getLogger(__name__) @@ -162,9 +160,13 @@ def test_event_creation_with_honeypot_spam(self): ) self.assertResponseCreated(response) event = Event.objects.get(title=data["title"]) - self.assertTrue(event.is_marked_spam) + self.assertIsNotNone(event.spam_moderation) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY + ) self.assertEqual(event.spam_moderation.detection_method, "honeypot") + self.assertTrue(event.is_marked_spam) def test_job_creation_with_timer_spam(self): # FIXME: should incorporate how long a typical request takes to resolve @@ -179,9 +181,11 @@ def test_job_creation_with_timer_spam(self): ) self.assertResponseCreated(response) job = Job.objects.get(title=data["title"]) - self.assertTrue(job.is_marked_spam) + self.assertIsNotNone(job.spam_moderation) + self.assertEqual(job.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY) self.assertEqual(job.spam_moderation.detection_method, "form_submit_time") + self.assertTrue(job.is_marked_spam) def test_mark_spam(self): data = self.event_factory.get_request_data() @@ -193,18 +197,29 @@ def test_mark_spam(self): format="json", ) event = Event.objects.get(title=data["title"]) + self.assertIsNotNone(event.spam_moderation) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK + ) + # by default, all created objects will have is_marked_spam = False unless spam_moderation.status is explicitly SPAM or SPAM_LIKELY self.assertFalse(event.is_marked_spam) - self.assertIsNone(event.spam_moderation) + response = self.client.post( reverse("core:event-mark-spam", kwargs={"pk": event.id}), data, HTTP_ACCEPT="application/json", format="json", ) + event.refresh_from_db() - # non-moderators cannot mark content as spam + # non-moderators cannot mark content as spam (set status to SPAM) self.assertEquals(response.status_code, 403) + self.assertIsNotNone(event.spam_moderation) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK + ) self.assertFalse(event.is_marked_spam) + # check moderator self.client.login( username=self.moderator.username, password=self.user_factory.password @@ -217,12 +232,17 @@ def test_mark_spam(self): format="json", ) event.refresh_from_db() - self.assertTrue(event.is_marked_spam) self.assertIsNotNone(event.spam_moderation) + self.assertEqual(event.spam_moderation.status, SpamModeration.Status.SPAM) + self.assertTrue(event.is_marked_spam) + event.mark_not_spam(self.moderator) event.refresh_from_db() - self.assertFalse(event.is_marked_spam) + self.assertIsNotNone(event.spam_moderation) + self.assertEqual(event.spam_moderation.status, SpamModeration.Status.NOT_SPAM) + self.assertFalse(event.is_marked_spam) + # check superuser self.client.login( username=self.superuser.username, password=self.user_factory.password @@ -234,9 +254,10 @@ def test_mark_spam(self): format="json", ) event.refresh_from_db() - self.assertTrue(event.is_marked_spam) + self.assertIsNotNone(event.spam_moderation) self.assertEqual(event.spam_moderation.status, SpamModeration.Status.SPAM) + self.assertTrue(event.is_marked_spam) def test_event_creation_without_spam(self): data = self.event_factory.get_request_data() @@ -248,8 +269,12 @@ def test_event_creation_without_spam(self): ) self.assertResponseCreated(response) event = Event.objects.get(title=data["title"]) + + self.assertIsNotNone(event.spam_moderation) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK + ) self.assertFalse(event.is_marked_spam) - self.assertIsNone(event.spam_moderation) def test_job_update_with_spam(self): data = self.job_factory.get_request_data() @@ -261,8 +286,13 @@ def test_job_update_with_spam(self): ) self.assertResponseCreated(response) job = Job.objects.get(title=data["title"]) + + self.assertIsNotNone(job.spam_moderation) + self.assertEqual( + job.spam_moderation.status, SpamModeration.Status.SCHEDULED_FOR_CHECK + ) self.assertFalse(job.is_marked_spam) - self.assertIsNone(job.spam_moderation) + data = self.job_factory.get_request_data( honeypot_value="spammy content", elapsed_time=settings.SPAM_LIKELY_SECONDS_THRESHOLD + 1, @@ -274,9 +304,11 @@ def test_job_update_with_spam(self): format="json", ) job.refresh_from_db() - self.assertTrue(job.is_marked_spam) + self.assertIsNotNone(job.spam_moderation) + self.assertEqual(job.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY) self.assertEqual(job.spam_moderation.detection_method, "honeypot") + self.assertTrue(job.is_marked_spam) def test_exclude_spam_from_public_views(self): data = self.event_factory.get_request_data(honeypot_value="spammy content") diff --git a/django/curator/serializers.py b/django/curator/serializers.py index 30ec7bd28..778fd3cf8 100644 --- a/django/curator/serializers.py +++ b/django/curator/serializers.py @@ -1,10 +1,6 @@ from rest_framework import serializers -from core.models import SpamModeration -from django.contrib.contenttypes.models import ContentType -from rest_framework import serializers -from core.models import Event, Job, SpamModeration -from django.contrib.contenttypes.models import ContentType +from core.models import Event, Job, SpamModeration from library.models import Codebase @@ -61,7 +57,7 @@ class Meta: class SpamUpdateSerializer(serializers.Serializer): - object_id = serializers.IntegerField() + id = serializers.IntegerField() is_spam = serializers.BooleanField() spam_indicators = serializers.ListField( child=serializers.CharField(), required=False diff --git a/django/curator/tests/test_llm_spam_moderation.py b/django/curator/tests/test_llm_spam_moderation.py index 80fda1846..f385d2003 100644 --- a/django/curator/tests/test_llm_spam_moderation.py +++ b/django/curator/tests/test_llm_spam_moderation.py @@ -2,12 +2,12 @@ from django.conf import settings from django.utils import timezone - -from rest_framework.test import APIClient from rest_framework import status +from rest_framework.test import APIClient -from core.models import SpamModeration +from core.models import Event, Job, SpamModeration from core.tests.base import BaseModelTestCase, EventFactory, JobFactory +from library.models import Codebase from library.tests.base import CodebaseFactory @@ -21,11 +21,11 @@ def setUp(self): self.job_factory = JobFactory(submitter=self.user) self.event_factory = EventFactory(submitter=self.user) + self.codebase_factory = CodebaseFactory(submitter=self.user) today = timezone.now() # Create test objects - self.job = self.job_factory.create( application_deadline=today + timedelta(days=30), title="Test Job", @@ -38,8 +38,7 @@ def setUp(self): title="Test Event", ) - codebase_factory = CodebaseFactory(submitter=self.user) - self.codebase = codebase_factory.create( + self.codebase = self.codebase_factory.create( title="Test Codebase", description="Codebase Description" ) @@ -70,20 +69,30 @@ def test_authentication_required(self): self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) response = self.client.post( - "/api/spam/update/", {"object_id": 1, "is_spam": True}, format="json" + "/api/spam/update/", + {"id": self.event.spam_moderation.id, "is_spam": True}, + format="json", ) self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED) # Test with correct API key self.client.credentials(HTTP_X_API_KEY=self.api_key) - response = self.client.get("/api/spam/get-latest-batch/") self.assertEqual(response.status_code, status.HTTP_200_OK) + event = Event.objects.get(id=self.event.id) response = self.client.post( - "/api/spam/update/", {"object_id": 1, "is_spam": True}, format="json" + "/api/spam/update/", + {"id": event.spam_moderation.id, "is_spam": True}, + format="json", ) self.assertEqual(response.status_code, status.HTTP_200_OK) + event.refresh_from_db() + self.assertIsNotNone(event.spam_moderation) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY + ) + self.assertTrue(event.is_marked_spam) def test_get_latest_spam_batch(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) @@ -112,19 +121,20 @@ def test_get_latest_spam_batch_empty(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) # Change all SpamModeration statuses - SpamModeration.objects.all().update(status=SpamModeration.Status.SPAM) + SpamModeration.objects.all().update(status=SpamModeration.Status.NOT_SPAM) response = self.client.get("/api/spam/get-latest-batch/") self.assertEqual(response.status_code, status.HTTP_200_OK) data = response.json() - self.assertEqual(len(data), 0) # We expect an empty list + # only objects with status SCHEDULED_FOR_CHECK should be present in the batch, so we expect an empty list here + self.assertEqual(len(data), 0) def test_update_spam_moderation_success(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "object_id": self.job_spam.id, + "id": self.job.spam_moderation.id, "is_spam": True, "spam_indicators": ["indicator1", "indicator2"], "reasoning": "Test reasoning", @@ -133,29 +143,31 @@ def test_update_spam_moderation_success(self): response = self.client.post("/api/spam/update/", data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertEqual( - response.json()["message"], "SpamModeration updated successfully" - ) - # Check if SpamModeration object was updated correctly - updated_spam = SpamModeration.objects.get(id=self.job_spam.id) - self.assertEqual(updated_spam.status, SpamModeration.Status.SPAM_LIKELY) - self.assertEqual(updated_spam.detection_method, "LLM") + job = Job.objects.get(id=self.job.id) + + # Check if SpamModeration object was updated + self.assertIsNotNone(job.spam_moderation) + self.assertEqual(job.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY) + self.assertTrue(job.is_marked_spam) + self.assertEqual(job.spam_moderation.detection_method, "LLM") self.assertEqual( - updated_spam.detection_details["spam_indicators"], + job.spam_moderation.detection_details["spam_indicators"], ["indicator1", "indicator2"], ) - self.assertEqual(updated_spam.detection_details["reasoning"], "Test reasoning") - self.assertEqual(updated_spam.detection_details["confidence"], 0.9) + self.assertEqual( + job.spam_moderation.detection_details["reasoning"], "Test reasoning" + ) + self.assertEqual(job.spam_moderation.detection_details["confidence"], 0.9) # Check if related content object was updated - self.assertTrue(updated_spam.content_object.is_marked_spam) + self.assertTrue(job.is_marked_spam) def test_update_spam_moderation_not_spam(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "object_id": self.event_spam.id, + "id": self.event.spam_moderation.id, "is_spam": False, "reasoning": "Not spam reasoning", "confidence": 0.8, @@ -164,14 +176,16 @@ def test_update_spam_moderation_not_spam(self): response = self.client.post("/api/spam/update/", data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) - updated_spam = SpamModeration.objects.get(id=self.event_spam.id) - self.assertEqual(updated_spam.status, SpamModeration.Status.NOT_SPAM_LIKELY) - self.assertFalse(updated_spam.content_object.is_marked_spam) + event = Event.objects.get(id=self.event.id) + self.assertEqual( + event.spam_moderation.status, SpamModeration.Status.NOT_SPAM_LIKELY + ) + self.assertFalse(event.is_marked_spam) def test_update_spam_moderation_not_found(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) - data = {"object_id": 99999, "is_spam": True} # Non-existent ID + data = {"id": 99999, "is_spam": True} # Non-existent ID response = self.client.post("/api/spam/update/", data, format="json") self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) @@ -180,7 +194,7 @@ def test_update_spam_moderation_invalid_data(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "object_id": self.codebase_spam.id, + "id": self.codebase_spam.id, # Missing required 'is_spam' field } @@ -191,7 +205,7 @@ def test_update_spam_moderation_partial_update(self): self.client.credentials(HTTP_X_API_KEY=self.api_key) data = { - "object_id": self.job_spam.id, + "id": self.codebase_spam.id, "is_spam": True, # Only providing partial data } @@ -199,9 +213,14 @@ def test_update_spam_moderation_partial_update(self): response = self.client.post("/api/spam/update/", data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) - updated_spam = SpamModeration.objects.get(id=self.job_spam.id) - - self.assertEqual(updated_spam.status, SpamModeration.Status.SPAM_LIKELY) - self.assertEqual(updated_spam.detection_details.get("spam_indicators"), []) - self.assertEqual(updated_spam.detection_details.get("reasoning"), "") - self.assertIsNone(updated_spam.detection_details.get("confidence")) + codebase = Codebase.objects.get(id=self.codebase.id) + self.assertEqual( + codebase.spam_moderation.status, SpamModeration.Status.SPAM_LIKELY + ) + self.assertEqual( + codebase.spam_moderation.detection_details.get("spam_indicators"), [] + ) + self.assertEqual( + codebase.spam_moderation.detection_details.get("reasoning"), "" + ) + self.assertIsNone(codebase.spam_moderation.detection_details.get("confidence")) diff --git a/django/curator/views.py b/django/curator/views.py index a49828c2d..e3dbb650a 100644 --- a/django/curator/views.py +++ b/django/curator/views.py @@ -1,25 +1,26 @@ -from django.contrib.auth.decorators import permission_required +import bleach from django.contrib import messages -from django.shortcuts import get_object_or_404 -from django.http import HttpResponseRedirect, HttpResponseBadRequest -from django.views.decorators.http import require_POST -from wagtail_modeladmin.helpers import AdminURLHelper +from django.contrib.auth.decorators import permission_required from django.core.exceptions import ObjectDoesNotExist +from django.http import HttpResponseBadRequest, HttpResponseRedirect from django.shortcuts import get_object_or_404 - - +from django.views.decorators.http import require_POST +from djangorestframework_camel_case.render import CamelCaseJSONRenderer +from rest_framework import status from rest_framework.decorators import ( api_view, - permission_classes, authentication_classes, + permission_classes, + renderer_classes, ) -from rest_framework.response import Response -from rest_framework import status -from curator.auth import APIKeyAuthentication from rest_framework.permissions import AllowAny -from rest_framework.decorators import api_view, authentication_classes, renderer_classes -from djangorestframework_camel_case.render import CamelCaseJSONRenderer +from rest_framework.response import Response +from wagtail_modeladmin.helpers import AdminURLHelper +from core.models import SpamModeration +from curator.auth import APIKeyAuthentication +from curator.models import TagCleanup +from curator.wagtail_hooks import TagCleanupAction from .serializers import ( MinimalCodebaseSerializer, MinimalEventSerializer, @@ -27,12 +28,6 @@ SpamModerationSerializer, SpamUpdateSerializer, ) -from curator.models import TagCleanup -from curator.wagtail_hooks import TagCleanupAction - -from core.models import SpamModeration - -import bleach TAG_CLEANUP_ACTIONS = { TagCleanupAction.process.name: TagCleanup.objects.process, @@ -129,11 +124,9 @@ def update_spam_moderation(request): serializer = SpamUpdateSerializer(data=request.data) if serializer.is_valid(): data = serializer.validated_data - print(data) - print(data["object_id"]) try: spam_moderation = SpamModeration.objects.get( - id=data["object_id"], + id=data["id"], ) except ObjectDoesNotExist: return Response(