diff --git a/analysis/admin.py b/analysis/admin.py index e254195..0476320 100644 --- a/analysis/admin.py +++ b/analysis/admin.py @@ -1,6 +1,10 @@ """ Admin page for analysis """ -# from django.contrib import admin +from django.contrib import admin +from django_find_similar.models import TextToken +from django_find_similar.models.text import Token # Register your models here. +admin.site.register(TextToken) +admin.site.register(Token) \ No newline at end of file diff --git a/analysis/forms.py b/analysis/forms.py index a65e87f..bd08d47 100644 --- a/analysis/forms.py +++ b/analysis/forms.py @@ -35,3 +35,12 @@ class LoadTrainingDataForm(forms.Form): sheet_name = forms.IntegerField(required=False, initial=0, widget=forms.NumberInput(attrs={ 'class': 'form-control' })) + + +class FindSimilarForm(forms.Form): + """ + Form with one text + """ + text = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ + 'class': 'form-control' + })) diff --git a/analysis/models.py b/analysis/models.py index fc348d3..ebfcef2 100644 --- a/analysis/models.py +++ b/analysis/models.py @@ -1,6 +1,8 @@ """ Analisys models """ +from io import StringIO + import pandas as pd from django.db import models @@ -12,7 +14,7 @@ class TrainingData(models.Model): update = models.DateTimeField(auto_now=True) def get_dataframe(self) -> pd.DataFrame: - return pd.read_json(self.data, dtype=str) + return pd.read_json(StringIO(self.data), dtype=str) @property def columns_count(self): @@ -29,4 +31,4 @@ def to_list(dataframe: pd.DataFrame) -> list: for column in columns: data_list = dataframe[column].values.tolist() result += data_list - return result \ No newline at end of file + return result diff --git a/analysis/templates/analysis/find_similar.html b/analysis/templates/analysis/find_similar.html new file mode 100644 index 0000000..9f91789 --- /dev/null +++ b/analysis/templates/analysis/find_similar.html @@ -0,0 +1,33 @@ +{% extends "base.html" %} +{% block main %} +

{{object.name}}

+

+
+ {% csrf_token %} + {{form.as_p}} + +
+{% endblock %} +{% block results %} + {% with object.get_dataframe as data %} + + + {% for column in data.columns %} + + {% endfor %} + + {% for index, row in data.iterrows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} +
+ {{column}} +
+ {{cell}} +
+ {% endwith %} + +{% endblock %} diff --git a/analysis/templates/analysis/result.html b/analysis/templates/analysis/result.html new file mode 100644 index 0000000..3fb7640 --- /dev/null +++ b/analysis/templates/analysis/result.html @@ -0,0 +1,57 @@ +{% extends "base.html" %} +{% block main %} +{% with object.text as text %} + + + + + + + + + + + + + + + + + +
NameValue
+ Text + + {{text.text}} +
+ Language + + {{text.language}} +
+ Remove Stop Words + + {{text.remove_stopwords}} +
+{% endwith %} +{% endblock %} +{% block results %} + +{% with object.checkresultitem_set.all as items %} + + + + + + + + {% for item in items %} + + + + + + + {% endfor %} +
ordertextcos%
{{item.order}}{{item.text.text}}{{item.cos}}{{item.cos_percent}}%
+ +{% endwith %} +{% endblock %} diff --git a/analysis/templates/analysis/result_list.html b/analysis/templates/analysis/result_list.html new file mode 100644 index 0000000..e9dda2c --- /dev/null +++ b/analysis/templates/analysis/result_list.html @@ -0,0 +1,36 @@ +{% extends "base.html" %} +{% block main %} +

Results List

+{% endblock %} +{% block results %} + + + + + + + + + {% for result in object_list %} + {% with result.text as text %} + + + + + + + + {% endwith %} + {% endfor %} +
TextLanguageRemove Stop WordsCreatedDetail
+ + {{text.text}} + + {{text.language}}{{text.remove_stopwords}} + {{result.create}} + + + Detail + +
+{% endblock %} diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html index ad39cef..47bbcc8 100644 --- a/analysis/templates/analysis/training_data.html +++ b/analysis/templates/analysis/training_data.html @@ -4,7 +4,7 @@

{{object.name}}

Total rating One column rating -Find similar +Find similar Tokenize Delete {% endblock %} diff --git a/analysis/tests/test_urls.py b/analysis/tests/test_urls.py index 68eb1cd..9e94d34 100644 --- a/analysis/tests/test_urls.py +++ b/analysis/tests/test_urls.py @@ -3,6 +3,8 @@ """ from django.test import SimpleTestCase, TestCase from django.urls import reverse +from django_find_similar.models import CheckResult +from mixer.backend.django import mixer from analysis.tests.data import get_2x2_training_data @@ -38,6 +40,10 @@ def test_reverse(self): 'url': 'training_data_list', 'reverse': 'training-data-list/', }, + { + 'url': 'result_list', + 'reverse': 'result-list/', + }, ] for url in urls: app_url = f'{app_name}:{url["url"]}' @@ -58,6 +64,7 @@ def test_reverse(self): """ training_data = get_2x2_training_data() + check_result = mixer.blend(CheckResult) app_name = 'analysis' urls = [ @@ -82,6 +89,13 @@ def test_reverse(self): }, 'reverse': f'find-similar/{training_data.pk}/', }, + { + 'url': 'result', + 'kwargs': { + 'pk': check_result.pk + }, + 'reverse': f'result/{check_result.pk}/', + }, ] for url in urls: app_url = f'{app_name}:{url["url"]}' diff --git a/analysis/tests/tests_forms.py b/analysis/tests/tests_forms.py index ae39a39..b511fdc 100644 --- a/analysis/tests/tests_forms.py +++ b/analysis/tests/tests_forms.py @@ -8,6 +8,7 @@ OneTextForm, TwoTextForm, LoadTrainingDataForm, + FindSimilarForm, ) @@ -78,4 +79,27 @@ def test_fields(self): ) current_form = LoadTrainingDataForm() + self.assertTrueForm(current_form, true_form) + + +class TestFindSimilarForm(SimpleTestCase): + """ + One text form test + """ + + def test_fields(self): + """ + Test available fields + """ + + true_form = TrueForm( + fields=Fields( + count=1, + types={ + 'text': forms.CharField + } + ) + ) + + current_form = FindSimilarForm() self.assertTrueForm(current_form, true_form) \ No newline at end of file diff --git a/analysis/tests/tests_views.py b/analysis/tests/tests_views.py index f27c6d5..c07f13d 100644 --- a/analysis/tests/tests_views.py +++ b/analysis/tests/tests_views.py @@ -3,6 +3,7 @@ """ from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse +from django_find_similar.models import CheckResult from dry_tests import ( TestCase, SimpleTestCase, @@ -12,6 +13,8 @@ Context, POST, ) +from django_find_similar.forms import FindSimilarForm +from mixer.backend.django import mixer from analysis.forms import OneTextForm, TwoTextForm, LoadTrainingDataForm from analysis.models import TrainingData from analysis.tests.data import get_2x2_filepath, get_2x2_training_data @@ -485,12 +488,81 @@ def test_get(self): true_response = TrueResponse( status_code=200, context=Context( - keys=['object'], + keys=['object', 'form'], items={ 'object': self.training_data, }, + types={ + 'form': FindSimilarForm + } ) ) current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + + data = { + 'text': '1', + 'language': 'english', + 'remove_stopwords': True, + } + + request = Request( + url=self.url, + method=POST, + data=data + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=f'/analysis/result-list/' + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class ResultListTestCase(TestCase): + + def setUp(self): + self.url = reverse('analysis:result_list') + + def test_get(self): + request = Request( + url=self.url + ) - self.assertTrueResponse(current_response, true_response) \ No newline at end of file + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object_list'], + ), + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class TestResultDetailView(TestCase): + + def setUp(self): + self.check_result = mixer.blend(CheckResult) + self.url = reverse('analysis:result', kwargs={'pk': self.check_result.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'object': self.check_result, + } + ) + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) diff --git a/analysis/urls.py b/analysis/urls.py index 6ba5431..d6f4c42 100644 --- a/analysis/urls.py +++ b/analysis/urls.py @@ -12,7 +12,9 @@ path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"), path('training-data//', views.TrainingDataDetailView.as_view(), name="training_data"), - path('find-similar//', views.TrainingDataDetailView.as_view(), name="find_similar"), + path('find-similar//', views.FindSimilarFormView.as_view(), name="find_similar"), path('delete-training-data//', views.TrainingDataDeleteView.as_view(), name="delete_training_data"), path('training-data-list/', views.TrainingDataListView.as_view(), name="training_data_list"), + path('result-list/', views.ResultListView.as_view(), name="result_list"), + path('result//', views.ResultDetailView.as_view(), name="result"), ] diff --git a/analysis/views.py b/analysis/views.py index 4c0ed52..8910d3b 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -2,9 +2,14 @@ Analysis views """ import os +from django.shortcuts import get_object_or_404 from django.views.generic import FormView, DetailView, ListView, DeleteView from django.urls import reverse, reverse_lazy from django.conf import settings +from django_find_similar.forms import FindSimilarForm +from django_find_similar.models import TextToken, TokenTextAdapter, CheckResult +from find_similar import find_similar + from analysis.functions import ( analyze_one_item, analyze_two_items, @@ -13,9 +18,9 @@ ) from .forms import ( OneTextForm, - TwoTextForm, LoadTrainingDataForm, + TwoTextForm, LoadTrainingDataForm ) -from .models import TrainingData +from .models import TrainingData, to_list class TokenizeOneView(FormView): @@ -164,3 +169,71 @@ class TrainingDataDeleteView(DeleteView): model = TrainingData template_name = 'analysis/training_data_delete_confirm.html' success_url = reverse_lazy('analysis:training_data_list') + + +class FindSimilarFormView(FormView): + form_class = FindSimilarForm + template_name = 'analysis/find_similar.html' + success_url = reverse_lazy('analysis:result_list') + + def dispatch(self, request, *args, **kwargs): + pk = kwargs['pk'] + self.object = get_object_or_404(TrainingData, pk=pk) + return super().dispatch(request, *args, **kwargs) + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context['object'] = self.object + return context + + def form_valid(self, form): + # Get cleaned data from FindSimilarForm + data = form.cleaned_data + + text = data['text'] + language = data['language'] + remove_stopwords = data['remove_stopwords'] + + # Get or create TextToken model + text_token, _ = TextToken.objects.get_or_create( + text=text, + language=language, + remove_stopwords=remove_stopwords, + ) + # Adapt TextToken for find_similar + adapter = TokenTextAdapter(text_token) + + # save all data from dataset to TextToken + # self.object + data_list = to_list(self.object.get_dataframe()) + + new_token_texts = [] + for item in data_list: + item_text_token = TextToken( + text=item, + language=language, + remove_stopwords=remove_stopwords + ) + new_token_texts.append(item_text_token) + + TextToken.objects.bulk_create(new_token_texts, ignore_conflicts=True) + + # Adapt TextToken + adapters = [TokenTextAdapter(item) for item in TextToken.objects.all()] + # use find_similar + result = find_similar(adapter, adapters, count=len(data_list)) + + # save results to the database + CheckResult.save_result(text_token, result) + return super().form_valid(form) + + +class ResultListView(ListView): + model = CheckResult + template_name = 'analysis/result_list.html' + ordering = ['-create'] + + +class ResultDetailView(DetailView): + model = CheckResult + template_name = 'analysis/result.html' diff --git a/laboratory/settings.py b/laboratory/settings.py index 9980886..a7d5fb4 100644 --- a/laboratory/settings.py +++ b/laboratory/settings.py @@ -50,6 +50,8 @@ "django.contrib.sessions", "django.contrib.messages", "django.contrib.staticfiles", + # others + 'django_find_similar', # My 'core', 'analysis', diff --git a/requirements.txt b/requirements.txt index 0a5fe78..b2b963e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,7 @@ Django==4.2.6 django-dry-tests==1.0.0 +django-find-similar==1.1.0 pandas==2.1.1 openpyxl==3.1.2 -coverage==7.3.2 \ No newline at end of file +coverage==7.3.2 +mixer==7.2.2 \ No newline at end of file diff --git a/templates/base.html b/templates/base.html index 5ee5a9a..8445a33 100644 --- a/templates/base.html +++ b/templates/base.html @@ -95,28 +95,10 @@