diff --git a/analysis/admin.py b/analysis/admin.py
index e254195..0476320 100644
--- a/analysis/admin.py
+++ b/analysis/admin.py
@@ -1,6 +1,10 @@
"""
Admin page for analysis
"""
-# from django.contrib import admin
+from django.contrib import admin
+from django_find_similar.models import TextToken
+from django_find_similar.models.text import Token
# Register your models here.
+# Expose the django_find_similar models in the admin site
+admin.site.register([TextToken, Token])
\ No newline at end of file
diff --git a/analysis/forms.py b/analysis/forms.py
index a65e87f..bd08d47 100644
--- a/analysis/forms.py
+++ b/analysis/forms.py
@@ -35,3 +35,12 @@ class LoadTrainingDataForm(forms.Form):
sheet_name = forms.IntegerField(required=False, initial=0, widget=forms.NumberInput(attrs={
'class': 'form-control'
}))
+
+
+class FindSimilarForm(forms.Form):
+    """
+    Form with a single required text field (at most 128 characters)
+    """
+    text = forms.CharField(
+        max_length=128,
+        widget=forms.TextInput(attrs={'class': 'form-control'}),
+    )
diff --git a/analysis/models.py b/analysis/models.py
index fc348d3..ebfcef2 100644
--- a/analysis/models.py
+++ b/analysis/models.py
@@ -1,6 +1,8 @@
"""
Analisys models
"""
+from io import StringIO
+
import pandas as pd
from django.db import models
@@ -12,7 +14,7 @@ class TrainingData(models.Model):
update = models.DateTimeField(auto_now=True)
def get_dataframe(self) -> pd.DataFrame:
+ """
+ Parse the JSON text stored in ``self.data`` into a DataFrame (read with ``dtype=str``).
+ """
- return pd.read_json(self.data, dtype=str)
+ # Pass a file-like object: pandas 2.1 deprecates giving read_json a literal JSON string
+ return pd.read_json(StringIO(self.data), dtype=str)
@property
def columns_count(self):
@@ -29,4 +31,4 @@ def to_list(dataframe: pd.DataFrame) -> list:
for column in columns:
data_list = dataframe[column].values.tolist()
result += data_list
- return result
\ No newline at end of file
+ return result
diff --git a/analysis/templates/analysis/find_similar.html b/analysis/templates/analysis/find_similar.html
new file mode 100644
index 0000000..9f91789
--- /dev/null
+++ b/analysis/templates/analysis/find_similar.html
@@ -0,0 +1,33 @@
+{% extends "base.html" %}
+{% block main %}
+
{{object.name}}
+
+
+{% endblock %}
+{% block results %}
+ {% with object.get_dataframe as data %}
+
+
+ {% for column in data.columns %}
+
+ {{column}}
+ |
+ {% endfor %}
+
+ {% for index, row in data.iterrows %}
+
+ {% for cell in row %}
+
+ {{cell}}
+ |
+ {% endfor %}
+
+ {% endfor %}
+
+ {% endwith %}
+
+{% endblock %}
diff --git a/analysis/templates/analysis/result.html b/analysis/templates/analysis/result.html
new file mode 100644
index 0000000..3fb7640
--- /dev/null
+++ b/analysis/templates/analysis/result.html
@@ -0,0 +1,57 @@
+{% extends "base.html" %}
+{% block main %}
+{% with object.text as text %}
+
+
+ Name |
+ Value |
+
+
+
+ Text
+ |
+
+ {{text.text}}
+ |
+
+
+
+ Language
+ |
+
+ {{text.language}}
+ |
+
+
+
+ Remove Stop Words
+ |
+
+ {{text.remove_stopwords}}
+ |
+
+
+{% endwith %}
+{% endblock %}
+{% block results %}
+
+{% with object.checkresultitem_set.all as items %}
+
+
+ order |
+ text |
+ cos |
+ % |
+
+ {% for item in items %}
+
+ {{item.order}} |
+ {{item.text.text}} |
+ {{item.cos}} |
+ {{item.cos_percent}}% |
+
+ {% endfor %}
+
+
+{% endwith %}
+{% endblock %}
diff --git a/analysis/templates/analysis/result_list.html b/analysis/templates/analysis/result_list.html
new file mode 100644
index 0000000..e9dda2c
--- /dev/null
+++ b/analysis/templates/analysis/result_list.html
@@ -0,0 +1,36 @@
+{% extends "base.html" %}
+{% block main %}
+ Results List
+{% endblock %}
+{% block results %}
+
+
+ Text |
+ Language |
+ Remove Stop Words |
+ Created |
+ Detail |
+
+ {% for result in object_list %}
+ {% with result.text as text %}
+
+
+
+ {{text.text}}
+
+ |
+ {{text.language}} |
+ {{text.remove_stopwords}} |
+
+ {{result.create}}
+ |
+
+
+ Detail
+
+ |
+
+ {% endwith %}
+ {% endfor %}
+
+{% endblock %}
diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html
index ad39cef..47bbcc8 100644
--- a/analysis/templates/analysis/training_data.html
+++ b/analysis/templates/analysis/training_data.html
@@ -4,7 +4,7 @@ {{object.name}}
Total rating
One column rating
-Find similar
+Find similar
Tokenize
Delete
{% endblock %}
diff --git a/analysis/tests/test_urls.py b/analysis/tests/test_urls.py
index 68eb1cd..9e94d34 100644
--- a/analysis/tests/test_urls.py
+++ b/analysis/tests/test_urls.py
@@ -3,6 +3,8 @@
"""
from django.test import SimpleTestCase, TestCase
from django.urls import reverse
+from django_find_similar.models import CheckResult
+from mixer.backend.django import mixer
from analysis.tests.data import get_2x2_training_data
@@ -38,6 +40,10 @@ def test_reverse(self):
'url': 'training_data_list',
'reverse': 'training-data-list/',
},
+ {
+ 'url': 'result_list',
+ 'reverse': 'result-list/',
+ },
]
for url in urls:
app_url = f'{app_name}:{url["url"]}'
@@ -58,6 +64,7 @@ def test_reverse(self):
"""
training_data = get_2x2_training_data()
+ check_result = mixer.blend(CheckResult)
app_name = 'analysis'
urls = [
@@ -82,6 +89,13 @@ def test_reverse(self):
},
'reverse': f'find-similar/{training_data.pk}/',
},
+ {
+ 'url': 'result',
+ 'kwargs': {
+ 'pk': check_result.pk
+ },
+ 'reverse': f'result/{check_result.pk}/',
+ },
]
for url in urls:
app_url = f'{app_name}:{url["url"]}'
diff --git a/analysis/tests/tests_forms.py b/analysis/tests/tests_forms.py
index ae39a39..b511fdc 100644
--- a/analysis/tests/tests_forms.py
+++ b/analysis/tests/tests_forms.py
@@ -8,6 +8,7 @@
OneTextForm,
TwoTextForm,
LoadTrainingDataForm,
+ FindSimilarForm,
)
@@ -78,4 +79,27 @@ def test_fields(self):
)
current_form = LoadTrainingDataForm()
+ self.assertTrueForm(current_form, true_form)
+
+
+class TestFindSimilarForm(SimpleTestCase):
+ """
+ FindSimilarForm test
+ """
+
+ def test_fields(self):
+ """
+ The form must expose exactly one field: `text`, a CharField
+ """
+
+ true_form = TrueForm(
+ fields=Fields(
+ count=1,
+ types={
+ 'text': forms.CharField
+ }
+ )
+ )
+
+ current_form = FindSimilarForm()
self.assertTrueForm(current_form, true_form)
\ No newline at end of file
diff --git a/analysis/tests/tests_views.py b/analysis/tests/tests_views.py
index f27c6d5..c07f13d 100644
--- a/analysis/tests/tests_views.py
+++ b/analysis/tests/tests_views.py
@@ -3,6 +3,7 @@
"""
from django.core.files.uploadedfile import SimpleUploadedFile
from django.urls import reverse
+from django_find_similar.models import CheckResult
from dry_tests import (
TestCase,
SimpleTestCase,
@@ -12,6 +13,8 @@
Context,
POST,
)
+from django_find_similar.forms import FindSimilarForm
+from mixer.backend.django import mixer
from analysis.forms import OneTextForm, TwoTextForm, LoadTrainingDataForm
from analysis.models import TrainingData
from analysis.tests.data import get_2x2_filepath, get_2x2_training_data
@@ -485,12 +488,81 @@ def test_get(self):
true_response = TrueResponse(
status_code=200,
context=Context(
- keys=['object'],
+ keys=['object', 'form'],
items={
'object': self.training_data,
},
+ types={
+ 'form': FindSimilarForm
+ }
)
)
current_response = request.get_response(self.client)
+ self.assertTrueResponse(current_response, true_response)
+
+    def test_post(self):
+        """
+        A valid POST must answer with a 302 redirect to the result list
+        """
+        data = {
+            'text': '1',
+            'language': 'english',
+            'remove_stopwords': True,
+        }
+
+        request = Request(
+            url=self.url,
+            method=POST,
+            data=data
+        )
+
+        true_response = TrueResponse(
+            status_code=302,
+            # Resolve the target by name (as setUp does) instead of
+            # repeating the route as a literal string
+            redirect_url=reverse('analysis:result_list')
+        )
+
+        current_response = request.get_response(self.client)
+        self.assertTrueResponse(current_response, true_response)
+
+
+class ResultListTestCase(TestCase):
+ """
+ Tests for the page served under the `analysis:result_list` URL name
+ """
+
+ def setUp(self):
+ """
+ Resolve the result list URL once for every test
+ """
+ self.url = reverse('analysis:result_list')
+
+ def test_get(self):
+ """
+ The page must answer 200 and put `object_list` into the context
+ """
+ request = Request(
+ url=self.url
+ )
- self.assertTrueResponse(current_response, true_response)
\ No newline at end of file
+ true_response = TrueResponse(
+ status_code=200,
+ context=Context(
+ keys=['object_list'],
+ ),
+ )
+
+ current_response = request.get_response(self.client)
+ self.assertTrueResponse(current_response, true_response)
+
+
+class TestResultDetailView(TestCase):
+    """
+    Tests for the page served under the `analysis:result` URL name
+    """
+
+    def setUp(self):
+        """
+        Create a CheckResult with mixer and resolve its detail URL
+        """
+        self.check_result = mixer.blend(CheckResult)
+        self.url = reverse(
+            'analysis:result',
+            kwargs={'pk': self.check_result.pk},
+        )
+
+    def test_get(self):
+        """
+        The page must answer 200 with the CheckResult as `object` in the context
+        """
+        expected = TrueResponse(
+            status_code=200,
+            context=Context(items={'object': self.check_result}),
+        )
+
+        actual = Request(url=self.url).get_response(self.client)
+        self.assertTrueResponse(actual, expected)
diff --git a/analysis/urls.py b/analysis/urls.py
index 6ba5431..d6f4c42 100644
--- a/analysis/urls.py
+++ b/analysis/urls.py
@@ -12,7 +12,9 @@
path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"),
path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"),
path('training-data//', views.TrainingDataDetailView.as_view(), name="training_data"),
- path('find-similar//', views.TrainingDataDetailView.as_view(), name="find_similar"),
+ path('find-similar//', views.FindSimilarFormView.as_view(), name="find_similar"),
path('delete-training-data//', views.TrainingDataDeleteView.as_view(), name="delete_training_data"),
path('training-data-list/', views.TrainingDataListView.as_view(), name="training_data_list"),
+ path('result-list/', views.ResultListView.as_view(), name="result_list"),
+ path('result//', views.ResultDetailView.as_view(), name="result"),
]
diff --git a/analysis/views.py b/analysis/views.py
index 4c0ed52..8910d3b 100644
--- a/analysis/views.py
+++ b/analysis/views.py
@@ -2,9 +2,14 @@
Analysis views
"""
import os
+from django.shortcuts import get_object_or_404
from django.views.generic import FormView, DetailView, ListView, DeleteView
from django.urls import reverse, reverse_lazy
from django.conf import settings
+from django_find_similar.forms import FindSimilarForm
+from django_find_similar.models import TextToken, TokenTextAdapter, CheckResult
+from find_similar import find_similar
+
from analysis.functions import (
analyze_one_item,
analyze_two_items,
@@ -13,9 +18,9 @@
)
from .forms import (
OneTextForm,
- TwoTextForm, LoadTrainingDataForm,
+ TwoTextForm, LoadTrainingDataForm
)
-from .models import TrainingData
+from .models import TrainingData, to_list
class TokenizeOneView(FormView):
@@ -164,3 +169,71 @@ class TrainingDataDeleteView(DeleteView):
model = TrainingData
template_name = 'analysis/training_data_delete_confirm.html'
success_url = reverse_lazy('analysis:training_data_list')
+
+
+class FindSimilarFormView(FormView):
+    """
+    Rank the texts of one TrainingData set by similarity to a submitted text.
+
+    The TrainingData is selected by the ``pk`` URL kwarg (404 when missing).
+    A valid form stores the ranking through ``CheckResult.save_result`` and
+    redirects to the result list.
+    """
+    form_class = FindSimilarForm
+    template_name = 'analysis/find_similar.html'
+    success_url = reverse_lazy('analysis:result_list')
+
+    def dispatch(self, request, *args, **kwargs):
+        """
+        Load the TrainingData before any handler runs, so both the form page
+        and the form submission can use ``self.object``.
+        """
+        self.object = get_object_or_404(TrainingData, pk=kwargs['pk'])
+        return super().dispatch(request, *args, **kwargs)
+
+    def get_context_data(self, **kwargs):
+        """
+        Add the TrainingData to the template context under ``object``.
+        """
+        context = super().get_context_data(**kwargs)
+        context['object'] = self.object
+        return context
+
+    def form_valid(self, form):
+        """
+        Compare the submitted text with the dataset texts and save the result.
+        """
+        data = form.cleaned_data
+        language = data['language']
+        remove_stopwords = data['remove_stopwords']
+
+        # Get or create the TextToken for the submitted text
+        text_token, _ = TextToken.objects.get_or_create(
+            text=data['text'],
+            language=language,
+            remove_stopwords=remove_stopwords,
+        )
+        # Adapt TextToken for find_similar
+        adapter = TokenTextAdapter(text_token)
+
+        # Save every value of the dataset as a TextToken; rows that conflict
+        # with existing ones are skipped by the database
+        data_list = to_list(self.object.get_dataframe())
+        TextToken.objects.bulk_create(
+            [
+                TextToken(
+                    text=item,
+                    language=language,
+                    remove_stopwords=remove_stopwords,
+                )
+                for item in data_list
+            ],
+            ignore_conflicts=True,
+        )
+
+        # Compare only against this dataset: match rows on the same
+        # (text, language, remove_stopwords) values used to create them above.
+        # Taking every TextToken would mix in other datasets, other settings
+        # and previously submitted query texts.
+        dataset_texts = set(data_list)
+        candidates = TextToken.objects.filter(
+            language=language,
+            remove_stopwords=remove_stopwords,
+        )
+        adapters = [
+            TokenTextAdapter(item)
+            for item in candidates
+            if item.text in dataset_texts
+        ]
+        result = find_similar(adapter, adapters, count=len(data_list))
+
+        # Save results to the database
+        CheckResult.save_result(text_token, result)
+        return super().form_valid(form)
+
+
+class ResultListView(ListView):
+    """
+    List of CheckResult records ordered by ``create``, descending
+    """
+    template_name = 'analysis/result_list.html'
+    model = CheckResult
+    ordering = ('-create',)
+
+
+class ResultDetailView(DetailView):
+    """
+    Page of a single CheckResult
+    """
+    template_name = 'analysis/result.html'
+    model = CheckResult
diff --git a/laboratory/settings.py b/laboratory/settings.py
index 9980886..a7d5fb4 100644
--- a/laboratory/settings.py
+++ b/laboratory/settings.py
@@ -50,6 +50,8 @@
"django.contrib.sessions",
"django.contrib.messages",
"django.contrib.staticfiles",
+ # others
+ 'django_find_similar',
# My
'core',
'analysis',
diff --git a/requirements.txt b/requirements.txt
index 0a5fe78..b2b963e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,7 @@
Django==4.2.6
django-dry-tests==1.0.0
+django-find-similar==1.1.0
pandas==2.1.1
openpyxl==3.1.2
-coverage==7.3.2
\ No newline at end of file
+coverage==7.3.2
+mixer==7.2.2
\ No newline at end of file
diff --git a/templates/base.html b/templates/base.html
index 5ee5a9a..8445a33 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -95,28 +95,10 @@
-
-
- Current month
-
-
-
-
-
- Last quarter
-
-
-
-
-
- Social engagement
-
-
-
-
+
+
- Year-end sale
+ Result List