From 56526b2d18278c067ffd275968a0752e99234db6 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Mon, 6 Nov 2023 19:12:48 +0400 Subject: [PATCH 01/17] add some optimization --- .gitignore | 6 +- analysis/models.py | 10 +++- analysis/templates/analysis/find_similar.html | 2 +- .../templates/analysis/training_data.html | 2 +- analysis/tests/tests_functions.py | 6 +- analysis/tests/tests_models.py | 6 +- analysis/tests/tests_views.py | 2 +- analysis/views.py | 58 ++++++++++++++----- core/tests/test_numpy.py | 7 +++ laboratory/settings.py | 8 +++ laboratory/urls.py | 1 + requirements.txt | 5 +- uploads/.empty | 0 13 files changed, 85 insertions(+), 28 deletions(-) create mode 100644 core/tests/test_numpy.py create mode 100644 uploads/.empty diff --git a/.gitignore b/.gitignore index b0b6f3a..55cdd1c 100644 --- a/.gitignore +++ b/.gitignore @@ -124,6 +124,7 @@ celerybeat.pid .venv env/ venv/ +venv10/ ENV/ env.bak/ venv.bak/ @@ -157,4 +158,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. -.idea/ \ No newline at end of file +.idea/ + +# uploads folder +uploads/loaddata.xlsx \ No newline at end of file diff --git a/analysis/models.py b/analysis/models.py index ebfcef2..eb97e87 100644 --- a/analysis/models.py +++ b/analysis/models.py @@ -5,6 +5,7 @@ import pandas as pd from django.db import models +from django.utils.functional import cached_property class TrainingData(models.Model): @@ -13,16 +14,21 @@ class TrainingData(models.Model): create = models.DateTimeField(auto_now_add=True) update = models.DateTimeField(auto_now=True) + @cached_property def get_dataframe(self) -> pd.DataFrame: return pd.read_json(StringIO(self.data), dtype=str) @property def columns_count(self): - return len(self.get_dataframe().columns) + return len(self.get_dataframe.columns) @property def rows_count(self): - return len(self.get_dataframe().index) + return len(self.get_dataframe.index) + + def display_dataframe(self): + dataframe = self.get_dataframe + return dataframe.head(10) def to_list(dataframe: pd.DataFrame) -> list: diff --git a/analysis/templates/analysis/find_similar.html b/analysis/templates/analysis/find_similar.html index 9f91789..0d98e1b 100644 --- a/analysis/templates/analysis/find_similar.html +++ b/analysis/templates/analysis/find_similar.html @@ -9,7 +9,7 @@
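For context on the models.py change above: decorating get_dataframe with cached_property means the stored JSON is parsed once per instance and the resulting DataFrame is reused by columns_count, rows_count and the new display_dataframe, which is why every call site drops the parentheses. A standalone sketch of that behaviour (not code from the patch; functools.cached_property is used here, and django.utils.functional.cached_property caches the same way for this purpose):

from functools import cached_property  # the patch itself uses django.utils.functional.cached_property
from io import StringIO

import pandas as pd


class TrainingDataSketch:
    """Stand-in for the TrainingData model; only the caching behaviour is shown."""

    def __init__(self, data: str):
        self.data = data  # JSON string, as stored on the model

    @cached_property
    def get_dataframe(self) -> pd.DataFrame:
        print('parsing JSON')  # printed once per instance
        return pd.read_json(StringIO(self.data), dtype=str)

    @property
    def columns_count(self) -> int:
        # Attribute access (no parentheses) returns the cached DataFrame after the first hit.
        return len(self.get_dataframe.columns)

    @property
    def rows_count(self) -> int:
        return len(self.get_dataframe.index)


item = TrainingDataSketch('{"a": {"0": "1"}, "b": {"0": "2"}}')
print(item.columns_count, item.rows_count)  # 'parsing JSON' appears only once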

{{object.name}}

{% endblock %} {% block results %} - {% with object.get_dataframe as data %} + {% with object.display_dataframe as data %} {% for column in data.columns %} diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html index 73a14f8..532e5ae 100644 --- a/analysis/templates/analysis/training_data.html +++ b/analysis/templates/analysis/training_data.html @@ -9,7 +9,7 @@

{{object.name}}

Delete {% endblock %} {% block results %} - {% with object.get_dataframe as data %} + {% with object.display_dataframe as data %}
{% for column in data.columns %} diff --git a/analysis/tests/tests_functions.py b/analysis/tests/tests_functions.py index d401d31..7e7e6dd 100644 --- a/analysis/tests/tests_functions.py +++ b/analysis/tests/tests_functions.py @@ -251,7 +251,7 @@ def test_load_testing_data(self): expected = get_2x2_expected_data() result = load_training_data('first', filepath, sheet_name=0, printer=self.testing_printer) self.assertTrue(isinstance(result, TrainingData)) - self.assertTrue(expected.equals(result.get_dataframe())) + self.assertTrue(expected.equals(result.get_dataframe)) # prints expected_prints = [ @@ -276,10 +276,10 @@ def find_similar_2x2(text, texts): training_data = get_2x2_training_data() text = '2' - dataframe = training_data.get_dataframe() + dataframe = training_data.get_dataframe similars = find_similar_dataframe( text, - training_data.get_dataframe(), + training_data.get_dataframe, find_similar_2x2, printer=self.testing_printer ) diff --git a/analysis/tests/tests_models.py b/analysis/tests/tests_models.py index 21bab9e..6505c49 100644 --- a/analysis/tests/tests_models.py +++ b/analysis/tests/tests_models.py @@ -21,11 +21,11 @@ def test_save(self): self.assertTrue(self.dataframe.equals(get_data)) def test_data_from_json(self): - self.assertTrue(self.dataframe.equals(self.training_data.get_dataframe())) + self.assertTrue(self.dataframe.equals(self.training_data.get_dataframe)) def test_count(self): - self.assertEqual(len(self.training_data.get_dataframe().columns), 2) - self.assertEqual(len(self.training_data.get_dataframe().index), 2) + self.assertEqual(len(self.training_data.get_dataframe.columns), 2) + self.assertEqual(len(self.training_data.get_dataframe.index), 2) self.assertEqual(self.training_data.columns_count, 2) self.assertEqual(self.training_data.rows_count, 2) diff --git a/analysis/tests/tests_views.py b/analysis/tests/tests_views.py index 56c2bcd..d5721b1 100644 --- a/analysis/tests/tests_views.py +++ b/analysis/tests/tests_views.py @@ -384,7 +384,7 @@ def test_get(self): self.training_data.name, ] - dataframe = self.training_data.get_dataframe() + dataframe = self.training_data.get_dataframe # add headers columns = dataframe.columns diff --git a/analysis/views.py b/analysis/views.py index e3f34b9..356a406 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -1,6 +1,7 @@ """ Analysis views """ +import cProfile import os from django.http import HttpResponseRedirect @@ -9,8 +10,9 @@ from django.urls import reverse, reverse_lazy from django.conf import settings from django_find_similar.forms import FindSimilarForm, FindSimilarParamsForm -from django_find_similar.models import TextToken, TokenTextAdapter, CheckResult +from django_find_similar.models import TextToken, TokenTextAdapter, CheckResult, Token, CheckResultItem from find_similar import find_similar +from find_similar.tokenize import tokenize from analysis.functions import ( analyze_one_item, @@ -149,7 +151,6 @@ def form_valid(self, form): uploaded_path = self.handle_uploaded_file(excel_file) name = data['name'] sheet_name = data.get('sheet_name', 0) - print('SHEET_NAME', sheet_name) self.training_data = load_training_data(name=name, filepath=uploaded_path, sheet_name=sheet_name) return super().form_valid(form) @@ -208,7 +209,7 @@ def form_valid(self, form): # save all data from dataset to TextToken # self.object - data_list = to_list(self.object.get_dataframe()) + data_list = to_list(self.object.get_dataframe) new_token_texts = [] for item in data_list: @@ -227,7 +228,7 @@ def form_valid(self, form): result = 
find_similar(adapter, adapters, count=len(data_list)) # save results to the database - CheckResult.save_result(text_token, result) + # CheckResult.save_result(text_token, result) return super().form_valid(form) @@ -246,6 +247,10 @@ class TextTokenListView(ListView): model = TextToken template_name = 'analysis/text_token_list.html' ordering = ['-create'] + paginate_by = 3000 + + def get_queryset(self): + return TextToken.objects.prefetch_related('token_set').all() class TextTokenDetailView(DetailView): @@ -256,12 +261,19 @@ class TextTokenDetailView(DetailView): def clear_training_data(request): if request.method == 'POST': TrainingData.objects.all().delete() + CheckResultItem.objects.all().delete() + Token.objects.all().delete() + CheckResult.objects.all().delete() + TextToken.objects.all().delete() return HttpResponseRedirect(reverse('analysis:training_data_list')) return render(request, 'analysis/clear_data.html', context={'model_name': 'Training Data'}) def clear_text_token(request): if request.method == 'POST': + CheckResultItem.objects.all().delete() + Token.objects.all().delete() + CheckResult.objects.all().delete() TextToken.objects.all().delete() return HttpResponseRedirect(reverse('analysis:text_token_list')) return render(request, 'analysis/clear_data.html', context={'model_name': 'Text Tokens'}) @@ -273,25 +285,43 @@ class TokenizeView(FormView): success_url = reverse_lazy('analysis:training_data_list') def form_valid(self, form): + # profiler = cProfile.Profile() + # profiler.enable() cleaned_data = form.cleaned_data language = cleaned_data['language'] remove_stopwords = cleaned_data['remove_stopwords'] # Make all training data (In a future we shout get just one) training_data_list = TrainingData.objects.all() + all_token_texts = [] for training_data in training_data_list: - data_list = to_list(training_data.get_dataframe()) - - new_token_texts = [] + data_list = to_list(training_data.get_dataframe) + for item in data_list: - item_text_token = TextToken( + all_token_texts.append(TextToken( text=item, language=language, remove_stopwords=remove_stopwords - ) - new_token_texts.append(item_text_token) - TextToken.objects.bulk_create(new_token_texts, ignore_conflicts=True) + )) - for text_token in TextToken.objects.all(): - text_token.create_tokens() - + TextToken.objects.bulk_create(all_token_texts, ignore_conflicts=True) + + all_token_texts = TextToken.objects.all() + + all_tokens = [] + # for text_token in TextToken.objects.all(): + for text_token in all_token_texts: + # text_token.create_tokens() + token_set = tokenize( + text_token.text, + language=text_token.language, + remove_stopwords=text_token.remove_stopwords + ) + # tokens = map(lambda text_str: Token(value=text_str, token_text=text_token), token_set) + # tokens = [Token(value=text_str, token_text=text_token) for text_str in token_set] + # all_tokens += tokens + for text_str in token_set: + all_tokens.append(Token(value=text_str, token_text=text_token)) + + Token.objects.bulk_create(all_tokens, ignore_conflicts=True) + # profiler.disable() return super().form_valid(form) \ No newline at end of file diff --git a/core/tests/test_numpy.py b/core/tests/test_numpy.py new file mode 100644 index 0000000..7ef9182 --- /dev/null +++ b/core/tests/test_numpy.py @@ -0,0 +1,7 @@ +from django.test import SimpleTestCase + + +class NumpyTestCase(SimpleTestCase): + + def test_matrix_updates(self): + self.assertTrue(True) \ No newline at end of file diff --git a/laboratory/settings.py b/laboratory/settings.py index a7d5fb4..72ad43b 100644 
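The TokenizeView rewrite above is mostly about batching: TextToken and Token rows are accumulated in plain lists and written with a single bulk_create() instead of saving them one at a time through create_tokens(). A rough stdlib-only sketch of the same idea, with sqlite3 standing in for the Django ORM (nothing below is the project's actual code):

import sqlite3

texts = ["one two", "uno", "new nef"]  # sample rows, loosely based on the numpy test data

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE token (value TEXT, source TEXT)")

# Build every row in memory first...
rows = []
for text in texts:
    for token in sorted(set(text.split())):  # stand-in for find_similar.tokenize()
        rows.append((token, text))

# ...then write them in a single round trip; bulk_create() plays this role for the ORM.
with conn:
    conn.executemany("INSERT INTO token (value, source) VALUES (?, ?)", rows)

print(conn.execute("SELECT COUNT(*) FROM token").fetchone()[0])  # 5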
--- a/laboratory/settings.py +++ b/laboratory/settings.py @@ -52,6 +52,7 @@ "django.contrib.staticfiles", # others 'django_find_similar', + "debug_toolbar", # My 'core', 'analysis', @@ -62,6 +63,7 @@ "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", + "debug_toolbar.middleware.DebugToolbarMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", @@ -140,3 +142,9 @@ # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + +INTERNAL_IPS = [ + # ... + "127.0.0.1", + # ... +] \ No newline at end of file diff --git a/laboratory/urls.py b/laboratory/urls.py index 1e2c8d0..b89d509 100644 --- a/laboratory/urls.py +++ b/laboratory/urls.py @@ -8,4 +8,5 @@ path("admin/", admin.site.urls), path('', include('core.urls')), path('analysis/', include('analysis.urls')), + path("__debug__/", include("debug_toolbar.urls")), ] diff --git a/requirements.txt b/requirements.txt index c7877b8..983824d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,8 @@ Django==4.2.6 django-dry-tests==1.0.0 -django-find-similar==1.2.0 +django-find-similar==1.3.0 pandas==2.1.1 openpyxl==3.1.2 coverage==7.3.2 -mixer==7.2.2 \ No newline at end of file +mixer==7.2.2 +django-debug-toolbar==4.2.0 \ No newline at end of file diff --git a/uploads/.empty b/uploads/.empty new file mode 100644 index 0000000..e69de29 From 9915621aeceb02816ee5530a90c194a98c151742 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 18:07:09 +0400 Subject: [PATCH 02/17] numpy matrix tests --- core/core_functions.py | 62 ++++++++ core/tests/test_numpy.py | 118 ++++++++++++++- core/tests/tests_core_functions.py | 221 +++++++++++++++++++++++++++++ 3 files changed, 400 insertions(+), 1 deletion(-) create mode 100644 core/core_functions.py create mode 100644 core/tests/tests_core_functions.py diff --git a/core/core_functions.py b/core/core_functions.py new file mode 100644 index 0000000..728ff49 --- /dev/null +++ b/core/core_functions.py @@ -0,0 +1,62 @@ +""" +Core functions to analyze find_similar proximity +""" +import numpy as np +from find_similar import TokenText, find_similar + + +def to_matrix(data: list) -> np.matrix: + """ + Convert data list to the Matrix + :param data: data in list of lists + :return: Matrix + """ + return np.matrix(data) + + +def str_to_token_text(text: str) -> TokenText: + """ + Create TokenText from text str + :param text: some str text + :return: TokenText with tokens + """ + return TokenText(text) + + +tokenize_vector = np.vectorize(str_to_token_text) + + +def matrix_to_list(matrix: np.matrix) -> list: + """ + Create list from matrix + :param matrix: matrix with data + :return: list of all matrix values + """ + return list(np.array(matrix).reshape(-1, )) + + +find_similar_vector = np.vectorize(find_similar, otypes=[TokenText], excluded=[ + 'texts', + 'language', + 'count', + 'dictionary', + 'keywords' + ] + ) + + +def reshape_results(results: list, shape: dict) -> np.matrix: + arr = np.array(results, dtype=TokenText) + arr = arr.reshape(shape) + matrix = np.asmatrix(arr) + return matrix + + +reshape_results_vector = np.vectorize(reshape_results, otypes=[TokenText], excluded=['shape']) + + +def get_matrix_head(matrix: np.matrix, count: int = 1): + return matrix[:count] + + +# get_matrix_head_vector = 
np.vectorize(get_matrix_head, excluded=['count']) \ No newline at end of file diff --git a/core/tests/test_numpy.py b/core/tests/test_numpy.py index 7ef9182..c33d3cf 100644 --- a/core/tests/test_numpy.py +++ b/core/tests/test_numpy.py @@ -1,7 +1,123 @@ +import numpy as np +from find_similar import TokenText, find_similar from django.test import SimpleTestCase class NumpyTestCase(SimpleTestCase): def test_matrix_updates(self): - self.assertTrue(True) \ No newline at end of file + + # data = [ + # ['one two', 'uno two', 'uno one'], + # ['uno', 'one', 'uno one'], + # ['new', 'nef', 'nef new'] + # ] + + data = [ + ['one two', 'uno two'], + ['uno', 'one'], + ] + + data_matrix = np.matrix(data, dtype=str) + + matrix_shape = data_matrix.shape + print('SHAPE', matrix_shape) + + self.assertIsInstance(data_matrix, np.matrix) + + self.assertIsInstance(data_matrix[0, 0], str) + + # Matrix str to TokenText (tokenize all matrix) + def str_to_token_text(e: str) -> TokenText: + return TokenText( + text=e, + language='russian', + remove_stopwords=True, + ) + + str_to_token_text = np.vectorize(str_to_token_text) + token_text_matrix = str_to_token_text(data_matrix) + + first_element = token_text_matrix[0, 0] + + self.assertIsInstance(first_element, TokenText) + self.assertEqual(first_element.tokens, {'one', 'two'}) + + # one way how to get list from matrix + data_list = list(np.array(data_matrix).reshape(-1,)) + + """ + text_to_check, + texts, + language="russian", + count=5, + dictionary=None, + remove_stopwords=True, + keywords=None, + """ + + find_similar_vector_two = np.vectorize(find_similar, excluded=['texts', 'language', 'count', 'dictionary', 'keywords']) + similars_matrix = find_similar_vector_two(text_to_check=token_text_matrix, texts=data_list) + + def to_cos_element(e: TokenText): + return e.cos + + to_cos_element_vector = np.vectorize(to_cos_element) + + def to_cos(e: np.ndarray) -> np.ndarray: + return to_cos_element_vector(e) + # return e + + to_cos_vector = np.vectorize(to_cos, otypes=[object]) + + cos_matrix = to_cos_vector(similars_matrix) + + def reshape_list(e: np.ndarray) -> np.ndarray: + arr = np.array(e, dtype=TokenText) + arr = arr.reshape(matrix_shape) + arr = np.asmatrix(arr) + return arr + + reshape_list_vectorize = np.vectorize(reshape_list, otypes=[object]) + + similar_shaped_matrix = reshape_list_vectorize(similars_matrix) + + print('STR MATRIX') + print(data_matrix) + print('TokenText MATRIX') + print(token_text_matrix) + print('Similars MATRIX') + print(similars_matrix) + print('cos MATRIX') + print(cos_matrix) + print('reshaped MATRIX') + print(similar_shaped_matrix) + + first = similar_shaped_matrix[0, 0] + self.assertIsInstance(first, np.matrix) + self.assertEqual(first.shape, matrix_shape) + + @np.vectorize + def token_to_text(e: TokenText) -> str: + return e.text + + first_texts = token_to_text(first) + + print('BEGIN') + print(data_matrix) + print('FIRST') + print(first_texts) + + second = similar_shaped_matrix[1, 0] + second_texts = token_to_text(second) + + print('SECOND') + print(second_texts) + + # self.assertTrue(np.array_equal(first_texts, data_matrix)) + + # 1. вариант анализа + # - сколько совпадений в 1-ой строчке - столько % по каждому элементу + # 2. варинат указываем топ границу и ищем чтобы было в столько строках (общий вариант 1-го случая) + # т.е. 
1-ый пункт это 2-ой пункт с границей 1 + self.assertTrue(True) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py new file mode 100644 index 0000000..62c38c5 --- /dev/null +++ b/core/tests/tests_core_functions.py @@ -0,0 +1,221 @@ +import numpy as np +from django.test import SimpleTestCase +from find_similar import TokenText + +from core.core_functions import ( + to_matrix, + str_to_token_text, + tokenize_vector, + matrix_to_list, + find_similar_vector, + reshape_results, + reshape_results_vector, + get_matrix_head, +) + + +class CoreFunctionsSimpleTestCase(SimpleTestCase): + + def setUp(self): + self.first_str = 'one two' + self.one_one = ['one two'] + self.one_two = [['one two', 'one']] + self.two_two = [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + + def test_to_matrix(self): + params = [ + { + 'data': self.one_one, + 'shape': (1,1) + }, + { + 'data': self.one_two, + 'shape': (1, 2) + }, + { + 'data': self.two_two, + 'shape': (2, 2) + }, + ] + for param in params: + matrix = to_matrix(param['data']) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, param['shape']) + + def test_str_to_token_text(self): + token_text = str_to_token_text(self.first_str) + self.assertIsInstance(token_text, TokenText) + self.assertEqual(len(token_text.tokens), 2) + + def test_tokenize_matrix(self): + params = [ + { + 'data': self.one_one, + }, + { + 'data': self.one_two, + }, + { + 'data': self.two_two, + }, + ] + for param in params: + old = to_matrix(param['data']) + new = tokenize_vector(old) + self.assertIsInstance(new, np.matrix) + self.assertTrue(new.dtype, TokenText) + self.assertEqual(new.shape, old.shape) + self.assertEqual(new[0, 0].text, old[0, 0]) + + def test_matrix_to_list(self): + params = [ + { + 'data': self.one_one, + 'value': ['one two'] + }, + { + 'data': self.one_two, + 'value': ['one two', 'one'] + }, + { + 'data': self.two_two, + 'value': ['one 1984', '1984', 'two 50', '50'], + }, + ] + for param in params: + old = to_matrix(param['data']) + new = matrix_to_list(old) + self.assertIsInstance(new, list) + x, y = old.shape + count = x * y + self.assertEqual(len(new), count) + self.assertEqual(new, param['value']) + + def test_find_similar_vector(self): + params = [ + { + 'data': self.one_one, + }, + { + 'data': self.one_two, + }, + { + 'data': self.two_two, + }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + self.assertIsInstance(new, np.matrix) + self.assertIsInstance(new[0, 0], list) + self.assertEqual(new[0, 0][0].text, old[0, 0]) + self.assertEqual(new.shape, old.shape) + + def test_reshape_results(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + # first + results = new[0, 0] + matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + results = new[0, 1] + matrix = 
reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertFalse(np.array_equal(matrix, expected_matrix)) + + def test_reshape_results_vector(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + new = reshape_results_vector(results=new, shape=new.shape) + # first + matrix = new[0, 0] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + matrix = new[0, 1] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertFalse(np.array_equal(matrix, expected_matrix)) + + def test_get_matrix_head(self): + lines = 1 + old = to_matrix(self.one_one) + head = get_matrix_head(old, lines) + self.assertIsInstance(head, np.matrix) + self.assertTrue(np.array_equal(old, head)) + + old = to_matrix(self.two_two) + head = get_matrix_head(old, lines) + self.assertFalse(np.array_equal(old, head)) + self.assertEqual(head.shape, (1, 2)) + + old = to_matrix(self.two_two) + head = get_matrix_head(old, 2) + self.assertTrue(np.array_equal(old, head)) \ No newline at end of file From c3b92d900e38eb9e03fb52634568f7cc589b9ae0 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 18:53:09 +0400 Subject: [PATCH 03/17] remove prints --- core/tests/test_numpy.py | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/core/tests/test_numpy.py b/core/tests/test_numpy.py index c33d3cf..ea64759 100644 --- a/core/tests/test_numpy.py +++ b/core/tests/test_numpy.py @@ -21,7 +21,7 @@ def test_matrix_updates(self): data_matrix = np.matrix(data, dtype=str) matrix_shape = data_matrix.shape - print('SHAPE', matrix_shape) + # print('SHAPE', matrix_shape) self.assertIsInstance(data_matrix, np.matrix) @@ -46,15 +46,6 @@ def str_to_token_text(e: str) -> TokenText: # one way how to get list from matrix data_list = list(np.array(data_matrix).reshape(-1,)) - """ - text_to_check, - texts, - language="russian", - count=5, - dictionary=None, - remove_stopwords=True, - keywords=None, - """ find_similar_vector_two = np.vectorize(find_similar, excluded=['texts', 'language', 'count', 'dictionary', 'keywords']) similars_matrix = find_similar_vector_two(text_to_check=token_text_matrix, texts=data_list) @@ -82,15 +73,15 @@ def reshape_list(e: np.ndarray) -> np.ndarray: similar_shaped_matrix = reshape_list_vectorize(similars_matrix) - print('STR MATRIX') + #print('STR MATRIX') print(data_matrix) - print('TokenText MATRIX') + #print('TokenText MATRIX') print(token_text_matrix) - print('Similars MATRIX') + #print('Similars MATRIX') print(similars_matrix) - print('cos MATRIX') + #print('cos MATRIX') print(cos_matrix) - print('reshaped MATRIX') + #print('reshaped MATRIX') print(similar_shaped_matrix) first = 
similar_shaped_matrix[0, 0] @@ -103,15 +94,15 @@ def token_to_text(e: TokenText) -> str: first_texts = token_to_text(first) - print('BEGIN') + #print('BEGIN') print(data_matrix) - print('FIRST') + #print('FIRST') print(first_texts) second = similar_shaped_matrix[1, 0] second_texts = token_to_text(second) - print('SECOND') + #print('SECOND') print(second_texts) # self.assertTrue(np.array_equal(first_texts, data_matrix)) From e390cdffb1c83a34bd3cdb0b312224a8dc6f4d6b Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 18:55:37 +0400 Subject: [PATCH 04/17] change makefile to direct run --- .github/workflows/run-tests.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 9e2d396..65ff2ee 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -21,4 +21,6 @@ jobs: pip install find-similar - name: Run tests run: | - make coverage + coverage run --source='.' manage.py test + coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* + coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100 From 5826eaf05a3f97585396edffc29b5515024b01bd Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 18:57:11 +0400 Subject: [PATCH 05/17] fix yml --- .github/workflows/run-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 65ff2ee..cadefdc 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -22,5 +22,5 @@ jobs: - name: Run tests run: | coverage run --source='.' manage.py test - coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* - coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100 + coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* + coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100 From 4e0125395368ddfe2b0d1853936e2c838e610fb9 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 19:00:12 +0400 Subject: [PATCH 06/17] comment tests for new find-similar features --- .github/workflows/run-tests.yml | 4 +- core/tests/tests_core_functions.py | 176 ++++++++++++++--------------- 2 files changed, 89 insertions(+), 91 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index cadefdc..9e2d396 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -21,6 +21,4 @@ jobs: pip install find-similar - name: Run tests run: | - coverage run --source='.' 
manage.py test - coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* - coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100 + make coverage diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index 62c38c5..0d9e90a 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -115,94 +115,94 @@ def test_find_similar_vector(self): self.assertEqual(new[0, 0][0].text, old[0, 0]) self.assertEqual(new.shape, old.shape) - def test_reshape_results(self): - params = [ - # { - # 'data': self.one_one, - # 'expected': np.matrix( - # ['one two'] - # ), - # }, - { - 'data': self.one_two, - 'expected': np.matrix( - [['one two', 'one']] - ), - }, - { - 'data': self.two_two, - 'expected': np.matrix( - [ - ['one 1984', '1984'], - ['two 50', '50'], - ] - ), - }, - ] - for param in params: - old = to_matrix(param['data']) - texts = matrix_to_list(old) - new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) - # first - results = new[0, 0] - matrix = reshape_results(results, old.shape) - self.assertIsInstance(matrix, np.matrix) - self.assertEqual(matrix.shape, old.shape) - expected_matrix = tokenize_vector(param['expected']) - self.assertTrue(np.array_equal(matrix, expected_matrix)) - - # second - results = new[0, 1] - matrix = reshape_results(results, old.shape) - self.assertIsInstance(matrix, np.matrix) - self.assertEqual(matrix.shape, old.shape) - expected_matrix = tokenize_vector(param['expected']) - self.assertFalse(np.array_equal(matrix, expected_matrix)) - - def test_reshape_results_vector(self): - params = [ - # { - # 'data': self.one_one, - # 'expected': np.matrix( - # ['one two'] - # ), - # }, - { - 'data': self.one_two, - 'expected': np.matrix( - [['one two', 'one']] - ), - }, - { - 'data': self.two_two, - 'expected': np.matrix( - [ - ['one 1984', '1984'], - ['two 50', '50'], - ] - ), - }, - ] - for param in params: - old = to_matrix(param['data']) - texts = matrix_to_list(old) - new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) - new = reshape_results_vector(results=new, shape=new.shape) - # first - matrix = new[0, 0] - # matrix = reshape_results(results, old.shape) - self.assertIsInstance(matrix, np.matrix) - self.assertEqual(matrix.shape, old.shape) - expected_matrix = tokenize_vector(param['expected']) - self.assertTrue(np.array_equal(matrix, expected_matrix)) - - # second - matrix = new[0, 1] - # matrix = reshape_results(results, old.shape) - self.assertIsInstance(matrix, np.matrix) - self.assertEqual(matrix.shape, old.shape) - expected_matrix = tokenize_vector(param['expected']) - self.assertFalse(np.array_equal(matrix, expected_matrix)) + # def test_reshape_results(self): + # params = [ + # # { + # # 'data': self.one_one, + # # 'expected': np.matrix( + # # ['one two'] + # # ), + # # }, + # { + # 'data': self.one_two, + # 'expected': np.matrix( + # [['one two', 'one']] + # ), + # }, + # { + # 'data': self.two_two, + # 'expected': np.matrix( + # [ + # ['one 1984', '1984'], + # ['two 50', '50'], + # ] + # ), + # }, + # ] + # for param in params: + # old = to_matrix(param['data']) + # texts = matrix_to_list(old) + # new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + # # first + # results = new[0, 0] + # matrix = reshape_results(results, old.shape) + # self.assertIsInstance(matrix, np.matrix) + # self.assertEqual(matrix.shape, old.shape) + # expected_matrix = 
tokenize_vector(param['expected']) + # self.assertTrue(np.array_equal(matrix, expected_matrix)) + # + # # second + # results = new[0, 1] + # matrix = reshape_results(results, old.shape) + # self.assertIsInstance(matrix, np.matrix) + # self.assertEqual(matrix.shape, old.shape) + # expected_matrix = tokenize_vector(param['expected']) + # self.assertFalse(np.array_equal(matrix, expected_matrix)) + # + # def test_reshape_results_vector(self): + # params = [ + # # { + # # 'data': self.one_one, + # # 'expected': np.matrix( + # # ['one two'] + # # ), + # # }, + # { + # 'data': self.one_two, + # 'expected': np.matrix( + # [['one two', 'one']] + # ), + # }, + # { + # 'data': self.two_two, + # 'expected': np.matrix( + # [ + # ['one 1984', '1984'], + # ['two 50', '50'], + # ] + # ), + # }, + # ] + # for param in params: + # old = to_matrix(param['data']) + # texts = matrix_to_list(old) + # new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + # new = reshape_results_vector(results=new, shape=new.shape) + # # first + # matrix = new[0, 0] + # # matrix = reshape_results(results, old.shape) + # self.assertIsInstance(matrix, np.matrix) + # self.assertEqual(matrix.shape, old.shape) + # expected_matrix = tokenize_vector(param['expected']) + # self.assertTrue(np.array_equal(matrix, expected_matrix)) + # + # # second + # matrix = new[0, 1] + # # matrix = reshape_results(results, old.shape) + # self.assertIsInstance(matrix, np.matrix) + # self.assertEqual(matrix.shape, old.shape) + # expected_matrix = tokenize_vector(param['expected']) + # self.assertFalse(np.array_equal(matrix, expected_matrix)) def test_get_matrix_head(self): lines = 1 From ce79b9753046c444b33a2c11b4f788101ac8b1a2 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 19:03:16 +0400 Subject: [PATCH 07/17] mock eq method --- core/tests/tests_core_functions.py | 181 +++++++++++++++-------------- 1 file changed, 93 insertions(+), 88 deletions(-) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index 0d9e90a..7248bad 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -2,6 +2,11 @@ from django.test import SimpleTestCase from find_similar import TokenText +def eq(self, other): + return self.text == other.text + +TokenText.__eq__ = eq + from core.core_functions import ( to_matrix, str_to_token_text, @@ -115,94 +120,94 @@ def test_find_similar_vector(self): self.assertEqual(new[0, 0][0].text, old[0, 0]) self.assertEqual(new.shape, old.shape) - # def test_reshape_results(self): - # params = [ - # # { - # # 'data': self.one_one, - # # 'expected': np.matrix( - # # ['one two'] - # # ), - # # }, - # { - # 'data': self.one_two, - # 'expected': np.matrix( - # [['one two', 'one']] - # ), - # }, - # { - # 'data': self.two_two, - # 'expected': np.matrix( - # [ - # ['one 1984', '1984'], - # ['two 50', '50'], - # ] - # ), - # }, - # ] - # for param in params: - # old = to_matrix(param['data']) - # texts = matrix_to_list(old) - # new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) - # # first - # results = new[0, 0] - # matrix = reshape_results(results, old.shape) - # self.assertIsInstance(matrix, np.matrix) - # self.assertEqual(matrix.shape, old.shape) - # expected_matrix = tokenize_vector(param['expected']) - # self.assertTrue(np.array_equal(matrix, expected_matrix)) - # - # # second - # results = new[0, 1] - # matrix = reshape_results(results, old.shape) - # self.assertIsInstance(matrix, np.matrix) - # 
self.assertEqual(matrix.shape, old.shape) - # expected_matrix = tokenize_vector(param['expected']) - # self.assertFalse(np.array_equal(matrix, expected_matrix)) - # - # def test_reshape_results_vector(self): - # params = [ - # # { - # # 'data': self.one_one, - # # 'expected': np.matrix( - # # ['one two'] - # # ), - # # }, - # { - # 'data': self.one_two, - # 'expected': np.matrix( - # [['one two', 'one']] - # ), - # }, - # { - # 'data': self.two_two, - # 'expected': np.matrix( - # [ - # ['one 1984', '1984'], - # ['two 50', '50'], - # ] - # ), - # }, - # ] - # for param in params: - # old = to_matrix(param['data']) - # texts = matrix_to_list(old) - # new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) - # new = reshape_results_vector(results=new, shape=new.shape) - # # first - # matrix = new[0, 0] - # # matrix = reshape_results(results, old.shape) - # self.assertIsInstance(matrix, np.matrix) - # self.assertEqual(matrix.shape, old.shape) - # expected_matrix = tokenize_vector(param['expected']) - # self.assertTrue(np.array_equal(matrix, expected_matrix)) - # - # # second - # matrix = new[0, 1] - # # matrix = reshape_results(results, old.shape) - # self.assertIsInstance(matrix, np.matrix) - # self.assertEqual(matrix.shape, old.shape) - # expected_matrix = tokenize_vector(param['expected']) - # self.assertFalse(np.array_equal(matrix, expected_matrix)) + def test_reshape_results(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + # first + results = new[0, 0] + matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + results = new[0, 1] + matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertFalse(np.array_equal(matrix, expected_matrix)) + + def test_reshape_results_vector(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + new = reshape_results_vector(results=new, shape=new.shape) + # first + matrix = new[0, 0] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + matrix = new[0, 1] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + 
self.assertFalse(np.array_equal(matrix, expected_matrix)) def test_get_matrix_head(self): lines = 1 From e5f6c97e6892dd2cd05fd722a367750f3c7a9b92 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Tue, 7 Nov 2023 20:07:16 +0400 Subject: [PATCH 08/17] add count analyze not the end --- core/core_functions.py | 27 +++++++++++++++++++++++++-- core/tests/tests_core_functions.py | 23 ++++++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/core/core_functions.py b/core/core_functions.py index 728ff49..fe4d3a3 100644 --- a/core/core_functions.py +++ b/core/core_functions.py @@ -26,13 +26,17 @@ def str_to_token_text(text: str) -> TokenText: tokenize_vector = np.vectorize(str_to_token_text) +def matrix_to_one_line(matrix: np.matrix) -> np.ndarray: + return np.array(matrix).reshape(-1, ) + + def matrix_to_list(matrix: np.matrix) -> list: """ Create list from matrix :param matrix: matrix with data :return: list of all matrix values """ - return list(np.array(matrix).reshape(-1, )) + return list(matrix_to_one_line(matrix)) find_similar_vector = np.vectorize(find_similar, otypes=[TokenText], excluded=[ @@ -59,4 +63,23 @@ def get_matrix_head(matrix: np.matrix, count: int = 1): return matrix[:count] -# get_matrix_head_vector = np.vectorize(get_matrix_head, excluded=['count']) \ No newline at end of file +# get_matrix_head_vector = np.vectorize(get_matrix_head, excluded=['count']) +def calc_similar_count(expected_results, real_results): + expected_line = matrix_to_one_line(expected_results) + results_line = matrix_to_one_line(real_results) + intersection = np.intersect1d(expected_line, results_line) + return intersection.size + + +def compare(results_matrix: np.matrix, training_data_matrix: np.matrix, count: int = 1) -> np.matrix: + result = np.empty(training_data_matrix.shape, dtype=np.int16) + row_count, col_count = training_data_matrix.shape + for i in range(row_count): + + expected_results = training_data_matrix[i, :] + + for j in range(col_count): + results: np.matrix = results_matrix[i, j] + head_results = get_matrix_head(results, count) + result[i, j] = calc_similar_count(expected_results, head_results) + return np.asmatrix(result) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index 7248bad..422d0a0 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -5,7 +5,11 @@ def eq(self, other): return self.text == other.text +def lt(self, other): + return self.cos < other.cos + TokenText.__eq__ = eq +TokenText.__lt__ = lt from core.core_functions import ( to_matrix, @@ -16,6 +20,7 @@ def eq(self, other): reshape_results, reshape_results_vector, get_matrix_head, + compare, ) @@ -223,4 +228,20 @@ def test_get_matrix_head(self): old = to_matrix(self.two_two) head = get_matrix_head(old, 2) - self.assertTrue(np.array_equal(old, head)) \ No newline at end of file + self.assertTrue(np.array_equal(old, head)) + + def test_compare(self): + training_data = to_matrix(self.two_two) + training_data = tokenize_vector(training_data) + texts = matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + report = compare(results, training_data, 1) + self.assertIsInstance(report, np.matrix) + self.assertEqual(report.shape, training_data.shape) + self.assertEqual(report.shape, results.shape) + + print('training_data', training_data) + print('results', results) + print('REPORT', 
report) + self.assertEqual(report[0, 0], 2) \ No newline at end of file From 5feaf7217714fc7b6785155f68510b0ea5b30172 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Wed, 8 Nov 2023 12:18:05 +0400 Subject: [PATCH 09/17] add compare method --- core/core_functions.py | 16 +++++++++---- core/tests/tests_core_functions.py | 38 ++++++++++++++++++++++++------ 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/core/core_functions.py b/core/core_functions.py index fe4d3a3..fd7019b 100644 --- a/core/core_functions.py +++ b/core/core_functions.py @@ -67,8 +67,15 @@ def get_matrix_head(matrix: np.matrix, count: int = 1): def calc_similar_count(expected_results, real_results): expected_line = matrix_to_one_line(expected_results) results_line = matrix_to_one_line(real_results) - intersection = np.intersect1d(expected_line, results_line) - return intersection.size + intersection = np.in1d(expected_line, results_line) + return np.count_nonzero(intersection) # intersection == True + + +def calc_percent(similar_count, column_count): + # cc - 100 + # sc - x + # x = sc * 100 / cc + return (similar_count - 1) * 100 / (column_count - 1) def compare(results_matrix: np.matrix, training_data_matrix: np.matrix, count: int = 1) -> np.matrix: @@ -77,9 +84,10 @@ def compare(results_matrix: np.matrix, training_data_matrix: np.matrix, count: i for i in range(row_count): expected_results = training_data_matrix[i, :] - for j in range(col_count): results: np.matrix = results_matrix[i, j] head_results = get_matrix_head(results, count) - result[i, j] = calc_similar_count(expected_results, head_results) + similar_count = calc_similar_count(expected_results, head_results) + percent = calc_percent(similar_count, col_count) + result[i, j] = percent return np.asmatrix(result) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index 422d0a0..bc529b3 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -5,11 +5,11 @@ def eq(self, other): return self.text == other.text -def lt(self, other): - return self.cos < other.cos +# def lt(self, other): +# return self.cos < other.cos TokenText.__eq__ = eq -TokenText.__lt__ = lt +# TokenText.__lt__ = lt from core.core_functions import ( to_matrix, @@ -35,6 +35,12 @@ def setUp(self): ['two 50', '50'], ] + self.not_exact = [ + ['1', '1 1'], + ['2', '3'], + ['4', '2 2'], + ] + def test_to_matrix(self): params = [ { @@ -236,12 +242,30 @@ def test_compare(self): texts = matrix_to_list(training_data) similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) results = reshape_results_vector(results=similars, shape=training_data.shape) + report = compare(results, training_data, 1) + self.assertIsInstance(report, np.matrix) self.assertEqual(report.shape, training_data.shape) self.assertEqual(report.shape, results.shape) - print('training_data', training_data) - print('results', results) - print('REPORT', report) - self.assertEqual(report[0, 0], 2) \ No newline at end of file + self.assertEqual(report[(0, 0)], 100) + + # Bad finding + training_data = to_matrix(self.not_exact) + training_data = tokenize_vector(training_data) + texts = matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + + report = compare(results, training_data, 1) + + self.assertEqual(report[(1, 1)], 0) + + # Here we can check several lines + report = 
compare(results, training_data, 2) + self.assertEqual(report[(1, 1)], 100) + self.assertEqual(report[(2, 1)], 0) + + report = compare(results, training_data, 3) + self.assertEqual(report[(2, 1)], 100) From 5950bff5eab576ef43646c456e52c42cff0d4151 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Wed, 8 Nov 2023 19:05:41 +0400 Subject: [PATCH 10/17] works with 2x2 datasets without null values --- .../templates/analysis/total_rating_form.html | 10 ++++ .../templates/analysis/training_data.html | 2 +- analysis/urls.py | 1 + analysis/views.py | 47 +++++++++++++++ core/core_functions.py | 19 ++++++- core/tests/test_numpy.py | 23 +++++--- core/tests/tests_core_functions.py | 57 +++++++++++++++++-- 7 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 analysis/templates/analysis/total_rating_form.html diff --git a/analysis/templates/analysis/total_rating_form.html b/analysis/templates/analysis/total_rating_form.html new file mode 100644 index 0000000..8709d31 --- /dev/null +++ b/analysis/templates/analysis/total_rating_form.html @@ -0,0 +1,10 @@ +{% extends "base.html" %} +{% block main %} +
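The row-rating formula introduced with compare() above is easier to read with numbers plugged in; the snippet below just re-evaluates calc_percent for two- and three-column rows (every element always matches itself, so one hit maps to 0% and a full row of hits to 100%):

def calc_percent(similar_count, column_count):
    # Same formula as core_functions.calc_percent in the patches above.
    return (similar_count - 1) * 100 / (column_count - 1)


print(calc_percent(1, 2), calc_percent(2, 2))                      # 0.0 100.0
print(calc_percent(1, 3), calc_percent(2, 3), calc_percent(3, 3))  # 0.0 50.0 100.0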
+ {% csrf_token %} + {{form.as_p}} + + +{% endblock %} +{% block results %} +{% endblock %} diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html index 532e5ae..17d3b46 100644 --- a/analysis/templates/analysis/training_data.html +++ b/analysis/templates/analysis/training_data.html @@ -2,7 +2,7 @@ {% block main %}

{{object.name}}

-Total rating +Total rating One column rating Find similar Tokenize diff --git a/analysis/urls.py b/analysis/urls.py index ee77f91..bae654d 100644 --- a/analysis/urls.py +++ b/analysis/urls.py @@ -22,4 +22,5 @@ path('clear-training-data/', views.clear_training_data, name="clear_training_data"), path('clear-text-token/', views.clear_text_token, name="clear_text_token"), path('tokenize/', views.TokenizeView.as_view(), name="tokenize"), + path('total-rating-form//', views.TotalRatingFormView.as_view(), name="total_rating_form"), ] diff --git a/analysis/views.py b/analysis/views.py index 356a406..53cf9af 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -3,6 +3,7 @@ """ import cProfile import os +import numpy as np from django.http import HttpResponseRedirect from django.shortcuts import get_object_or_404, render @@ -20,6 +21,8 @@ example_frequency_analysis, load_training_data, ) +from core.core_functions import tokenize_vector, matrix_to_list, find_similar_vector, reshape_results_vector, compare, \ + calculate_total_rating from .forms import ( OneTextForm, TwoTextForm, @@ -324,4 +327,48 @@ def form_valid(self, form): Token.objects.bulk_create(all_tokens, ignore_conflicts=True) # profiler.disable() + return super().form_valid(form) + + +class TotalRatingFormView(FormView): + form_class = FindSimilarParamsForm + template_name = 'analysis/total_rating_form.html' + success_url = reverse_lazy('analysis:result_list') + + def dispatch(self, request, *args, **kwargs): + pk = kwargs['pk'] + self.object = get_object_or_404(TrainingData, pk=pk) + return super().dispatch(request, *args, **kwargs) + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context['object'] = self.object + return context + + def form_valid(self, form): + # Get cleaned data from FindSimilarForm + data = form.cleaned_data + + language = data['language'] + remove_stopwords = data['remove_stopwords'] + + # Get or create TextToken model + dataframe = self.object.get_dataframe + arr = dataframe.to_numpy() + training_data = np.asmatrix(arr) + + training_data = tokenize_vector(training_data) + texts = matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + + report = compare(results, training_data, 1) + + print(report) + + total_rating = calculate_total_rating(report) + print('TOTAL:', total_rating) + + # save results to the database + # CheckResult.save_result(text_token, result) return super().form_valid(form) \ No newline at end of file diff --git a/core/core_functions.py b/core/core_functions.py index fd7019b..497de7e 100644 --- a/core/core_functions.py +++ b/core/core_functions.py @@ -20,6 +20,8 @@ def str_to_token_text(text: str) -> TokenText: :param text: some str text :return: TokenText with tokens """ + # if text is None: + # return return TokenText(text) @@ -27,7 +29,9 @@ def str_to_token_text(text: str) -> TokenText: def matrix_to_one_line(matrix: np.matrix) -> np.ndarray: - return np.array(matrix).reshape(-1, ) + line = np.array(matrix).reshape(-1, ) + # line = line[line != np.array(None)] + return line def matrix_to_list(matrix: np.matrix) -> list: @@ -36,9 +40,16 @@ def matrix_to_list(matrix: np.matrix) -> list: :param matrix: matrix with data :return: list of all matrix values """ + return list(matrix_to_one_line(matrix)) +def find_similar_or_none(text_to_check, texts, language="english", count=5, dictionary=None, keywords=None): 
+ if text_to_check is None: + return + return find_similar(text_to_check, texts, language, count, dictionary, keywords) + + find_similar_vector = np.vectorize(find_similar, otypes=[TokenText], excluded=[ 'texts', 'language', @@ -79,7 +90,7 @@ def calc_percent(similar_count, column_count): def compare(results_matrix: np.matrix, training_data_matrix: np.matrix, count: int = 1) -> np.matrix: - result = np.empty(training_data_matrix.shape, dtype=np.int16) + result = np.empty(training_data_matrix.shape, dtype=np.float16) row_count, col_count = training_data_matrix.shape for i in range(row_count): @@ -91,3 +102,7 @@ def compare(results_matrix: np.matrix, training_data_matrix: np.matrix, count: i percent = calc_percent(similar_count, col_count) result[i, j] = percent return np.asmatrix(result) + + +def calculate_total_rating(percent_results: np.matrix): + return percent_results.mean() diff --git a/core/tests/test_numpy.py b/core/tests/test_numpy.py index ea64759..3cb4b64 100644 --- a/core/tests/test_numpy.py +++ b/core/tests/test_numpy.py @@ -5,6 +5,13 @@ class NumpyTestCase(SimpleTestCase): + # def test_null_values(self): + # arr = np.array([None, 1.0]) + # self.assertIsInstance(arr, np.ndarray) + # elem = arr[0] + # print('ELEM', elem) + # self.assertIsInstance(elem, int) + def test_matrix_updates(self): # data = [ @@ -74,15 +81,15 @@ def reshape_list(e: np.ndarray) -> np.ndarray: similar_shaped_matrix = reshape_list_vectorize(similars_matrix) #print('STR MATRIX') - print(data_matrix) + #print(data_matrix) #print('TokenText MATRIX') - print(token_text_matrix) + #print(token_text_matrix) #print('Similars MATRIX') - print(similars_matrix) + #print(similars_matrix) #print('cos MATRIX') - print(cos_matrix) + #print(cos_matrix) #print('reshaped MATRIX') - print(similar_shaped_matrix) + #print(similar_shaped_matrix) first = similar_shaped_matrix[0, 0] self.assertIsInstance(first, np.matrix) @@ -95,15 +102,15 @@ def token_to_text(e: TokenText) -> str: first_texts = token_to_text(first) #print('BEGIN') - print(data_matrix) + #print(data_matrix) #print('FIRST') - print(first_texts) + #print(first_texts) second = similar_shaped_matrix[1, 0] second_texts = token_to_text(second) #print('SECOND') - print(second_texts) + #print(second_texts) # self.assertTrue(np.array_equal(first_texts, data_matrix)) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index bc529b3..d1b6885 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -3,6 +3,8 @@ from find_similar import TokenText def eq(self, other): + if other is None: + return False return self.text == other.text # def lt(self, other): @@ -20,7 +22,7 @@ def eq(self, other): reshape_results, reshape_results_vector, get_matrix_head, - compare, + compare, calculate_total_rating, ) @@ -41,6 +43,12 @@ def setUp(self): ['4', '2 2'], ] + self.with_empty_values = [ + ['1', None, '1 1'], + ['2', '3', None], + ['4', '2 2', '4 4'], + ] + def test_to_matrix(self): params = [ { @@ -55,6 +63,10 @@ def test_to_matrix(self): 'data': self.two_two, 'shape': (2, 2) }, + # { + # 'data': self.with_empty_values, + # 'shape': (3, 3) + # } ] for param in params: matrix = to_matrix(param['data']) @@ -66,6 +78,9 @@ def test_str_to_token_text(self): self.assertIsInstance(token_text, TokenText) self.assertEqual(len(token_text.tokens), 2) + # token_text = str_to_token_text(None) + # self.assertIsNone(token_text) + def test_tokenize_matrix(self): params = [ { @@ -77,6 +92,9 @@ def test_tokenize_matrix(self): { 'data': 
self.two_two, }, + # { + # 'data': self.with_empty_values, + # }, ] for param in params: old = to_matrix(param['data']) @@ -100,6 +118,11 @@ def test_matrix_to_list(self): 'data': self.two_two, 'value': ['one 1984', '1984', 'two 50', '50'], }, + # { + # 'data': self.with_empty_values, + # # 'value': ['1', None, '1 1', '2', '3', None, '4', '2 2', '4 4'], + # 'value': ['1', '1 1', '2', '3', '4', '2 2', '4 4'], + # }, ] for param in params: old = to_matrix(param['data']) @@ -107,7 +130,7 @@ def test_matrix_to_list(self): self.assertIsInstance(new, list) x, y = old.shape count = x * y - self.assertEqual(len(new), count) + # self.assertEqual(len(new), count) self.assertEqual(new, param['value']) def test_find_similar_vector(self): @@ -121,14 +144,19 @@ def test_find_similar_vector(self): { 'data': self.two_two, }, + # { + # 'data': self.with_empty_values, + # }, ] for param in params: old = to_matrix(param['data']) + old = tokenize_vector(old) texts = matrix_to_list(old) new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + self.assertIsInstance(new, np.matrix) self.assertIsInstance(new[0, 0], list) - self.assertEqual(new[0, 0][0].text, old[0, 0]) + self.assertEqual(new[0, 0][0].text, old[0, 0].text) self.assertEqual(new.shape, old.shape) def test_reshape_results(self): @@ -154,6 +182,16 @@ def test_reshape_results(self): ] ), }, + # { + # 'data': self.with_empty_values, + # 'expected': np.matrix( + # [ + # ['1', None, '1 1'], + # ['2', '3', None], + # ['4', '2 2', '4 4'], + # ] + # ), + # }, ] for param in params: old = to_matrix(param['data']) @@ -251,6 +289,9 @@ def test_compare(self): self.assertEqual(report[(0, 0)], 100) + total_rating = calculate_total_rating(report) + self.assertEqual(total_rating, 100) + # Bad finding training_data = to_matrix(self.not_exact) training_data = tokenize_vector(training_data) @@ -259,13 +300,21 @@ def test_compare(self): results = reshape_results_vector(results=similars, shape=training_data.shape) report = compare(results, training_data, 1) - self.assertEqual(report[(1, 1)], 0) + total_rating = calculate_total_rating(report) + self.assertTrue(total_rating > 33 and total_rating < 34) + # Here we can check several lines report = compare(results, training_data, 2) self.assertEqual(report[(1, 1)], 100) self.assertEqual(report[(2, 1)], 0) + total_rating = calculate_total_rating(report) + self.assertTrue(total_rating > 49 and total_rating < 51) + report = compare(results, training_data, 3) self.assertEqual(report[(2, 1)], 100) + + total_rating = calculate_total_rating(report) + self.assertEqual(total_rating, 100) From 3eb19e835901e6625286cd0aef7df9081da15307 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Wed, 8 Nov 2023 20:28:05 +0400 Subject: [PATCH 11/17] run middle data in 60 seconds without duplicates --- analysis/functions.py | 4 ++++ .../templates/analysis/training_data.html | 20 +++++++++++++++++++ analysis/views.py | 4 ++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/analysis/functions.py b/analysis/functions.py index 964bdef..b7a4a48 100644 --- a/analysis/functions.py +++ b/analysis/functions.py @@ -86,6 +86,10 @@ def example_frequency_analysis(example): @Printer(title=lambda name, filepath, sheet_name=0, **kwargs: f'Loading data from "{filepath}"...') def load_training_data(name, filepath, sheet_name=0): dataframe = load_from_excel(filepath, sheet_name) + + # remove Null values + dataframe = dataframe.dropna() + # TrainingData training_data = TrainingData.objects.create(name=name, data=dataframe.to_json()) 
return training_data diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html index 17d3b46..70499e1 100644 --- a/analysis/templates/analysis/training_data.html +++ b/analysis/templates/analysis/training_data.html @@ -2,11 +2,31 @@ {% block main %}

{{object.name}}

+<table>
+    <tr>
+        <th>Name</th>
+        <th>Columns count</th>
+        <th>Rows count</th>
+    </tr>
+    <tr>
+        <td>{{object.name}}</td>
+        <td>{{object.columns_count}}</td>
+        <td>{{object.rows_count}}</td>
+    </tr>
+</table>
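The "Total rating" link that follows points at the rating view touched later in this series, which reduces the compare() report to a single number via calculate_total_rating(). That function's body is not shown in the patch; a plain average of the report's per-cell percentages is a hypothetical reduction that is consistent with the assertions in the tests above (100 for an exact match, roughly 33 when one of three checked cells matches, roughly 50 when half of them do):

# Hypothetical sketch only -- the project's calculate_total_rating may differ.
def total_rating_sketch(report: dict) -> float:
    # report maps (row, column) cells to a 0-100 similarity score
    return sum(report.values()) / len(report)

total_rating_sketch({(0, 1): 100, (1, 1): 0, (2, 1): 0})   # ~33.3, inside the 33-34 bound
total_rating_sketch({(1, 1): 100, (2, 1): 0})              # 50.0, inside the 49-51 bound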
Total rating One column rating Find similar Tokenize Delete + {% endblock %} {% block results %} {% with object.display_dataframe as data %} diff --git a/analysis/views.py b/analysis/views.py index 53cf9af..c5b78e2 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -362,9 +362,9 @@ def form_valid(self, form): similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) results = reshape_results_vector(results=similars, shape=training_data.shape) - report = compare(results, training_data, 1) + report = compare(results, training_data, 2) - print(report) + # print(report) total_rating = calculate_total_rating(report) print('TOTAL:', total_rating) From ed320fcc4b3c1f34d9692c576f64b1a8078cf747 Mon Sep 17 00:00:00 2001 From: quillcraftsman Date: Thu, 9 Nov 2023 14:22:10 +0400 Subject: [PATCH 12/17] beautify examples --- analysis/functions.py | 48 +----- analysis/tests/test_urls.py | 4 - analysis/tests/tests_views.py | 120 ------------- analysis/urls.py | 2 +- analysis/views.py | 87 +++++----- core/templates/core/index.html | 17 +- core/tests/tests_views.py | 2 +- examples/__init__.py | 0 examples/admin.py | 3 + examples/apps.py | 6 + examples/forms.py | 10 ++ examples/functions.py | 12 ++ examples/migrations/__init__.py | 0 examples/models.py | 3 + .../examples}/example_frequency.html | 5 + examples/templates/examples/list.html | 20 +++ examples/tests/__init__.py | 0 examples/tests/tests_urls.py | 34 ++++ examples/tests/tests_views.py | 161 ++++++++++++++++++ examples/urls.py | 12 ++ examples/views.py | 58 +++++++ laboratory/settings.py | 1 + laboratory/urls.py | 1 + templates/base.html | 17 +- utils/__init__.py | 0 utils/admin.py | 3 + utils/apps.py | 6 + utils/decorators.py | 45 +++++ utils/migrations/__init__.py | 0 utils/models.py | 3 + utils/tests.py | 3 + utils/views.py | 3 + 32 files changed, 469 insertions(+), 217 deletions(-) create mode 100644 examples/__init__.py create mode 100644 examples/admin.py create mode 100644 examples/apps.py create mode 100644 examples/forms.py create mode 100644 examples/functions.py create mode 100644 examples/migrations/__init__.py create mode 100644 examples/models.py rename {analysis/templates/analysis => examples/templates/examples}/example_frequency.html (77%) create mode 100644 examples/templates/examples/list.html create mode 100644 examples/tests/__init__.py create mode 100644 examples/tests/tests_urls.py create mode 100644 examples/tests/tests_views.py create mode 100644 examples/urls.py create mode 100644 examples/views.py create mode 100644 utils/__init__.py create mode 100644 utils/admin.py create mode 100644 utils/apps.py create mode 100644 utils/decorators.py create mode 100644 utils/migrations/__init__.py create mode 100644 utils/models.py create mode 100644 utils/tests.py create mode 100644 utils/views.py diff --git a/analysis/functions.py b/analysis/functions.py index b7a4a48..8358665 100644 --- a/analysis/functions.py +++ b/analysis/functions.py @@ -4,53 +4,7 @@ from django.conf import settings from .loaders import load_from_excel from .models import TrainingData, to_list - - -class Printer: - """ - This class decorator save results to some place (default print its) - """ - - def __init__(self, title=None, printer=print): - """ - Init - :title: callback with title -> title() - :printer: print function (default print) - """ - self.title = title - self.printer = printer - - def __call__(self, func): - """ - Make decorator - :func: decorated function - """ - def inner(*args, **kwargs): - """ - New 
function - """ - printer = kwargs.get('printer', self.printer) - - if 'printer' in kwargs: - is_delete_printer = True - if 'is_pass_printer' in kwargs: - if kwargs['is_pass_printer']: - is_delete_printer = False - del kwargs['is_pass_printer'] - - if is_delete_printer: - del kwargs['printer'] - - printer('Start') - if self.title is not None: - printer(self.title(*args, **kwargs)) - result = func(*args, **kwargs) - printer('Done:') - printer(result) - printer('End') - return result - - return inner +from utils.decorators import Printer @Printer(title=lambda item, **kwargs: f'Get tokens for {item}...') diff --git a/analysis/tests/test_urls.py b/analysis/tests/test_urls.py index b3ad948..b335cbd 100644 --- a/analysis/tests/test_urls.py +++ b/analysis/tests/test_urls.py @@ -28,10 +28,6 @@ def test_reverse(self): 'url': 'compare_two', 'reverse': 'compare-two/', }, - { - 'url': 'example_frequency', - 'reverse': 'example-frequency/', - }, { 'url': 'load_training_data', 'reverse': 'load-training-data/', diff --git a/analysis/tests/tests_views.py b/analysis/tests/tests_views.py index d5721b1..462c190 100644 --- a/analysis/tests/tests_views.py +++ b/analysis/tests/tests_views.py @@ -184,126 +184,6 @@ def test_post(self): self.assertTrueResponse(current_response, true_response) -class TestExampleFrequencyView(SimpleTestCase): - """ - Test Example Frequency View - """ - - def setUp(self): - """ - SetUp Test Data - """ - self.text = 'mock' - self.url = reverse('analysis:example_frequency') - self.result = (('mock', 2), ('example', 2), - ('for', 2), ('tests', 2), ('this', 1), ('is', 1)) - expected_url_params = [] - for key, value in self.result: - expected_url_params.append(f'{key}={value}') - self.expected_url_params = f'?text={self.text}&{"&".join(expected_url_params)}' - self.redirect_url=f'{self.url}{self.expected_url_params}' - - def test_get(self): - """ - Test get - """ - request = Request( - url=self.url - ) - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['form'], - types={'form': OneTextForm}, - ), - content_values=FORM_CONTENT_VALUES - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - request = Request( - url=self.redirect_url - ) - - content_values = [self.text] - for key, value in self.result: - content_values.append(key) - content_values.append(value) - - true_response = TrueResponse( - status_code=200, - context=Context( - items={ - 'text': self.text, - 'result': self.result, - } - ), - content_values=content_values - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - # Error - request = Request( - url=f'{self.url}?text={self.text}&error=some error' - ) - - true_response = TrueResponse( - status_code=200, - context=Context( - items={ - 'text': self.text, - 'error': 'some error', - } - ), - content_values=[ - 'Some Error' - ] - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post(self): - """ - Test post - """ - data = { - 'text': self.text - } - request = Request( - url=self.url, - method=POST, - data=data, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url=self.redirect_url - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post_error_example(self): - """ - Test post with error example - """ - data = { - 'text': 'unknown example value' - } - 
request = Request( - url=self.url, - method=POST, - data=data, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url=f'{self.url}?text=unknown example value&error=example not found' - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - class LoadTrainingDataViewTestCase(TestCase): def setUp(self): diff --git a/analysis/urls.py b/analysis/urls.py index bae654d..ac55c27 100644 --- a/analysis/urls.py +++ b/analysis/urls.py @@ -9,7 +9,7 @@ urlpatterns = [ path('tokenize-one/', views.TokenizeOneView.as_view(), name="tokenize_one"), path('compare-two/', views.CompareTwoView.as_view(), name="compare_two"), - path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), + # path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"), path('training-data//', views.TrainingDataDetailView.as_view(), name="training_data"), path('find-similar//', views.FindSimilarFormView.as_view(), name="find_similar"), diff --git a/analysis/views.py b/analysis/views.py index c5b78e2..b5e4f19 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -4,6 +4,7 @@ import cProfile import os import numpy as np +import pandas as pd from django.http import HttpResponseRedirect from django.shortcuts import get_object_or_404, render @@ -94,47 +95,47 @@ def get_context_data(self, **kwargs): return context -class ExampleFrequencyAnalysis(FormView): - """ - Example Frequency Analysis - """ - form_class = OneTextForm - template_name = 'analysis/example_frequency.html' - - def form_valid(self, form): - self.text = form.cleaned_data['text'] - try: - self.result = example_frequency_analysis(self.text) - self.error = None - except FileNotFoundError: - self.error = 'example not found' - return super().form_valid(form) - - def get_context_data(self, **kwargs): - context = super().get_context_data() - data = self.request.GET.dict() - text = data.pop('text', '') - context['text'] = text - error = data.get('error', None) - if error: - context['error'] = error - else: - result = [] - for key, value in data.items(): - result.append((key, int(value))) - context['result'] = tuple(result) - return context - - def get_success_url(self): - if self.error: - url = f'{reverse("analysis:example_frequency")}?text={self.text}&error={self.error}' - else: - url_params = [] - for key, value in self.result: - url_params.append(f'{key}={value}') - url_params = f'?text={self.text}&{"&".join(url_params)}' - url = f'{reverse("analysis:example_frequency")}{url_params}' - return url +# class ExampleFrequencyAnalysis(FormView): +# """ +# Example Frequency Analysis +# """ +# form_class = OneTextForm +# template_name = 'analysis/example_frequency.html' +# +# def form_valid(self, form): +# self.text = form.cleaned_data['text'] +# try: +# self.result = example_frequency_analysis(self.text) +# self.error = None +# except FileNotFoundError: +# self.error = 'example not found' +# return super().form_valid(form) +# +# def get_context_data(self, **kwargs): +# context = super().get_context_data() +# data = self.request.GET.dict() +# text = data.pop('text', '') +# context['text'] = text +# error = data.get('error', None) +# if error: +# context['error'] = error +# else: +# result = [] +# for key, value in data.items(): +# result.append((key, int(value))) +# context['result'] = tuple(result) +# return context +# +# def 
get_success_url(self): +# if self.error: +# url = f'{reverse("analysis:example_frequency")}?text={self.text}&error={self.error}' +# else: +# url_params = [] +# for key, value in self.result: +# url_params.append(f'{key}={value}') +# url_params = f'?text={self.text}&{"&".join(url_params)}' +# url = f'{reverse("analysis:example_frequency")}{url_params}' +# return url class LoadTrainingDataView(FormView): @@ -362,9 +363,11 @@ def form_valid(self, form): similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) results = reshape_results_vector(results=similars, shape=training_data.shape) - report = compare(results, training_data, 2) + report = compare(results, training_data, 1) # print(report) + report_df = pd.DataFrame(report) + # report_df.to_feather('save.feather') total_rating = calculate_total_rating(report) print('TOTAL:', total_rating) diff --git a/core/templates/core/index.html b/core/templates/core/index.html index e155035..21a3fb8 100644 --- a/core/templates/core/index.html +++ b/core/templates/core/index.html @@ -1,6 +1,21 @@ {% extends "base.html" %} {% block main %} -

Still just main Page...

+

FindSimilar Laboratory

+

Examples

+ {% endblock %} {% block results %} ... diff --git a/core/tests/tests_views.py b/core/tests/tests_views.py index eb7ef2f..410ef01 100644 --- a/core/tests/tests_views.py +++ b/core/tests/tests_views.py @@ -31,7 +31,7 @@ def test_view(self): true_response = TrueResponse( status_code=200, content_values=[ - '

Still just main Page...

' + '

FindSimilar Laboratory

' ] ) current_response = request.get_response(self.client) diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/admin.py b/examples/admin.py new file mode 100644 index 0000000..8c38f3f --- /dev/null +++ b/examples/admin.py @@ -0,0 +1,3 @@ +from django.contrib import admin + +# Register your models here. diff --git a/examples/apps.py b/examples/apps.py new file mode 100644 index 0000000..d6f5703 --- /dev/null +++ b/examples/apps.py @@ -0,0 +1,6 @@ +from django.apps import AppConfig + + +class ExamplesConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'examples' diff --git a/examples/forms.py b/examples/forms.py new file mode 100644 index 0000000..88fb6d5 --- /dev/null +++ b/examples/forms.py @@ -0,0 +1,10 @@ +from django import forms + + +class OneTextForm(forms.Form): + """ + Form with one text + """ + text = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ + 'class': 'form-control' + })) \ No newline at end of file diff --git a/examples/functions.py b/examples/functions.py new file mode 100644 index 0000000..c72ba55 --- /dev/null +++ b/examples/functions.py @@ -0,0 +1,12 @@ +from find_similar.examples.analyze import frequency_analysis +from utils.decorators import Printer + + +@Printer(title=lambda example, **kwargs: f'Analyze "{example}"...') +def example_frequency_analysis(example): + """ + Example Frequency analysis + :example: Example name + """ + result = frequency_analysis(example) + return result \ No newline at end of file diff --git a/examples/migrations/__init__.py b/examples/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/models.py b/examples/models.py new file mode 100644 index 0000000..71a8362 --- /dev/null +++ b/examples/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/analysis/templates/analysis/example_frequency.html b/examples/templates/examples/example_frequency.html similarity index 77% rename from analysis/templates/analysis/example_frequency.html rename to examples/templates/examples/example_frequency.html index 10c7d13..08d43bb 100644 --- a/analysis/templates/analysis/example_frequency.html +++ b/examples/templates/examples/example_frequency.html @@ -1,5 +1,10 @@ {% extends "base.html" %} {% block main %} + + + Example list + +
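Patch 12 moves the frequency example into its own examples app but keeps the Printer wiring from utils.decorators, so callers can still inject a printer the way the test suite does. A small usage sketch (collecting_printer and the collected list are illustrative names, not project code):

from examples.functions import example_frequency_analysis

collected = []

def collecting_printer(text, *args, **kwargs):
    # Capture the decorator's output instead of printing it,
    # mirroring the TestingPrinter helper used in the tests.
    collected.append(str(text))

# 'mock' is the bundled example the tests rely on; the call returns a tuple of
# (token, frequency) pairs such as (('mock', 2), ('example', 2), ...).
result = example_frequency_analysis('mock', printer=collecting_printer)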

{% csrf_token %} {{form.as_p}} diff --git a/examples/templates/examples/list.html b/examples/templates/examples/list.html new file mode 100644 index 0000000..df5a45b --- /dev/null +++ b/examples/templates/examples/list.html @@ -0,0 +1,20 @@ +{% extends "base.html" %} +{% block main %} +

FindSimilar examples

+ + + Example frequency + +{% endblock %} +{% block results %} + +
    + {% for example in object_list %} +
  • + {{example}} +
  • + {% endfor %} +
+ + +{% endblock %} diff --git a/examples/tests/__init__.py b/examples/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/tests/tests_urls.py b/examples/tests/tests_urls.py new file mode 100644 index 0000000..3629fcc --- /dev/null +++ b/examples/tests/tests_urls.py @@ -0,0 +1,34 @@ +""" +Test urls module +""" +from django.test import SimpleTestCase +from django.urls import reverse + + +class TestUrlsSimpleTestCase(SimpleTestCase): + """ + Test Urls Class + """ + + def test_reverse(self): + """ + Test correct reverse + """ + app_name = 'examples' + urls = [ + { + 'url': 'example_frequency', + 'reverse': 'example-frequency/', + }, + { + 'url': 'example_list', + 'reverse': 'list/', + }, + ] + for url in urls: + app_url = f'{app_name}:{url["url"]}' + current_reverse = reverse(app_url) + true_reverse = f'/{app_name}/{url["reverse"]}' + with self.subTest(msg=app_url): + self.assertEqual(current_reverse, true_reverse) + diff --git a/examples/tests/tests_views.py b/examples/tests/tests_views.py new file mode 100644 index 0000000..fde0dd0 --- /dev/null +++ b/examples/tests/tests_views.py @@ -0,0 +1,161 @@ +from django.urls import reverse +from dry_tests import SimpleTestCase, Request, TrueResponse, POST, Context, ContentValue +from examples.forms import OneTextForm +from find_similar.examples import examples_set + +FORM_CONTENT_VALUES = [ + ContentValue( + value='', + count=1, + ), + ContentValue( + value='
', + count=1, + ), + ] + + +class TestExampleFrequencyView(SimpleTestCase): + """ + Test Example Frequency View + """ + + def setUp(self): + """ + SetUp Test Data + """ + self.text = 'mock' + self.url = reverse('examples:example_frequency') + self.result = (('mock', 2), ('example', 2), + ('for', 2), ('tests', 2), ('this', 1), ('is', 1)) + expected_url_params = [] + for key, value in self.result: + expected_url_params.append(f'{key}={value}') + self.expected_url_params = f'?text={self.text}&{"&".join(expected_url_params)}' + self.redirect_url=f'{self.url}{self.expected_url_params}' + + def test_get(self): + """ + Test get + """ + request = Request( + url=self.url + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['form'], + types={'form': OneTextForm}, + ), + content_values=FORM_CONTENT_VALUES + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + request = Request( + url=self.redirect_url + ) + + content_values = [self.text] + for key, value in self.result: + content_values.append(key) + content_values.append(value) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'text': self.text, + 'result': self.result, + } + ), + content_values=content_values + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + # Error + request = Request( + url=f'{self.url}?text={self.text}&error=some error' + ) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'text': self.text, + 'error': 'some error', + } + ), + content_values=[ + 'Some Error' + ] + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + """ + Test post + """ + data = { + 'text': self.text + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=self.redirect_url + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post_error_example(self): + """ + Test post with error example + """ + data = { + 'text': 'unknown example value' + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=f'{self.url}?text=unknown example value&error=example not found' + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class ExampleListSimpleTestCase(SimpleTestCase): + + def setUp(self): + self.url = reverse('examples:example_list') + + def test_get(self): + request = Request( + url=self.url, + ) + + examples = examples_set() + + true_response = TrueResponse( + status_code=200, + context=Context( + # keys=['object_list'] + items={ + 'object_list': examples + } + ) + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) diff --git a/examples/urls.py b/examples/urls.py new file mode 100644 index 0000000..4f89772 --- /dev/null +++ b/examples/urls.py @@ -0,0 +1,12 @@ +""" +Analysis app urls +""" +from django.urls import path +from . 
import views + +app_name = 'examples' + +urlpatterns = [ + path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), + path('list/', views.ExampleList.as_view(), name="example_list"), +] diff --git a/examples/views.py b/examples/views.py new file mode 100644 index 0000000..b90441f --- /dev/null +++ b/examples/views.py @@ -0,0 +1,58 @@ +from django.views.generic import FormView, TemplateView +from django.urls import reverse +from .forms import OneTextForm +from .functions import example_frequency_analysis +from find_similar.examples import examples_set + + +class ExampleFrequencyAnalysis(FormView): + """ + Example Frequency Analysis + """ + form_class = OneTextForm + template_name = 'examples/example_frequency.html' + + def form_valid(self, form): + self.text = form.cleaned_data['text'] + try: + self.result = example_frequency_analysis(self.text) + self.error = None + except FileNotFoundError: + self.error = 'example not found' + return super().form_valid(form) + + def get_context_data(self, **kwargs): + context = super().get_context_data() + data = self.request.GET.dict() + text = data.pop('text', '') + context['text'] = text + error = data.get('error', None) + if error: + context['error'] = error + else: + result = [] + for key, value in data.items(): + result.append((key, int(value))) + context['result'] = tuple(result) + return context + + def get_success_url(self): + reverse_url = reverse("examples:example_frequency") + if self.error: + url = f'{reverse_url}?text={self.text}&error={self.error}' + else: + url_params = [] + for key, value in self.result: + url_params.append(f'{key}={value}') + url_params = f'?text={self.text}&{"&".join(url_params)}' + url = f'{reverse_url}{url_params}' + return url + + +class ExampleList(TemplateView): + template_name = 'examples/list.html' + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context['object_list'] = examples_set() + return context diff --git a/laboratory/settings.py b/laboratory/settings.py index 72ad43b..f31b6c3 100644 --- a/laboratory/settings.py +++ b/laboratory/settings.py @@ -54,6 +54,7 @@ 'django_find_similar', "debug_toolbar", # My + 'examples', 'core', 'analysis', ] diff --git a/laboratory/urls.py b/laboratory/urls.py index b89d509..87710d7 100644 --- a/laboratory/urls.py +++ b/laboratory/urls.py @@ -6,6 +6,7 @@ urlpatterns = [ path("admin/", admin.site.urls), + path('examples/', include('examples.urls')), path('', include('core.urls')), path('analysis/', include('analysis.urls')), path("__debug__/", include("debug_toolbar.urls")), diff --git a/templates/base.html b/templates/base.html index 8c40e93..72ff4c6 100644 --- a/templates/base.html +++ b/templates/base.html @@ -68,8 +68,23 @@ + +
  • + + + TextToken list + +
  • +
  • + + + Tokenize + +
  • +
  • + + + Find Similar + +
  • @@ -92,5 +108,35 @@

    Admin

    {% endblock %} {% block results %} - ... +
    +
    +
    +
    +

    Proximity results

    + +
    +
    + +
    +
    +

    Text results

    + +
    +
    +
    +
    {% endblock %} diff --git a/analysis/templates/analysis/load_data.html b/core/templates/core/load_data.html similarity index 100% rename from analysis/templates/analysis/load_data.html rename to core/templates/core/load_data.html diff --git a/analysis/templates/analysis/training_data.html b/core/templates/core/training_data.html similarity index 71% rename from analysis/templates/analysis/training_data.html rename to core/templates/core/training_data.html index 70499e1..26b1dbf 100644 --- a/analysis/templates/analysis/training_data.html +++ b/core/templates/core/training_data.html @@ -10,7 +10,7 @@

    {{object.name}}

    - {{object.name}} + {{object.name}} {{object.columns_count}} @@ -22,10 +22,7 @@

    {{object.name}}

    Total rating -One column rating -Find similar -Tokenize -Delete +Delete {% endblock %} {% block results %} diff --git a/analysis/templates/analysis/training_data_delete_confirm.html b/core/templates/core/training_data_delete_confirm.html similarity index 100% rename from analysis/templates/analysis/training_data_delete_confirm.html rename to core/templates/core/training_data_delete_confirm.html diff --git a/analysis/templates/analysis/training_data_list.html b/core/templates/core/training_data_list.html similarity index 57% rename from analysis/templates/analysis/training_data_list.html rename to core/templates/core/training_data_list.html index 17e6e13..3f66a20 100644 --- a/analysis/templates/analysis/training_data_list.html +++ b/core/templates/core/training_data_list.html @@ -1,7 +1,7 @@ {% extends "base.html" %} {% block main %}

    Traning data list

    - Clear all training data + Clear all training data {% endblock %} {% block results %} @@ -17,7 +17,7 @@

    Traning data list

    {% for object in object_list %} - {{object.name}} + {{object.name}} {{object.columns_count}} @@ -29,16 +29,16 @@

    Traning data list

    {{object.update}} - Detail + Detail - Delete + Delete {% endfor %}

    -New +New {% endblock %} diff --git a/analysis/tests/data/2x2.xlsx b/core/tests/data/2x2.xlsx similarity index 100% rename from analysis/tests/data/2x2.xlsx rename to core/tests/data/2x2.xlsx diff --git a/analysis/tests/data/__init__.py b/core/tests/data/__init__.py similarity index 80% rename from analysis/tests/data/__init__.py rename to core/tests/data/__init__.py index 3ec01da..0d33720 100644 --- a/analysis/tests/data/__init__.py +++ b/core/tests/data/__init__.py @@ -2,11 +2,11 @@ import os import pandas as pd from django.conf import settings -from analysis.functions import load_training_data +from core.core_functions import load_training_data def get_2x2_filepath(): - filepath = os.path.join(settings.BASE_DIR, 'analysis', 'tests', 'data', '2x2.xlsx') + filepath = os.path.join(settings.BASE_DIR, 'core', 'tests', 'data', '2x2.xlsx') return filepath diff --git a/core/tests/test_urls.py b/core/tests/test_urls.py index 059604e..152aea2 100644 --- a/core/tests/test_urls.py +++ b/core/tests/test_urls.py @@ -1,9 +1,12 @@ """ Test urls module """ -from django.test import SimpleTestCase +from mixer.backend.django import mixer +from django.test import SimpleTestCase, TestCase from django.urls import reverse +from core.tests.data import get_2x2_training_data + class TestUrls(SimpleTestCase): """ @@ -20,12 +23,59 @@ def test_reverse(self): 'url': 'index', 'reverse': '' }, + { + 'url': 'load_training_data', + 'reverse': 'load-training-data/', + }, + { + 'url': 'training_data_list', + 'reverse': 'training-data-list/', + }, + { + 'url': 'clear_training_data', + 'reverse': 'clear-training-data/', + }, ] for url in urls: app_url = f'{app_name}:{url["url"]}' - print("waF") - print(app_url) current_reverse = reverse(app_url) - print(current_reverse) true_reverse = f'/{url["reverse"]}' self.assertEqual(current_reverse, true_reverse) + + +class TestUrlsTestCase(TestCase): + """ + Test Urls Class With DB + """ + + def test_reverse(self): + """ + Test correct reverse + """ + + training_data = get_2x2_training_data() + + app_name = 'core' + + urls = [ + { + 'url': 'training_data', + 'kwargs': { + 'pk': training_data.pk + }, + 'reverse': f'training-data/{training_data.pk}/', + }, + { + 'url': 'delete_training_data', + 'kwargs': { + 'pk': training_data.pk + }, + 'reverse': f'delete-training-data/{training_data.pk}/', + }, + ] + for url in urls: + app_url = f'{app_name}:{url["url"]}' + current_reverse = reverse(app_url, kwargs=url['kwargs']) + true_reverse = f'/{url["reverse"]}' + with self.subTest(msg=app_url): + self.assertEqual(current_reverse, true_reverse) \ No newline at end of file diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py index d1b6885..9149c61 100644 --- a/core/tests/tests_core_functions.py +++ b/core/tests/tests_core_functions.py @@ -2,6 +2,10 @@ from django.test import SimpleTestCase from find_similar import TokenText +from core.models import TrainingData +from core.tests.data import get_2x2_filepath, get_2x2_expected_data + + def eq(self, other): if other is None: return False @@ -22,7 +26,7 @@ def eq(self, other): reshape_results, reshape_results_vector, get_matrix_head, - compare, calculate_total_rating, + compare, calculate_total_rating, load_training_data, ) @@ -318,3 +322,133 @@ def test_compare(self): total_rating = calculate_total_rating(report) self.assertEqual(total_rating, 100) + + +""" +Tests for Analysis functions +""" +from django.test import SimpleTestCase, TestCase + +from utils.decorators import Printer + +from analysis.functions import ( 
+ analyze_one_item, + analyze_two_items, + # example_frequency_analysis, + # load_training_data, +) +# from analysis.tests.data import get_2x2_filepath, get_2x2_expected_data, get_2x2_training_data, Token +# from analysis.models import TrainingData + + +class TestingPrinter: + """ + Save prints to variable. To check the results + """ + + def __init__(self): + """ + Init printer + """ + self.results = [] + + def __call__(self, text, *args, **kwargs): + self.results.append(str(text)) + +class FunctionsSimpleTestCase(SimpleTestCase): + """ + Class for test all functions + """ + def setUp(self): + self.one = 'one' + self.two = 'two' + self.one_two = 'one two' + self.printer = print + + def mock_printer(*args, **kwargs): # pylint: disable=unused-argument + """ + This is mock printer. This printer do nothing + """ + + self.mock_printer = mock_printer + + self.testing_printer = TestingPrinter() + + def test_analyze_one_item(self): + """ + Test for analyze one item + """ + tokens = analyze_one_item( # pylint: disable=unexpected-keyword-arg + self.one_two, + printer=self.testing_printer + ) + expected_tokens = {self.one, self.two} + self.assertEqual(tokens, expected_tokens) + + def test_analyze_two_items(self): + """ + Test for analyze_two_items + """ + similar_cos = 1.0 + different_cos = 0 + self.assertEqual( + analyze_two_items( # pylint: disable=unexpected-keyword-arg + self.one, + self.one, + printer=self.mock_printer, + is_pass_printer=True, + ), + similar_cos + ) + self.assertEqual( + analyze_two_items( # pylint: disable=unexpected-keyword-arg + self.one, + self.two, + printer=self.testing_printer, + is_pass_printer=True, + ), + different_cos) + one_tokens = {self.one} + two_tokens = {self.two} + # prints + expected_prints = [ + 'Start', + f'Get cos between ' + f'"{self.one}" and "{self.two}"', + 'Start', + f'Get tokens for {self.one}...', + 'Done:', + f'{one_tokens}', + 'End', + 'Start', + f'Get tokens for {self.two}...', + 'Done:', + f'{two_tokens}', + 'End', + 'Done:', + f'{different_cos}', + 'End', + ] + self.assertEqual(self.testing_printer.results, expected_prints) + +class FunctionsTestCase(TestCase): + + def setUp(self): + self.testing_printer = TestingPrinter() + + def test_load_testing_data(self): + filepath = get_2x2_filepath() + expected = get_2x2_expected_data() + result = load_training_data('first', filepath, sheet_name=0, printer=self.testing_printer) + self.assertTrue(isinstance(result, TrainingData)) + self.assertTrue(expected.equals(result.get_dataframe)) + + # prints + expected_prints = [ + 'Start', + f'Loading data from "{filepath}"...', + 'Done:', + str(result), + 'End', + ] + self.assertEqual(self.testing_printer.results, expected_prints) \ No newline at end of file diff --git a/core/tests/tests_forms.py b/core/tests/tests_forms.py new file mode 100644 index 0000000..d063222 --- /dev/null +++ b/core/tests/tests_forms.py @@ -0,0 +1,33 @@ +""" +Tests for forms +""" +from django import forms +from dry_tests.testcases import SimpleTestCase +from dry_tests.models import Fields, TrueForm +from core.forms import ( + LoadTrainingDataForm, +) + + +class LoadTrainingDataFormSimpleTestCase(SimpleTestCase): + """ + Load traning data test + """ + + def test_fields(self): + """ + Test available fields + """ + true_form = TrueForm( + fields=Fields( + count=3, + types={ + 'name': forms.CharField, + 'excel_file': forms.FileField, + 'sheet_name': forms.IntegerField, + } + ) + ) + + current_form = LoadTrainingDataForm() + self.assertTrueForm(current_form, true_form) diff --git 
a/analysis/tests/tests_loaders.py b/core/tests/tests_loaders.py similarity index 79% rename from analysis/tests/tests_loaders.py rename to core/tests/tests_loaders.py index 6d3b10b..0daa191 100644 --- a/analysis/tests/tests_loaders.py +++ b/core/tests/tests_loaders.py @@ -1,12 +1,10 @@ """ Test load functions module """ -import os -from django.conf import settings import pandas as pd from django.test import SimpleTestCase -from analysis.loaders import load_from_excel -from analysis.tests.data import get_2x2_filepath, get_2x2_expected_data +from core.loaders import load_from_excel +from core.tests.data import get_2x2_filepath, get_2x2_expected_data class LoadersTestCase(SimpleTestCase): diff --git a/analysis/tests/tests_models.py b/core/tests/tests_models.py similarity index 92% rename from analysis/tests/tests_models.py rename to core/tests/tests_models.py index 6505c49..c0d24a0 100644 --- a/analysis/tests/tests_models.py +++ b/core/tests/tests_models.py @@ -1,7 +1,7 @@ import pandas as pd from django.test import TestCase, SimpleTestCase -from analysis.models import TrainingData, to_list -from analysis.tests.data import get_2x2_expected_data +from core.models import TrainingData, to_list +from core.tests.data import get_2x2_expected_data class TrainingDataTestCase(TestCase): diff --git a/core/tests/tests_views.py b/core/tests/tests_views.py index 410ef01..2e64a12 100644 --- a/core/tests/tests_views.py +++ b/core/tests/tests_views.py @@ -1,14 +1,34 @@ """ Tests for views """ +from mixer.backend.django import mixer +from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse from dry_tests import ( SimpleTestCase, + TestCase, Request, TrueResponse, + Context, + ContentValue, + POST, ) + +from core.tests.data import get_2x2_filepath, get_2x2_training_data +from core.forms import LoadTrainingDataForm +from core.models import TrainingData from core.urls import app_name +FORM_CONTENT_VALUES = [ + ContentValue( + value='
    ', + count=1, + ), + ContentValue( + value='
    ', + count=1, + ), + ] class TestIndexView(SimpleTestCase): """ @@ -31,8 +51,218 @@ def test_view(self): true_response = TrueResponse( status_code=200, content_values=[ - '

    FindSimilar Laboratory

    ' + 'Main' ] ) current_response = request.get_response(self.client) self.assertTrueResponse(current_response, true_response) + + +class LoadTrainingDataViewTestCase(TestCase): + + def setUp(self): + self.url = reverse('core:load_training_data') + + def test_get(self): + request = Request( + url=self.url, + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['form'], + types={ + 'form': LoadTrainingDataForm + }, + ), + content_values=[ + ContentValue( + value='
    ', + count=1, + ), + ContentValue( + value='
    ', + count=1, + ), + ], + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + filepath = get_2x2_filepath() + excel_file = SimpleUploadedFile(filepath, open(filepath, 'rb').read()) + name = 'first' + data = { + 'name': name, + 'excel_file': excel_file, + 'sheet_name': 0, + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + true_response = TrueResponse( + status_code=302, + ) + + self.assertFalse(TrainingData.objects.filter(name=name).exists()) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + # true model has been created + + self.assertTrue(TrainingData.objects.filter(name=name).exists()) + + training_data = TrainingData.objects.get(name=name) + redirect_url = reverse('core:training_data', kwargs={'pk': training_data.pk}) + true_response = TrueResponse( + redirect_url=redirect_url, + ) + self.assertTrueResponse(current_response, true_response) + + +class TrainingDataDetailViewTestCase(TestCase): + + def setUp(self): + self.training_data = get_2x2_training_data() + self.url = reverse('core:training_data', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + content_values = [ + self.training_data.name, + ] + + dataframe = self.training_data.get_dataframe + + # add headers + columns = dataframe.columns + for column in columns: + content_values.append(column) + data_list = dataframe[column].values.tolist() + content_values += data_list + + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object'], + items={ + 'object': self.training_data + } + ), + content_values=content_values + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class TrainingDataListViewTestCase(TestCase): + + def setUp(self): + self.url = reverse('core:training_data_list') + self.training_data_list = [get_2x2_training_data('first'), get_2x2_training_data('second')] + + def test_get(self): + request = Request( + url=self.url + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object_list'], + ), + content_values=[item.name for item in self.training_data_list] + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + self.assertQuerySetEqual(current_response.context['object_list'], self.training_data_list, ordered=False) + + +class TrainingDataDeleteView(TestCase): + + def setUp(self): + self.training_data = get_2x2_training_data() + self.url = reverse('core:delete_training_data', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + content_values = [ + self.training_data.name, + ] + + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object'], + items={ + 'object': self.training_data + } + ), + content_values=content_values + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + request = Request( + url=self.url, + method=POST, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=reverse('core:training_data_list') + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class ClearTrainingData(TestCase): + + def setUp(self): + self.url = 
reverse('core:clear_training_data') + + def test_get(self): + request = Request( + url=self.url, + ) + + true_response = TrueResponse( + status_code=200, + content_values=FORM_CONTENT_VALUES + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + request = Request( + url=self.url, + method=POST, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url='/training-data-list/' + ) + + # db state before + mixer.cycle(2).blend(TrainingData, data={}) + self.assertTrue(TrainingData.objects.all().exists()) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + # db state after + self.assertFalse(TrainingData.objects.all().exists()) \ No newline at end of file diff --git a/core/urls.py b/core/urls.py index e133a86..2a97235 100644 --- a/core/urls.py +++ b/core/urls.py @@ -8,4 +8,9 @@ urlpatterns = [ path('', views.IndexView.as_view(), name="index"), + path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"), + path('training-data//', views.TrainingDataDetailView.as_view(), name="training_data"), + path('delete-training-data//', views.TrainingDataDeleteView.as_view(), name="delete_training_data"), + path('training-data-list/', views.TrainingDataListView.as_view(), name="training_data_list"), + path('clear-training-data/', views.clear_training_data, name="clear_training_data"), ] diff --git a/core/views.py b/core/views.py index 76aa628..fa170e4 100644 --- a/core/views.py +++ b/core/views.py @@ -1,7 +1,23 @@ """ Core package views """ -from django.views.generic import TemplateView +import os + +from django.http import HttpResponseRedirect +from django.shortcuts import render +from django.urls import reverse, reverse_lazy +from django.views.generic import ( + TemplateView, + ListView, + DeleteView, + DetailView, + FormView, +) +from django.conf import settings + +from core.core_functions import load_training_data +from core.forms import LoadTrainingDataForm +from core.models import TrainingData class IndexView(TemplateView): @@ -9,3 +25,51 @@ class IndexView(TemplateView): Main page view """ template_name = 'core/index.html' + + +class LoadTrainingDataView(FormView): + form_class = LoadTrainingDataForm + template_name = 'core/load_data.html' + + def handle_uploaded_file(self, f): + uploaded_path = os.path.join(settings.BASE_DIR, 'uploads', 'loaddata.xlsx') + with open(uploaded_path, 'wb+') as destination: + for chunk in f.chunks(): + destination.write(chunk) + return uploaded_path + + def form_valid(self, form): + data = form.cleaned_data + excel_file = form.cleaned_data['excel_file'] + uploaded_path = self.handle_uploaded_file(excel_file) + name = data['name'] + sheet_name = data.get('sheet_name', 0) + self.training_data = load_training_data(name=name, filepath=uploaded_path, sheet_name=sheet_name) + return super().form_valid(form) + + def get_success_url(self): + return reverse('core:training_data', kwargs={'pk': self.training_data.pk}) + + +class TrainingDataDetailView(DetailView): + model = TrainingData + template_name = 'core/training_data.html' + + +class TrainingDataListView(ListView): + model = TrainingData + template_name = 'core/training_data_list.html' + ordering = '-update' + + +class TrainingDataDeleteView(DeleteView): + model = TrainingData + template_name = 'core/training_data_delete_confirm.html' + success_url = reverse_lazy('core:training_data_list') + + +def 
clear_training_data(request): + if request.method == 'POST': + TrainingData.objects.all().delete() + return HttpResponseRedirect(reverse('core:training_data_list')) + return render(request, 'core/clear_data.html', context={'model_name': 'Training Data'}) \ No newline at end of file diff --git a/examples/management/commands/example_frequency_analysis.py b/examples/management/commands/example_frequency_analysis.py index 92b0f96..b5b1596 100644 --- a/examples/management/commands/example_frequency_analysis.py +++ b/examples/management/commands/example_frequency_analysis.py @@ -2,7 +2,7 @@ Command to analyze one example to frequency """ from django.core.management.base import BaseCommand -from analysis.functions import example_frequency_analysis +from examples.functions import example_frequency_analysis class Command(BaseCommand): diff --git a/examples/tests/tests_function.py b/examples/tests/tests_function.py new file mode 100644 index 0000000..2a5505b --- /dev/null +++ b/examples/tests/tests_function.py @@ -0,0 +1,141 @@ +""" +Tests for Analysis functions +""" +from django.test import SimpleTestCase + +from examples.functions import ( + example_frequency_analysis, +) +from utils.decorators import Printer + + +class TestingPrinter: + """ + Save prints to variable. To check the results + """ + + def __init__(self): + """ + Init printer + """ + self.results = [] + + def __call__(self, text, *args, **kwargs): + self.results.append(str(text)) + + +class FunctionsSimpleTestCase(SimpleTestCase): + """ + Class for test all functions + """ + def setUp(self): + self.one = 'one' + self.two = 'two' + self.one_two = 'one two' + self.printer = print + + def mock_printer(*args, **kwargs): # pylint: disable=unused-argument + """ + This is mock printer. This printer do nothing + """ + + self.mock_printer = mock_printer + + self.testing_printer = TestingPrinter() + + def test_printer_function_without_printer(self): + """ + Test printer when function hasn't got params + """ + @Printer(printer=self.testing_printer) + def some_func(): + """ + Do something usefull + """ + + result = some_func() + expected_prints = [ + 'Start', + 'Done:', + f'{result}', + 'End' + ] + self.assertEqual(self.testing_printer.results, expected_prints) + + def test_printer_function_with_printer_kwargs(self): + """ + Test printer when send printer dirrectly in function + """ + @Printer() + def some_func(printer=print): # pylint: disable=unused-argument + """ + Do something usefull + """ + + result = some_func(printer=self.testing_printer) + expected_prints = [ + 'Start', + 'Done:', + f'{result}', + 'End' + ] + self.assertEqual(self.testing_printer.results, expected_prints) + + def test_printer_simple_title(self): + """ + Test printer then we sent simple str title + """ + simple_title = 'Simple title' + + @Printer(title=lambda **kwargs: simple_title, printer=self.testing_printer) + def some_func(): + """ + Do something usefull + """ + + result = some_func() + expected_prints = [ + 'Start', + simple_title, + 'Done:', + f'{result}', + 'End' + ] + self.assertEqual(self.testing_printer.results, expected_prints) + + def test_printer_param_title(self): + """ + Test wen we sent title and function has a param + """ + @Printer(title=lambda param, **kwargs: f'Title {param}') + def some_func(param, printer=print): # pylint: disable=unused-argument + """ + Do something usefull + """ + + + result = some_func('A', printer=self.testing_printer) + expected_prints = [ + 'Start', + 'Title A', + 'Done:', + f'{result}', + 'End' + ] + 
self.assertEqual(self.testing_printer.results, expected_prints) + + def test_example_frequency_analysis(self): + """ + Test for example_frequency_analysis + """ + example_name = 'mock' + expected_result = (('mock', 2), + ('example', 2), + ('for', 2), + ('tests', 2), + ('this', 1), + ('is', 1)) + self.assertEqual(example_frequency_analysis( # pylint: disable=unexpected-keyword-arg + example_name, + printer=self.testing_printer + ), expected_result) \ No newline at end of file diff --git a/templates/base.html b/templates/base.html index 72ff4c6..1f31e91 100644 --- a/templates/base.html +++ b/templates/base.html @@ -49,44 +49,23 @@