diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..aca42ce --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,21 @@ +name: Lint + +on: + push: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.10' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + - name: Analysing the code with pylint + run: | + make lint diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 9e2d396..00c82b6 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -7,13 +7,15 @@ on: jobs: tests: runs-on: ubuntu-latest + strategy: + matrix: + python-version: [ "3.10", "3.11", '3.12' ] steps: - - name: Check out code - uses: actions/checkout@v3 - - name: Set up Python + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v3 with: - python-version: "3.10" + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.gitignore b/.gitignore index b0b6f3a..55cdd1c 100644 --- a/.gitignore +++ b/.gitignore @@ -124,6 +124,7 @@ celerybeat.pid .venv env/ venv/ +venv10/ ENV/ env.bak/ venv.bak/ @@ -157,4 +158,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-.idea/ \ No newline at end of file +.idea/ + +# uploads folder +uploads/loaddata.xlsx \ No newline at end of file diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..7fb8960 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,16 @@ +[MASTER] +disable= + R0801, # Too few public methods + E1101, # Dynamic attributes + W0511, # Used when a warning note + C0103, # Pylint doesn't like id attribute + W0201, # Attribute defined outside init method + R0903, # Too few public methods + C0114, # Module docstring + C0115, # Class docstring + C0116, # Method docstring + E1123, # Unexpected keyword argument + R0901, # Too many entities in file + +ignore-paths=.*/migrations + diff --git a/CHECKLIST.md b/CHECKLIST.md index bae6f29..b036b36 100644 --- a/CHECKLIST.md +++ b/CHECKLIST.md @@ -29,10 +29,10 @@ - [x] [Support](https://github.com/quillcraftsman/open-source-checklist#support) [CI and CD](https://github.com/quillcraftsman/open-source-checklist#ci-and-cd) -- [ ] Tests -- [ ] Test Coverage -- [ ] Test Coverage 100% -- [ ] Linters +- [x] Tests +- [x] Test Coverage +- [x] Test Coverage 100% +- [x] Linters - [ ] Build - [ ] Deploy - [ ] New User Greetings \ No newline at end of file diff --git a/Makefile b/Makefile index 37abfc6..67e62a3 100755 --- a/Makefile +++ b/Makefile @@ -1,26 +1,19 @@ make test: python manage.py test -test-proximity: - python manage.py test analysis.tests.tests_proximity - server: python manage.py runserver coverage: coverage run --source='.' 
manage.py test - coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* - coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,analysis/management/* --fail-under=100 - + coverage html --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,*/management/* + coverage report --omit=laboratory/asgi.py,laboratory/wsgi.py,manage.py,*/management/* --fail-under=100 migrate: python manage.py migrate -compare_two: - python manage.py compare_two "$(one)" "$(two)" - -example_frequency_analysis: - python manage.py example_frequency_analysis "$(example)" +pylint: + pylint $(shell git ls-files '*.py') -load_training_data: - python manage.py load_training_data $(name) $(filepath) $(sheet_name) +lint: + make pylint diff --git a/README.md b/README.md index 4cb3786..d4171f6 100644 --- a/README.md +++ b/README.md @@ -53,32 +53,6 @@ make coverage make lint ``` -## Use find_similar core function - -Instead of: -```python -from find_similar import find_similar # You will get import error in this case -``` - -Use: -```python -from django.conf import settings -settings.FIND_SIMILAR('none', ['one', 'two']) -settings.TOKENIZE('some text') -``` - -Or if you don't like UPPER_CASE: -```python -from django.conf import settings -find_similar = settings.FIND_SIMILAR -tokenize = settings.TOKENIZE - -find_similar('none', ['one', 'two']) -tokenize('some text') -``` - -Looks weird, please make pull request if you find a better way - ## Management commands ### Get tokens from one text @@ -128,11 +102,6 @@ Done: End ``` -With make: -```commandline -make one="one" two="two" compare_two -``` - ### Example frequency analysis Input: @@ -149,11 +118,6 @@ Done: End ``` -With make: -```commandline -make example="mock" example_frequency_analysis -``` - ### Load training data Input: @@ -170,11 +134,6 @@ TrainingData object (None) End ``` -With make: -```commandline -make load_traning_data name=2x2 filepath=analysis/tests/data/2x2.xlsx sheet_name=0 -``` - ## 
FAQ Empty yet \ No newline at end of file diff --git a/analysis/admin.py b/analysis/admin.py index 0476320..ddac361 100644 --- a/analysis/admin.py +++ b/analysis/admin.py @@ -7,4 +7,4 @@ # Register your models here. admin.site.register(TextToken) -admin.site.register(Token) \ No newline at end of file +admin.site.register(Token) diff --git a/analysis/forms.py b/analysis/forms.py index 3e09156..b30254e 100644 --- a/analysis/forms.py +++ b/analysis/forms.py @@ -2,7 +2,6 @@ Forms """ from django import forms -from django_find_similar.forms import FindSimilarForm class OneTextForm(forms.Form): @@ -26,13 +25,13 @@ class TwoTextForm(forms.Form): })) -class LoadTrainingDataForm(forms.Form): - name = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ - 'class': 'form-control' - })) - excel_file = forms.FileField(max_length=128, widget=forms.FileInput(attrs={ - 'class': 'form-control' - })) - sheet_name = forms.IntegerField(required=False, initial=0, widget=forms.NumberInput(attrs={ - 'class': 'form-control' - })) +# class LoadTrainingDataForm(forms.Form): +# name = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ +# 'class': 'form-control' +# })) +# excel_file = forms.FileField(max_length=128, widget=forms.FileInput(attrs={ +# 'class': 'form-control' +# })) +# sheet_name = forms.IntegerField(required=False, initial=0, widget=forms.NumberInput(attrs={ +# 'class': 'form-control' +# })) diff --git a/analysis/functions.py b/analysis/functions.py index 964bdef..a839611 100644 --- a/analysis/functions.py +++ b/analysis/functions.py @@ -1,64 +1,17 @@ """ Analysis functions """ -from django.conf import settings -from .loaders import load_from_excel -from .models import TrainingData, to_list +from find_similar.tokenize import tokenize # pylint: disable=import-error +from find_similar.calc_functions import calc_cosine_similarity_opt # pylint: disable=import-error +from utils.decorators import Printer -class Printer: - """ - This class decorator save results 
to some place (default print its) - """ - - def __init__(self, title=None, printer=print): - """ - Init - :title: callback with title -> title() - :printer: print function (default print) - """ - self.title = title - self.printer = printer - - def __call__(self, func): - """ - Make decorator - :func: decorated function - """ - def inner(*args, **kwargs): - """ - New function - """ - printer = kwargs.get('printer', self.printer) - - if 'printer' in kwargs: - is_delete_printer = True - if 'is_pass_printer' in kwargs: - if kwargs['is_pass_printer']: - is_delete_printer = False - del kwargs['is_pass_printer'] - - if is_delete_printer: - del kwargs['printer'] - - printer('Start') - if self.title is not None: - printer(self.title(*args, **kwargs)) - result = func(*args, **kwargs) - printer('Done:') - printer(result) - printer('End') - return result - - return inner - - -@Printer(title=lambda item, **kwargs: f'Get tokens for {item}...') -def analyze_one_item(item, dictionary=None, language="russian"): +@Printer(title=lambda text, **kwargs: f'Get tokens for {text}...') +def analyze_one_item(text, language="english", remove_stopwords=True): """ Analyze one item for tokenize """ - tokens = settings.TOKENIZE(item, language=language, dictionary=dictionary) + tokens = tokenize(text, language=language, remove_stopwords=remove_stopwords) return tokens @@ -69,59 +22,5 @@ def analyze_two_items(one, two, printer=print): """ one_tokens = analyze_one_item(one, printer=printer) # pylint: disable=unexpected-keyword-arg two_tokens = analyze_one_item(two, printer=printer) # pylint: disable=unexpected-keyword-arg - cos = settings.CALC_COSINE_SIMILARITY_OPT(one_tokens, two_tokens) + cos = calc_cosine_similarity_opt(one_tokens, two_tokens) return cos - - -@Printer(title=lambda example, **kwargs: f'Analyze "{example}"...') -def example_frequency_analysis(example): - """ - Example Frequency analysis - :example: Example name - """ - result = settings.FREQUENCY_ANALYSIS(example) - return result 
- - -@Printer(title=lambda name, filepath, sheet_name=0, **kwargs: f'Loading data from "{filepath}"...') -def load_training_data(name, filepath, sheet_name=0): - dataframe = load_from_excel(filepath, sheet_name) - # TrainingData - training_data = TrainingData.objects.create(name=name, data=dataframe.to_json()) - return training_data - - -@Printer(title=lambda text, dataframe, find_similar, **kwargs: f'Find similar for "{text}" in "{dataframe}"...') -def find_similar_dataframe(text, dataframe, find_similar, **kwargs): - texts = to_list(dataframe) - return find_similar(text, texts, **kwargs) - - -def total_rating(to_search, match_list, find_similar): - results = {} - all_list = [] - for line in match_list: - all_list += line - - for search in to_search: - similars = find_similar(search, all_list) - print('search', search, 'similars', similars) - for line in match_list: - if search in line: - print('line', line) - line_count = len(line) - print('SEARCH', search) - print('similars', similars) - similars = similars[:line_count] - print('short similars', similars) - similars = [item['name'] for item in similars] - find_count = 0 - for item in line: - if item in similars: - find_count += 1 - result = f'{find_count}/{line_count}' - print('result', result) - results[search] = result - - return results - diff --git a/analysis/migrations/0004_delete_trainingdata.py b/analysis/migrations/0004_delete_trainingdata.py new file mode 100644 index 0000000..d7e5b9e --- /dev/null +++ b/analysis/migrations/0004_delete_trainingdata.py @@ -0,0 +1,16 @@ +# Generated by Django 4.2.6 on 2023-11-09 12:47 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('analysis', '0003_trainingdata_create_trainingdata_update'), + ] + + operations = [ + migrations.DeleteModel( + name='TrainingData', + ), + ] diff --git a/analysis/models.py b/analysis/models.py index ebfcef2..91bcbf8 100644 --- a/analysis/models.py +++ b/analysis/models.py @@ -1,34 +1,40 
@@ -""" -Analisys models -""" -from io import StringIO - -import pandas as pd -from django.db import models - - -class TrainingData(models.Model): - name = models.CharField(max_length=128, unique=True) - data = models.JSONField() - create = models.DateTimeField(auto_now_add=True) - update = models.DateTimeField(auto_now=True) - - def get_dataframe(self) -> pd.DataFrame: - return pd.read_json(StringIO(self.data), dtype=str) - - @property - def columns_count(self): - return len(self.get_dataframe().columns) - - @property - def rows_count(self): - return len(self.get_dataframe().index) - - -def to_list(dataframe: pd.DataFrame) -> list: - result = [] - columns = dataframe.columns - for column in columns: - data_list = dataframe[column].values.tolist() - result += data_list - return result +# """ +# Analisys models +# """ +# from io import StringIO +# +# import pandas as pd +# from django.db import models +# from django.utils.functional import cached_property +# +# +# class TrainingData(models.Model): +# name = models.CharField(max_length=128, unique=True) +# data = models.JSONField() +# create = models.DateTimeField(auto_now_add=True) +# update = models.DateTimeField(auto_now=True) +# +# @cached_property +# def get_dataframe(self) -> pd.DataFrame: +# return pd.read_json(StringIO(self.data), dtype=str) +# +# @property +# def columns_count(self): +# return len(self.get_dataframe.columns) +# +# @property +# def rows_count(self): +# return len(self.get_dataframe.index) +# +# def display_dataframe(self): +# dataframe = self.get_dataframe +# return dataframe.head(10) +# +# +# def to_list(dataframe: pd.DataFrame) -> list: +# result = [] +# columns = dataframe.columns +# for column in columns: +# data_list = dataframe[column].values.tolist() +# result += data_list +# return result diff --git a/analysis/proximity.py b/analysis/proximity.py deleted file mode 100644 index 900726a..0000000 --- a/analysis/proximity.py +++ /dev/null @@ -1,46 +0,0 @@ -""" -Proximity calculation 
module -""" -import numpy as np - - -def calc_proximity(known_data, find_similar): - """ - Main calc proximity function - :param known_data: Known data as matrix - :param find_similar: Function to find similars - """ - result = np.zeros_like(known_data) - - with np.nditer(result, op_flags=['readwrite']) as it: - for x in it: - x[...] = 1.0 - - result = result.astype('float64') - return [result] - - -def array_to_row_list(array): - all_values = np.copy(array).reshape(1, array.size).tolist()[0] - return all_values - - -def calc_value_position(known_data, find_similar, value): - # all_values = np.copy(known_data).reshape(1, known_data.size).tolist()[0] - all_values = array_to_row_list(known_data) - result = find_similar(value, all_values) - order_result = [item.text for item in result] - return order_result.index(value) - - -def calc_values_position(known_data, find_similar, values): - return [ - calc_value_position(known_data, find_similar, value) - for value in values - ] - - -# def calc_positions_for_column(known_data, find_similar, column): -# values = array_to_row_list(known_data[:, 0]) -# print('VALUES', values) -# return calc_values_position(known_data, find_similar, values) \ No newline at end of file diff --git a/analysis/templates/analysis/find_similar.html b/analysis/templates/analysis/find_similar.html index 9f91789..0d98e1b 100644 --- a/analysis/templates/analysis/find_similar.html +++ b/analysis/templates/analysis/find_similar.html @@ -9,7 +9,7 @@

{{object.name}}

{% endblock %} {% block results %} - {% with object.get_dataframe as data %} + {% with object.display_dataframe as data %} {% for column in data.columns %} diff --git a/analysis/templates/analysis/tokenize.html b/analysis/templates/analysis/tokenize.html index 10dc9b7..787b04b 100644 --- a/analysis/templates/analysis/tokenize.html +++ b/analysis/templates/analysis/tokenize.html @@ -1,6 +1,10 @@ {% extends "base.html" %} {% block main %} - +

Tokenize

+ Tokenize all TextTokens in the database +
+

+ {% csrf_token %} {{form.as_p}} diff --git a/analysis/templates/analysis/training_data.html b/analysis/templates/analysis/training_data.html deleted file mode 100644 index 73a14f8..0000000 --- a/analysis/templates/analysis/training_data.html +++ /dev/null @@ -1,33 +0,0 @@ -{% extends "base.html" %} -{% block main %} -

{{object.name}}

-

-Total rating -One column rating -Find similar -Tokenize -Delete -{% endblock %} -{% block results %} - {% with object.get_dataframe as data %} -
- - {% for column in data.columns %} - - {% endfor %} - - {% for index, row in data.iterrows %} - - {% for cell in row %} - - {% endfor %} - - {% endfor %} -
- {{column}} -
- {{cell}} -
- {% endwith %} - -{% endblock %} diff --git a/analysis/templates/analysis/training_data_list.html b/analysis/templates/analysis/training_data_list.html deleted file mode 100644 index 17e6e13..0000000 --- a/analysis/templates/analysis/training_data_list.html +++ /dev/null @@ -1,44 +0,0 @@ -{% extends "base.html" %} -{% block main %} -

Traning data list

- Clear all training data -{% endblock %} -{% block results %} - - - - - - - - - - - {% for object in object_list %} - - - - - - - - - - {% endfor %} -
NameColumns countRows countUpdatedDetailDelete
- {{object.name}} - - {{object.columns_count}} - - {{object.rows_count}} - - {{object.update}} - - Detail - - Delete -
-

-New - -{% endblock %} diff --git a/analysis/tests/test_urls.py b/analysis/tests/test_urls.py index b3ad948..3f2f260 100644 --- a/analysis/tests/test_urls.py +++ b/analysis/tests/test_urls.py @@ -6,8 +6,6 @@ from django_find_similar.models import CheckResult, TextToken from mixer.backend.django import mixer -from analysis.tests.data import get_2x2_training_data - class TestUrlsSimpleTestCase(SimpleTestCase): """ @@ -28,18 +26,14 @@ def test_reverse(self): 'url': 'compare_two', 'reverse': 'compare-two/', }, - { - 'url': 'example_frequency', - 'reverse': 'example-frequency/', - }, - { - 'url': 'load_training_data', - 'reverse': 'load-training-data/', - }, - { - 'url': 'training_data_list', - 'reverse': 'training-data-list/', - }, + # { + # 'url': 'load_training_data', + # 'reverse': 'load-training-data/', + # }, + # { + # 'url': 'training_data_list', + # 'reverse': 'training-data-list/', + # }, { 'url': 'result_list', 'reverse': 'result-list/', @@ -48,10 +42,10 @@ def test_reverse(self): 'url': 'text_token_list', 'reverse': 'text-token-list/', }, - { - 'url': 'clear_training_data', - 'reverse': 'clear-training-data/', - }, + # { + # 'url': 'clear_training_data', + # 'reverse': 'clear-training-data/', + # }, { 'url': 'clear_text_token', 'reverse': 'clear-text-token/', @@ -60,6 +54,10 @@ def test_reverse(self): 'url': 'tokenize', 'reverse': 'tokenize/', }, + { + 'url': 'find_similar', + 'reverse': 'find-similar/', + }, ] for url in urls: app_url = f'{app_name}:{url["url"]}' @@ -78,34 +76,11 @@ def test_reverse(self): """ Test correct reverse """ - - training_data = get_2x2_training_data() check_result = mixer.blend(CheckResult) text_token = mixer.blend(TextToken) app_name = 'analysis' urls = [ - { - 'url': 'training_data', - 'kwargs': { - 'pk': training_data.pk - }, - 'reverse': f'training-data/{training_data.pk}/', - }, - { - 'url': 'delete_training_data', - 'kwargs': { - 'pk': training_data.pk - }, - 'reverse': f'delete-training-data/{training_data.pk}/', - }, - { - 
'url': 'find_similar', - 'kwargs': { - 'pk': training_data.pk - }, - 'reverse': f'find-similar/{training_data.pk}/', - }, { 'url': 'result', 'kwargs': { diff --git a/analysis/tests/tests_forms.py b/analysis/tests/tests_forms.py index d466632..d39d8b8 100644 --- a/analysis/tests/tests_forms.py +++ b/analysis/tests/tests_forms.py @@ -7,7 +7,6 @@ from analysis.forms import ( OneTextForm, TwoTextForm, - LoadTrainingDataForm, ) @@ -55,27 +54,3 @@ def test_fields(self): current_form = TwoTextForm() self.assertTrueForm(current_form, true_form) - - -class LoadTrainingDataFormSimpleTestCase(SimpleTestCase): - """ - Load traning data test - """ - - def test_fields(self): - """ - Test available fields - """ - true_form = TrueForm( - fields=Fields( - count=3, - types={ - 'name': forms.CharField, - 'excel_file': forms.FileField, - 'sheet_name': forms.IntegerField, - } - ) - ) - - current_form = LoadTrainingDataForm() - self.assertTrueForm(current_form, true_form) diff --git a/analysis/tests/tests_functions.py b/analysis/tests/tests_functions.py index d401d31..e3e97db 100644 --- a/analysis/tests/tests_functions.py +++ b/analysis/tests/tests_functions.py @@ -1,19 +1,11 @@ """ Tests for Analysis functions """ -from django.test import SimpleTestCase, TestCase - +from django.test import SimpleTestCase from analysis.functions import ( - Printer, analyze_one_item, analyze_two_items, - example_frequency_analysis, - total_rating, - load_training_data, - find_similar_dataframe, ) -from analysis.tests.data import get_2x2_filepath, get_2x2_expected_data, get_2x2_training_data, Token -from analysis.models import TrainingData class TestingPrinter: @@ -30,6 +22,7 @@ def __init__(self): def __call__(self, text, *args, **kwargs): self.results.append(str(text)) + class FunctionsSimpleTestCase(SimpleTestCase): """ Class for test all functions @@ -49,87 +42,6 @@ def mock_printer(*args, **kwargs): # pylint: disable=unused-argument self.testing_printer = TestingPrinter() - def 
test_printer_function_without_printer(self): - """ - Test printer when function hasn't got params - """ - @Printer(printer=self.testing_printer) - def some_func(): - """ - Do something usefull - """ - - result = some_func() - expected_prints = [ - 'Start', - 'Done:', - f'{result}', - 'End' - ] - self.assertEqual(self.testing_printer.results, expected_prints) - - def test_printer_function_with_printer_kwargs(self): - """ - Test printer when send printer dirrectly in function - """ - @Printer() - def some_func(printer=print): # pylint: disable=unused-argument - """ - Do something usefull - """ - - result = some_func(printer=self.testing_printer) - expected_prints = [ - 'Start', - 'Done:', - f'{result}', - 'End' - ] - self.assertEqual(self.testing_printer.results, expected_prints) - - def test_printer_simple_title(self): - """ - Test printer then we sent simple str title - """ - simple_title = 'Simple title' - - @Printer(title=lambda **kwargs: simple_title, printer=self.testing_printer) - def some_func(): - """ - Do something usefull - """ - - result = some_func() - expected_prints = [ - 'Start', - simple_title, - 'Done:', - f'{result}', - 'End' - ] - self.assertEqual(self.testing_printer.results, expected_prints) - - def test_printer_param_title(self): - """ - Test wen we sent title and function has a param - """ - @Printer(title=lambda param, **kwargs: f'Title {param}') - def some_func(param, printer=print): # pylint: disable=unused-argument - """ - Do something usefull - """ - - - result = some_func('A', printer=self.testing_printer) - expected_prints = [ - 'Start', - 'Title A', - 'Done:', - f'{result}', - 'End' - ] - self.assertEqual(self.testing_printer.results, expected_prints) - def test_analyze_one_item(self): """ Test for analyze one item @@ -141,7 +53,6 @@ def test_analyze_one_item(self): expected_tokens = {self.one, self.two} self.assertEqual(tokens, expected_tokens) - def test_analyze_two_items(self): """ Test for analyze_two_items @@ -187,110 +98,3 @@ def 
test_analyze_two_items(self): 'End', ] self.assertEqual(self.testing_printer.results, expected_prints) - - def test_example_frequency_analysis(self): - """ - Test for example_frequency_analysis - """ - example_name = 'mock' - expected_result = (('mock', 2), - ('example', 2), - ('for', 2), - ('tests', 2), - ('this', 1), - ('is', 1)) - self.assertEqual(example_frequency_analysis( # pylint: disable=unexpected-keyword-arg - example_name, - printer=self.testing_printer - ), expected_result) - - def test_use_match_list(self): - match_list = [ - [ - 'one', 'uno', 'one or uno', - ], - [ - 'two', 'dos', 'two or dos' - ] - ] - - def find_similar_mock( # pylint: disable=too-many-arguments - text_to_check, - texts, - language="russian", - count=5, - dictionary=None, - remove_stopwords=True, - keywords=None, - ): - return [ - {'name': 'one', 'cos': 1.0}, - {'name': 'two', 'cos': 1.0}, - {'name': 'uno', 'cos': 0.9}, - {'name': 'one or uno', 'cos': 0.5}, - {'name': 'dos', 'cos': 0.0}, - {'name': 'two or dos', 'cos': 0.0}, - ] - - to_search = ['one', 'two'] - results = total_rating(to_search, match_list, find_similar_mock) - excepted_results = { - 'one': '2/3', - 'two': '1/3', - } - self.assertEqual(results, excepted_results) - - -class FunctionsTestCase(TestCase): - - def setUp(self): - self.testing_printer = TestingPrinter() - - def test_load_testing_data(self): - filepath = get_2x2_filepath() - expected = get_2x2_expected_data() - result = load_training_data('first', filepath, sheet_name=0, printer=self.testing_printer) - self.assertTrue(isinstance(result, TrainingData)) - self.assertTrue(expected.equals(result.get_dataframe())) - - # prints - expected_prints = [ - 'Start', - f'Loading data from "{filepath}"...', - 'Done:', - str(result), - 'End', - ] - self.assertEqual(self.testing_printer.results, expected_prints) - - def test_find_similar_dataframe(self): - sorted_result = [ - Token(text='2', cos=1.0), - Token(text='1', cos=0.5), - Token(text='3', cos=0.5), - Token(text='4', 
cos=0.0), - ] - - def find_similar_2x2(text, texts): - return sorted_result - - training_data = get_2x2_training_data() - text = '2' - dataframe = training_data.get_dataframe() - similars = find_similar_dataframe( - text, - training_data.get_dataframe(), - find_similar_2x2, - printer=self.testing_printer - ) - self.assertEqual(len(similars), 4) - - # prints - expected_prints = [ - 'Start', - f'Find similar for "{text}" in "{dataframe}"...', - 'Done:', - str(similars), - 'End', - ] - self.assertEqual(self.testing_printer.results, expected_prints) diff --git a/analysis/tests/tests_proximity.py b/analysis/tests/tests_proximity.py deleted file mode 100644 index 82e01d3..0000000 --- a/analysis/tests/tests_proximity.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Test for proximity calculation module -""" -from dataclasses import dataclass -import numpy as np -from django.test import SimpleTestCase -from django.conf import settings -from analysis.proximity import ( - calc_proximity, - calc_value_position, - calc_values_position, - # calc_positions_for_column, - array_to_row_list, -) -from analysis.tests.data import Token - - -class NumpyTestCase(SimpleTestCase): - - def test_shape(self): - np1x2 = np.matrix([1, 2]) - self.assertEqual(np1x2.shape, (1, 2)) - - def test_to_list_one_row(self): - np2x2 = np.matrix([[1, 2], [3, 4]]) - self.assertEqual(np2x2.shape, (2, 2)) - self.assertEqual(np2x2.size, 4) - one_row_list = np2x2.reshape(1, np2x2.size).tolist()[0] - self.assertEqual(one_row_list, [1, 2, 3, 4]) - - def test_get_column(self): - np2x2 = np.matrix([[1, 2], [3, 4]]) - zero_column = np2x2[:, 0] - self.assertEqual(array_to_row_list(zero_column),[1, 3]) - - - -class ProximitySimpleTestCase(SimpleTestCase): - """ - Test proximity class - """ - def setUp(self): - self.a = 'a' - self.b = 'b' - self.known_data_1x1 = np.matrix([self.a], dtype=str) - self.known_data_2x1 = np.matrix([self.a, self.b], dtype=str) - self.known_data_1x2 = np.matrix([[self.a], [self.b]], dtype=str) - - # 
@dataclass - # class Token: - # text: str - # cos: float - - def find_similar_1x1(text, texts): - return [ - Token(text=self.a, cos=1.0) - ] - - def find_similar_2x1(text, texts): - return [ - Token(text=self.a, cos=1.0), - Token(text=self.b, cos=0.0), - ] - - - self.find_similar_1x1 = find_similar_1x1 - self.find_similar_2x1 = find_similar_2x1 - # self.find_similar_1x1 = settings.FIND_SIMILAR - - - def test_calc_value_position_1x1(self): - proximity = calc_value_position(self.known_data_1x1, find_similar=self.find_similar_1x1, value=self.a) - # 0 - position of sorted element - self.assertEqual(proximity, 0) - - def test_calc_value_position_2x1(self): - # a - proximity = calc_value_position(self.known_data_2x1, find_similar=self.find_similar_2x1, value=self.a) - # 0 - position of sorted element - self.assertEqual(proximity, 0) - # b - proximity = calc_value_position(self.known_data_2x1, find_similar=self.find_similar_2x1, value=self.b) - # 0 - position of sorted element - self.assertEqual(proximity, 1) - # 1x2 - proximity = calc_value_position(self.known_data_1x2, find_similar=self.find_similar_2x1, value=self.b) - # No matter size of matrix - self.assertEqual(proximity, 1) - - def test_calc_values_position(self): - proximities = calc_values_position(self.known_data_1x1, find_similar=self.find_similar_1x1, values=[self.a]) - self.assertEqual(proximities, [0]) - proximities = calc_values_position(self.known_data_2x1, find_similar=self.find_similar_2x1, values=[self.a]) - self.assertEqual(proximities, [0]) - proximities = calc_values_position(self.known_data_2x1, find_similar=self.find_similar_2x1, values=[self.a, self.b]) - self.assertEqual(proximities, [0, 1]) - proximities = calc_values_position(self.known_data_2x1, find_similar=self.find_similar_2x1, - values=[self.b, self.a]) - self.assertEqual(proximities, [1, 0]) - - # def test_calc_position_for_column(self): - # positions = calc_positions_for_column(self.known_data_1x1, find_similar=self.find_similar_1x1, 
column=0) - # self.assertEqual(positions, [0]) - # positions = calc_positions_for_column(self.known_data_2x1, find_similar=self.find_similar_2x1, column=0) - # self.assertEqual(positions, [1, 0]) - - # def test_calc_values_position(self): - # proximities = calc_positions_for_column(self.known_data_1x1, find_similar=self.find_similar_1x1, column=1) - - - def test_1x1(self): - """ - test 1 x 1 matrix - """ - proximity = calc_proximity(self.known_data_1x1, find_similar=self.find_similar_1x1) - self.assertTrue(np.array_equal(proximity, [np.matrix([1])])) - - # def test_1x2(self): - # known_data = np.matrix(['a', 'b']) - # proximity = calc_proximity(known_data, find_similar=lambda x: None) - # result = np.matrix([1, 1]) - # - # self.assertEqual(result.shape, proximity.shape) - # self.assertTrue(np.array_equal(proximity, result)) diff --git a/analysis/tests/tests_views.py b/analysis/tests/tests_views.py index 56c2bcd..75c3779 100644 --- a/analysis/tests/tests_views.py +++ b/analysis/tests/tests_views.py @@ -1,9 +1,10 @@ """ Tests for views """ -from django.core.files.uploadedfile import SimpleUploadedFile +from mixer.backend.django import mixer from django.urls import reverse from django_find_similar.models import CheckResult, TextToken, Token +from django_find_similar.forms import FindSimilarForm, FindSimilarParamsForm from dry_tests import ( TestCase, SimpleTestCase, @@ -13,11 +14,7 @@ Context, POST, ) -from django_find_similar.forms import FindSimilarForm, FindSimilarParamsForm -from mixer.backend.django import mixer -from analysis.forms import OneTextForm, TwoTextForm, LoadTrainingDataForm -from analysis.models import TrainingData -from analysis.tests.data import get_2x2_filepath, get_2x2_training_data +from analysis.forms import TwoTextForm from analysis.urls import app_name @@ -58,7 +55,7 @@ def test_get(self): true_response = TrueResponse( status_code=200, context=Context( - types={'form': OneTextForm} + types={'form': FindSimilarForm} ), 
content_values=FORM_CONTENT_VALUES ) @@ -94,7 +91,9 @@ def test_post(self): Test Post """ data = { - 'text': self.text + 'text': self.text, + 'language': 'english', + 'remove_stopwords': True, } request = Request( url=self.url, @@ -184,301 +183,183 @@ def test_post(self): self.assertTrueResponse(current_response, true_response) -class TestExampleFrequencyView(SimpleTestCase): - """ - Test Example Frequency View - """ - - def setUp(self): - """ - SetUp Test Data - """ - self.text = 'mock' - self.url = reverse('analysis:example_frequency') - self.result = (('mock', 2), ('example', 2), - ('for', 2), ('tests', 2), ('this', 1), ('is', 1)) - expected_url_params = [] - for key, value in self.result: - expected_url_params.append(f'{key}={value}') - self.expected_url_params = f'?text={self.text}&{"&".join(expected_url_params)}' - self.redirect_url=f'{self.url}{self.expected_url_params}' - - def test_get(self): - """ - Test get - """ - request = Request( - url=self.url - ) - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['form'], - types={'form': OneTextForm}, - ), - content_values=FORM_CONTENT_VALUES - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - request = Request( - url=self.redirect_url - ) - - content_values = [self.text] - for key, value in self.result: - content_values.append(key) - content_values.append(value) - - true_response = TrueResponse( - status_code=200, - context=Context( - items={ - 'text': self.text, - 'result': self.result, - } - ), - content_values=content_values - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - # Error - request = Request( - url=f'{self.url}?text={self.text}&error=some error' - ) - - true_response = TrueResponse( - status_code=200, - context=Context( - items={ - 'text': self.text, - 'error': 'some error', - } - ), - content_values=[ - 'Some Error' - ] - ) - 
current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post(self): - """ - Test post - """ - data = { - 'text': self.text - } - request = Request( - url=self.url, - method=POST, - data=data, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url=self.redirect_url - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post_error_example(self): - """ - Test post with error example - """ - data = { - 'text': 'unknown example value' - } - request = Request( - url=self.url, - method=POST, - data=data, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url=f'{self.url}?text=unknown example value&error=example not found' - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - -class LoadTrainingDataViewTestCase(TestCase): - - def setUp(self): - self.url = reverse('analysis:load_training_data') - - def test_get(self): - request = Request( - url=self.url, - ) - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['form'], - types={ - 'form': LoadTrainingDataForm - }, - ), - content_values=[ - ContentValue( - value='', - count=1, - ), - ContentValue( - value='', - count=1, - ), - ], - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post(self): - filepath = get_2x2_filepath() - excel_file = SimpleUploadedFile(filepath, open(filepath, 'rb').read()) - name = 'first' - data = { - 'name': name, - 'excel_file': excel_file, - 'sheet_name': 0, - } - request = Request( - url=self.url, - method=POST, - data=data, - ) - true_response = TrueResponse( - status_code=302, - ) - - self.assertFalse(TrainingData.objects.filter(name=name).exists()) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, 
true_response) - # true model has been created - - self.assertTrue(TrainingData.objects.filter(name=name).exists()) - - training_data = TrainingData.objects.get(name=name) - redirect_url = reverse('analysis:training_data', kwargs={'pk': training_data.pk}) - true_response = TrueResponse( - redirect_url=redirect_url, - ) - self.assertTrueResponse(current_response, true_response) - - -class TrainingDataDetailViewTestCase(TestCase): - - def setUp(self): - self.training_data = get_2x2_training_data() - self.url = reverse('analysis:training_data', kwargs={'pk': self.training_data.pk}) - - def test_get(self): - request = Request( - url=self.url, - ) - - content_values = [ - self.training_data.name, - ] - - dataframe = self.training_data.get_dataframe() - - # add headers - columns = dataframe.columns - for column in columns: - content_values.append(column) - data_list = dataframe[column].values.tolist() - content_values += data_list - - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['object'], - items={ - 'object': self.training_data - } - ), - content_values=content_values - ) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - -class TrainingDataListViewTestCase(TestCase): - - def setUp(self): - self.url = reverse('analysis:training_data_list') - self.training_data_list = [get_2x2_training_data('first'), get_2x2_training_data('second')] - - def test_get(self): - request = Request( - url=self.url - ) - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['object_list'], - ), - content_values=[item.name for item in self.training_data_list] - ) - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - self.assertQuerySetEqual(current_response.context['object_list'], self.training_data_list, ordered=False) - - -class TrainingDataDeleteView(TestCase): - - def setUp(self): - self.training_data = 
get_2x2_training_data() - self.url = reverse('analysis:delete_training_data', kwargs={'pk': self.training_data.pk}) - - def test_get(self): - request = Request( - url=self.url, - ) - - content_values = [ - self.training_data.name, - ] - - true_response = TrueResponse( - status_code=200, - context=Context( - keys=['object'], - items={ - 'object': self.training_data - } - ), - content_values=content_values - ) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post(self): - request = Request( - url=self.url, - method=POST, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url=reverse('analysis:training_data_list') - ) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) +# class LoadTrainingDataViewTestCase(TestCase): +# +# def setUp(self): +# self.url = reverse('analysis:load_training_data') +# +# def test_get(self): +# request = Request( +# url=self.url, +# ) +# true_response = TrueResponse( +# status_code=200, +# context=Context( +# keys=['form'], +# types={ +# 'form': LoadTrainingDataForm +# }, +# ), +# content_values=[ +# ContentValue( +# value='
', +# count=1, +# ), +# ContentValue( +# value='
', +# count=1, +# ), +# ], +# ) +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# +# def test_post(self): +# filepath = get_2x2_filepath() +# excel_file = SimpleUploadedFile(filepath, open(filepath, 'rb').read()) +# name = 'first' +# data = { +# 'name': name, +# 'excel_file': excel_file, +# 'sheet_name': 0, +# } +# request = Request( +# url=self.url, +# method=POST, +# data=data, +# ) +# true_response = TrueResponse( +# status_code=302, +# ) +# +# self.assertFalse(TrainingData.objects.filter(name=name).exists()) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# # true model has been created +# +# self.assertTrue(TrainingData.objects.filter(name=name).exists()) +# +# training_data = TrainingData.objects.get(name=name) +# redirect_url = reverse('analysis:training_data', kwargs={'pk': training_data.pk}) +# true_response = TrueResponse( +# redirect_url=redirect_url, +# ) +# self.assertTrueResponse(current_response, true_response) +# +# +# class TrainingDataDetailViewTestCase(TestCase): +# +# def setUp(self): +# self.training_data = get_2x2_training_data() +# self.url = reverse('analysis:training_data', kwargs={'pk': self.training_data.pk}) +# +# def test_get(self): +# request = Request( +# url=self.url, +# ) +# +# content_values = [ +# self.training_data.name, +# ] +# +# dataframe = self.training_data.get_dataframe +# +# # add headers +# columns = dataframe.columns +# for column in columns: +# content_values.append(column) +# data_list = dataframe[column].values.tolist() +# content_values += data_list +# +# true_response = TrueResponse( +# status_code=200, +# context=Context( +# keys=['object'], +# items={ +# 'object': self.training_data +# } +# ), +# content_values=content_values +# ) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# +# +# class 
TrainingDataListViewTestCase(TestCase): +# +# def setUp(self): +# self.url = reverse('analysis:training_data_list') +# self.training_data_list = [get_2x2_training_data('first'), +# get_2x2_training_data('second')] +# +# def test_get(self): +# request = Request( +# url=self.url +# ) +# true_response = TrueResponse( +# status_code=200, +# context=Context( +# keys=['object_list'], +# ), +# content_values=[item.name for item in self.training_data_list] +# ) +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# self.assertQuerySetEqual(current_response.context['object_list'], +# self.training_data_list, ordered=False) +# +# +# class TrainingDataDeleteView(TestCase): +# +# def setUp(self): +# self.training_data = get_2x2_training_data() +# self.url = reverse('analysis:delete_training_data', kwargs={'pk': self.training_data.pk}) +# +# def test_get(self): +# request = Request( +# url=self.url, +# ) +# +# content_values = [ +# self.training_data.name, +# ] +# +# true_response = TrueResponse( +# status_code=200, +# context=Context( +# keys=['object'], +# items={ +# 'object': self.training_data +# } +# ), +# content_values=content_values +# ) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# +# def test_post(self): +# request = Request( +# url=self.url, +# method=POST, +# ) +# +# true_response = TrueResponse( +# status_code=302, +# redirect_url=reverse('analysis:training_data_list') +# ) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) class FindSimilarViewTestCase(TestCase): def setUp(self): - self.training_data = get_2x2_training_data() - self.url = reverse('analysis:find_similar', kwargs={'pk': self.training_data.pk}) + # self.training_data = get_2x2_training_data() + self.url = reverse('analysis:find_similar') def test_get(self): request = Request( @@ -487,10 +368,10 
@@ def test_get(self): true_response = TrueResponse( status_code=200, context=Context( - keys=['object', 'form'], - items={ - 'object': self.training_data, - }, + keys=['form'], + # items={ + # 'object': self.training_data, + # }, types={ 'form': FindSimilarForm } @@ -515,7 +396,7 @@ def test_post(self): true_response = TrueResponse( status_code=302, - redirect_url=f'/analysis/result-list/' + redirect_url='/analysis/result-list/' ) current_response = request.get_response(self.client) @@ -613,44 +494,44 @@ def test_get(self): self.assertTrueResponse(current_response, true_response) -class ClearTrainingData(TestCase): - - def setUp(self): - self.url = reverse('analysis:clear_training_data') - - def test_get(self): - request = Request( - url=self.url, - ) - - true_response = TrueResponse( - status_code=200, - content_values=FORM_CONTENT_VALUES - ) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - def test_post(self): - request = Request( - url=self.url, - method=POST, - ) - - true_response = TrueResponse( - status_code=302, - redirect_url='/analysis/training-data-list/' - ) - - # db state before - mixer.cycle(2).blend(TrainingData, data={}) - self.assertTrue(TrainingData.objects.all().exists()) - - current_response = request.get_response(self.client) - self.assertTrueResponse(current_response, true_response) - - # db state after - self.assertFalse(TrainingData.objects.all().exists()) +# class ClearTrainingData(TestCase): +# +# def setUp(self): +# self.url = reverse('analysis:clear_training_data') +# +# def test_get(self): +# request = Request( +# url=self.url, +# ) +# +# true_response = TrueResponse( +# status_code=200, +# content_values=FORM_CONTENT_VALUES +# ) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# +# def test_post(self): +# request = Request( +# url=self.url, +# method=POST, +# ) +# +# true_response = TrueResponse( 
+# status_code=302, +# redirect_url='/analysis/training-data-list/' +# ) +# +# # db state before +# mixer.cycle(2).blend(TrainingData, data={}) +# self.assertTrue(TrainingData.objects.all().exists()) +# +# current_response = request.get_response(self.client) +# self.assertTrueResponse(current_response, true_response) +# +# # db state after +# self.assertFalse(TrainingData.objects.all().exists()) class ClearTextToken(TestCase): @@ -710,7 +591,7 @@ def test_get(self): 'form': FindSimilarParamsForm, } ), - content_values=FORM_CONTENT_VALUES + content_values=['form'] ) current_response = request.get_response(self.client) @@ -731,17 +612,19 @@ def test_post(self): true_response = TrueResponse( status_code=302, - redirect_url='/analysis/training-data-list/', + redirect_url='/analysis/text-token-list/', ) - self.training_data = get_2x2_training_data() + # self.training_data = get_2x2_training_data() # db before - self.assertFalse(TextToken.objects.all().exists()) + # self.assertFalse(TextToken.objects.all().exists()) + mixer.blend(TextToken) + self.assertFalse(Token.objects.all().exists()) current_response = request.get_response(self.client) self.assertTrueResponse(current_response, true_response) # db after - self.assertTrue(TextToken.objects.all().exists()) - self.assertTrue(Token.objects.all().exists()) \ No newline at end of file + # self.assertTrue(TextToken.objects.all().exists()) + self.assertTrue(Token.objects.all().exists()) diff --git a/analysis/urls.py b/analysis/urls.py index ee77f91..ebcef85 100644 --- a/analysis/urls.py +++ b/analysis/urls.py @@ -9,17 +9,11 @@ urlpatterns = [ path('tokenize-one/', views.TokenizeOneView.as_view(), name="tokenize_one"), path('compare-two/', views.CompareTwoView.as_view(), name="compare_two"), - path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), - path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"), - path('training-data//', 
views.TrainingDataDetailView.as_view(), name="training_data"), - path('find-similar//', views.FindSimilarFormView.as_view(), name="find_similar"), - path('delete-training-data//', views.TrainingDataDeleteView.as_view(), name="delete_training_data"), - path('training-data-list/', views.TrainingDataListView.as_view(), name="training_data_list"), + path('find-similar/', views.FindSimilarFormView.as_view(), name="find_similar"), path('result-list/', views.ResultListView.as_view(), name="result_list"), path('result//', views.ResultDetailView.as_view(), name="result"), path('text-token-list/', views.TextTokenListView.as_view(), name="text_token_list"), path('text-token//', views.TextTokenDetailView.as_view(), name="text_token"), - path('clear-training-data/', views.clear_training_data, name="clear_training_data"), path('clear-text-token/', views.clear_text_token, name="clear_text_token"), path('tokenize/', views.TokenizeView.as_view(), name="tokenize"), ] diff --git a/analysis/views.py b/analysis/views.py index e3f34b9..6e3064a 100644 --- a/analysis/views.py +++ b/analysis/views.py @@ -1,42 +1,40 @@ """ Analysis views """ -import os - from django.http import HttpResponseRedirect -from django.shortcuts import get_object_or_404, render -from django.views.generic import FormView, DetailView, ListView, DeleteView +from django.shortcuts import render +from django.views.generic import FormView, DetailView, ListView from django.urls import reverse, reverse_lazy -from django.conf import settings from django_find_similar.forms import FindSimilarForm, FindSimilarParamsForm -from django_find_similar.models import TextToken, TokenTextAdapter, CheckResult -from find_similar import find_similar - +from django_find_similar.models import ( + TextToken, + TokenTextAdapter, + CheckResult, + Token, + CheckResultItem, +) +from find_similar import find_similar # pylint: disable=import-error +from find_similar.tokenize import tokenize # pylint: disable=import-error from analysis.functions 
import ( analyze_one_item, analyze_two_items, - example_frequency_analysis, - load_training_data, ) from .forms import ( - OneTextForm, TwoTextForm, - LoadTrainingDataForm, ) -from .models import TrainingData, to_list class TokenizeOneView(FormView): """ For get tokens from one text """ - form_class = OneTextForm + form_class = FindSimilarForm template_name = 'analysis/tokenize_one.html' def form_valid(self, form): text = form.cleaned_data['text'] self.text = text - self.tokens = analyze_one_item(text) + self.tokens = analyze_one_item(**form.cleaned_data) return super().form_valid(form) def get_context_data(self, **kwargs): @@ -89,89 +87,46 @@ def get_context_data(self, **kwargs): return context -class ExampleFrequencyAnalysis(FormView): - """ - Example Frequency Analysis - """ - form_class = OneTextForm - template_name = 'analysis/example_frequency.html' - - def form_valid(self, form): - self.text = form.cleaned_data['text'] - try: - self.result = example_frequency_analysis(self.text) - self.error = None - except FileNotFoundError: - self.error = 'example not found' - return super().form_valid(form) - - def get_context_data(self, **kwargs): - context = super().get_context_data() - data = self.request.GET.dict() - text = data.pop('text', '') - context['text'] = text - error = data.get('error', None) - if error: - context['error'] = error - else: - result = [] - for key, value in data.items(): - result.append((key, int(value))) - context['result'] = tuple(result) - return context - - def get_success_url(self): - if self.error: - url = f'{reverse("analysis:example_frequency")}?text={self.text}&error={self.error}' - else: - url_params = [] - for key, value in self.result: - url_params.append(f'{key}={value}') - url_params = f'?text={self.text}&{"&".join(url_params)}' - url = f'{reverse("analysis:example_frequency")}{url_params}' - return url - - -class LoadTrainingDataView(FormView): - form_class = LoadTrainingDataForm - template_name = 'analysis/load_data.html' - - 
def handle_uploaded_file(self, f): - uploaded_path = os.path.join(settings.BASE_DIR, 'uploads', 'loaddata.xlsx') - with open(uploaded_path, 'wb+') as destination: - for chunk in f.chunks(): - destination.write(chunk) - return uploaded_path - - def form_valid(self, form): - data = form.cleaned_data - excel_file = form.cleaned_data['excel_file'] - uploaded_path = self.handle_uploaded_file(excel_file) - name = data['name'] - sheet_name = data.get('sheet_name', 0) - print('SHEET_NAME', sheet_name) - self.training_data = load_training_data(name=name, filepath=uploaded_path, sheet_name=sheet_name) - return super().form_valid(form) - - def get_success_url(self): - return reverse('analysis:training_data', kwargs={'pk': self.training_data.pk}) - - -class TrainingDataDetailView(DetailView): - model = TrainingData - template_name = 'analysis/training_data.html' - - -class TrainingDataListView(ListView): - model = TrainingData - template_name = 'analysis/training_data_list.html' - ordering = '-update' - - -class TrainingDataDeleteView(DeleteView): - model = TrainingData - template_name = 'analysis/training_data_delete_confirm.html' - success_url = reverse_lazy('analysis:training_data_list') +# class LoadTrainingDataView(FormView): +# form_class = LoadTrainingDataForm +# template_name = 'analysis/load_data.html' +# +# def handle_uploaded_file(self, f): +# uploaded_path = os.path.join(settings.BASE_DIR, 'uploads', 'loaddata.xlsx') +# with open(uploaded_path, 'wb+') as destination: +# for chunk in f.chunks(): +# destination.write(chunk) +# return uploaded_path +# +# def form_valid(self, form): +# data = form.cleaned_data +# excel_file = form.cleaned_data['excel_file'] +# uploaded_path = self.handle_uploaded_file(excel_file) +# name = data['name'] +# sheet_name = data.get('sheet_name', 0) +# self.training_data = load_training_data(name=name, +# filepath=uploaded_path, sheet_name=sheet_name) +# return super().form_valid(form) +# +# def get_success_url(self): +# return 
reverse('analysis:training_data', kwargs={'pk': self.training_data.pk}) + + +# class TrainingDataDetailView(DetailView): +# model = TrainingData +# template_name = 'analysis/training_data.html' +# +# +# class TrainingDataListView(ListView): +# model = TrainingData +# template_name = 'analysis/training_data_list.html' +# ordering = '-update' + + +# class TrainingDataDeleteView(DeleteView): +# model = TrainingData +# template_name = 'analysis/training_data_delete_confirm.html' +# success_url = reverse_lazy('analysis:training_data_list') class FindSimilarFormView(FormView): @@ -179,16 +134,6 @@ class FindSimilarFormView(FormView): template_name = 'analysis/find_similar.html' success_url = reverse_lazy('analysis:result_list') - def dispatch(self, request, *args, **kwargs): - pk = kwargs['pk'] - self.object = get_object_or_404(TrainingData, pk=pk) - return super().dispatch(request, *args, **kwargs) - - def get_context_data(self, **kwargs): - context = super().get_context_data(**kwargs) - context['object'] = self.object - return context - def form_valid(self, form): # Get cleaned data from FindSimilarForm data = form.cleaned_data @@ -208,23 +153,23 @@ def form_valid(self, form): # save all data from dataset to TextToken # self.object - data_list = to_list(self.object.get_dataframe()) - - new_token_texts = [] - for item in data_list: - item_text_token = TextToken( - text=item, - language=language, - remove_stopwords=remove_stopwords - ) - new_token_texts.append(item_text_token) - - TextToken.objects.bulk_create(new_token_texts, ignore_conflicts=True) + # data_list = to_list(self.object.get_dataframe) + # + # new_token_texts = [] + # for item in data_list: + # item_text_token = TextToken( + # text=item, + # language=language, + # remove_stopwords=remove_stopwords + # ) + # new_token_texts.append(item_text_token) + # + # TextToken.objects.bulk_create(new_token_texts, ignore_conflicts=True) # Adapt TextToken adapters = [TokenTextAdapter(item) for item in 
TextToken.objects.all()] # use find_similar - result = find_similar(adapter, adapters, count=len(data_list)) + result = find_similar(adapter, adapters, count=len(adapters)) # save results to the database CheckResult.save_result(text_token, result) @@ -246,6 +191,10 @@ class TextTokenListView(ListView): model = TextToken template_name = 'analysis/text_token_list.html' ordering = ['-create'] + paginate_by = 3000 + + def get_queryset(self): + return TextToken.objects.prefetch_related('token_set').all() class TextTokenDetailView(DetailView): @@ -253,45 +202,66 @@ class TextTokenDetailView(DetailView): template_name = 'analysis/text_token.html' -def clear_training_data(request): - if request.method == 'POST': - TrainingData.objects.all().delete() - return HttpResponseRedirect(reverse('analysis:training_data_list')) - return render(request, 'analysis/clear_data.html', context={'model_name': 'Training Data'}) +# def clear_training_data(request): +# if request.method == 'POST': +# TrainingData.objects.all().delete() +# # CheckResultItem.objects.all().delete() +# # Token.objects.all().delete() +# # CheckResult.objects.all().delete() +# # TextToken.objects.all().delete() +# return HttpResponseRedirect(reverse('analysis:training_data_list')) +# return render(request, 'analysis/clear_data.html', context={'model_name': 'Training Data'}) def clear_text_token(request): if request.method == 'POST': + CheckResultItem.objects.all().delete() + Token.objects.all().delete() + CheckResult.objects.all().delete() TextToken.objects.all().delete() return HttpResponseRedirect(reverse('analysis:text_token_list')) - return render(request, 'analysis/clear_data.html', context={'model_name': 'Text Tokens'}) + return render(request, 'core/clear_data.html', context={'model_name': 'Text Tokens'}) class TokenizeView(FormView): form_class = FindSimilarParamsForm template_name = 'analysis/tokenize.html' - success_url = reverse_lazy('analysis:training_data_list') + success_url = 
reverse_lazy('analysis:text_token_list') def form_valid(self, form): - cleaned_data = form.cleaned_data - language = cleaned_data['language'] - remove_stopwords = cleaned_data['remove_stopwords'] + # cleaned_data = form.cleaned_data + # language = cleaned_data['language'] + # remove_stopwords = cleaned_data['remove_stopwords'] # Make all training data (In a future we shout get just one) - training_data_list = TrainingData.objects.all() - for training_data in training_data_list: - data_list = to_list(training_data.get_dataframe()) - - new_token_texts = [] - for item in data_list: - item_text_token = TextToken( - text=item, - language=language, - remove_stopwords=remove_stopwords - ) - new_token_texts.append(item_text_token) - TextToken.objects.bulk_create(new_token_texts, ignore_conflicts=True) - - for text_token in TextToken.objects.all(): - text_token.create_tokens() - - return super().form_valid(form) \ No newline at end of file + # training_data_list = TrainingData.objects.all() + # all_token_texts = [] + # for training_data in training_data_list: + # data_list = to_list(training_data.get_dataframe) + # + # for item in data_list: + # all_token_texts.append(TextToken( + # text=item, + # language=language, + # remove_stopwords=remove_stopwords + # )) + # + # TextToken.objects.bulk_create(all_token_texts, ignore_conflicts=True) + + all_token_texts = TextToken.objects.all() + + all_tokens = [] + # for text_token in TextToken.objects.all(): + for text_token in all_token_texts: + # text_token.create_tokens() + token_set = tokenize( + text_token.text, + language=text_token.language, + remove_stopwords=text_token.remove_stopwords + ) + + for text_str in token_set: + all_tokens.append(Token(value=text_str, token_text=text_token)) + + Token.objects.bulk_create(all_tokens, ignore_conflicts=True) + # profiler.disable() + return super().form_valid(form) diff --git a/core/core_functions.py b/core/core_functions.py new file mode 100644 index 0000000..4b2b9a0 --- /dev/null +++ 
b/core/core_functions.py @@ -0,0 +1,134 @@ +""" +Core functions to analyze find_similar proximity +""" +import numpy as np +from find_similar import TokenText, find_similar # pylint: disable=import-error + +from utils.decorators import Printer +from .loaders import load_from_excel +from .models import TrainingData + + +def to_matrix(data: list) -> np.matrix: + """ + Convert data list to the Matrix + :param data: data in list of lists + :return: Matrix + """ + return np.matrix(data) + + +def str_to_token_text(text: str, language='english', remove_stopwords=True) -> TokenText: + """ + Create TokenText from text str + :param text: some str text + :return: TokenText with tokens + """ + # if text is None: + # return + return TokenText(text, language=language, remove_stopwords=remove_stopwords) + + +tokenize_vector = np.vectorize(str_to_token_text, excluded=['language', 'remove_stopwords']) + + +def matrix_to_one_line(matrix: np.matrix) -> np.ndarray: + line = np.array(matrix).reshape(-1, ) + # line = line[line != np.array(None)] + return line + + +def matrix_to_list(matrix: np.matrix) -> list: + """ + Create list from matrix + :param matrix: matrix with data + :return: list of all matrix values + """ + + return list(matrix_to_one_line(matrix)) + + +# def find_similar_or_none( +# text_to_check, +# texts, +# language="english", +# count=5, +# dictionary=None, +# keywords=None): +# if text_to_check is None: +# return +# return find_similar(text_to_check, texts, language, count, dictionary, keywords) + + +find_similar_vector = np.vectorize(find_similar, otypes=[TokenText], excluded=[ + 'texts', + 'language', + 'count', + 'dictionary', + 'keywords' + ] + ) + + +def reshape_results(results: list, shape: dict) -> np.matrix: + arr = np.array(results, dtype=TokenText) + arr = arr.reshape(shape) + matrix = np.asmatrix(arr) + return matrix + + +reshape_results_vector = np.vectorize(reshape_results, otypes=[TokenText], excluded=['shape']) + + +def get_matrix_head(matrix: np.matrix, 
count: int = 1): + return matrix[:count] + + +# get_matrix_head_vector = np.vectorize(get_matrix_head, excluded=['count']) +def calc_similar_count(expected_results, real_results): + expected_line = matrix_to_one_line(expected_results) + results_line = matrix_to_one_line(real_results) + intersection = np.in1d(expected_line, results_line) + return np.count_nonzero(intersection) # intersection == True + + +def calc_percent(similar_count, column_count): + # cc - 100 + # sc - x + # x = sc * 100 / cc + return (similar_count - 1) * 100 / (column_count - 1) + + +def compare( + results_matrix: np.matrix, + training_data_matrix: np.matrix, + count: int = 1 + ) -> np.matrix: + result = np.empty(training_data_matrix.shape, dtype=np.float16) + row_count, col_count = training_data_matrix.shape + for i in range(row_count): + + expected_results = training_data_matrix[i, :] + for j in range(col_count): + results: np.matrix = results_matrix[i, j] + head_results = get_matrix_head(results, count) + similar_count = calc_similar_count(expected_results, head_results) + percent = calc_percent(similar_count, col_count) + result[i, j] = percent + return np.asmatrix(result) + + +def calculate_total_rating(percent_results: np.matrix): + return percent_results.mean() + + +@Printer(title=lambda name, filepath, sheet_name=0, **kwargs: f'Loading data from "{filepath}"...') +def load_training_data(name, filepath, sheet_name=0): + dataframe = load_from_excel(filepath, sheet_name) + + # remove Null values + dataframe = dataframe.dropna() + + # TrainingData + training_data = TrainingData.objects.create(name=name, data=dataframe.to_json()) + return training_data diff --git a/core/forms.py b/core/forms.py new file mode 100644 index 0000000..92e9d18 --- /dev/null +++ b/core/forms.py @@ -0,0 +1,22 @@ +from django import forms +from django_find_similar.forms import FindSimilarParamsForm + + +class LoadTrainingDataForm(forms.Form): + name = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ + 
'class': 'form-control' + })) + excel_file = forms.FileField(max_length=128, widget=forms.FileInput(attrs={ + 'class': 'form-control' + })) + sheet_name = forms.IntegerField(required=False, initial=0, widget=forms.NumberInput(attrs={ + 'class': 'form-control' + })) + + +PRECISION_HELP_TEXT = ('If precision = 1 then we check for full similarity. ' + 'If precision = 2 then we search similarities in the first and second rows.' + '= 3 in the first, second and third rows ...') + +class TotalRatingForm(FindSimilarParamsForm): + precision = forms.IntegerField(help_text=PRECISION_HELP_TEXT, initial=1) diff --git a/analysis/loaders.py b/core/loaders.py similarity index 100% rename from analysis/loaders.py rename to core/loaders.py diff --git a/core/management/__init__.py b/core/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/management/commands/__init__.py b/core/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analysis/management/commands/load_training_data.py b/core/management/commands/load_training_data.py similarity index 79% rename from analysis/management/commands/load_training_data.py rename to core/management/commands/load_training_data.py index e7062e8..843e205 100644 --- a/analysis/management/commands/load_training_data.py +++ b/core/management/commands/load_training_data.py @@ -2,7 +2,7 @@ Command to get tokens from one text """ from django.core.management.base import BaseCommand -from analysis.functions import load_training_data +from core.core_functions import load_training_data class Command(BaseCommand): @@ -22,7 +22,12 @@ def add_arguments(self, parser): """ parser.add_argument("name", type=str) parser.add_argument("filepath", type=str) - parser.add_argument("sheet_name", type=int, nargs='?', default=0) # TODO: make available to send str name + parser.add_argument( + "sheet_name", + type=int, + nargs='?', + default=0 + ) # TODO: make available to send str name def handle(self, 
*args, **options): """ diff --git a/core/migrations/0001_initial.py b/core/migrations/0001_initial.py new file mode 100644 index 0000000..5773a82 --- /dev/null +++ b/core/migrations/0001_initial.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.6 on 2023-11-09 12:47 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ] + + operations = [ + migrations.CreateModel( + name='TrainingData', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=128, unique=True)), + ('data', models.JSONField()), + ('create', models.DateTimeField(auto_now_add=True)), + ('update', models.DateTimeField(auto_now=True)), + ], + ), + ] diff --git a/core/migrations/0002_trainingdata_rating_data_trainingdata_total_rating.py b/core/migrations/0002_trainingdata_rating_data_trainingdata_total_rating.py new file mode 100644 index 0000000..d2d4e99 --- /dev/null +++ b/core/migrations/0002_trainingdata_rating_data_trainingdata_total_rating.py @@ -0,0 +1,23 @@ +# Generated by Django 4.2.6 on 2023-11-09 14:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0001_initial'), + ] + + operations = [ + migrations.AddField( + model_name='trainingdata', + name='rating_data', + field=models.JSONField(blank=True, null=True), + ), + migrations.AddField( + model_name='trainingdata', + name='total_rating', + field=models.FloatField(blank=True, null=True), + ), + ] diff --git a/core/models.py b/core/models.py index e69de29..bf9c8e9 100644 --- a/core/models.py +++ b/core/models.py @@ -0,0 +1,45 @@ +""" +Core models +""" +from io import StringIO + +import pandas as pd +from django.db import models +from django.utils.functional import cached_property + + +class TrainingData(models.Model): + name = models.CharField(max_length=128, unique=True) + data = models.JSONField() + 
total_rating = models.FloatField(blank=True, null=True) + rating_data = models.JSONField(blank=True, null=True) + create = models.DateTimeField(auto_now_add=True) + update = models.DateTimeField(auto_now=True) + + @cached_property + def get_dataframe(self) -> pd.DataFrame: + return pd.read_json(StringIO(self.data), dtype=str) + + @cached_property + def get_rating_data(self) -> pd.DataFrame: + if self.rating_data: + return pd.read_json(StringIO(self.rating_data), dtype=str) + return None + + @property + def columns_count(self): + return len(self.get_dataframe.columns) + + @property + def rows_count(self): + return len(self.get_dataframe.index) + + def display_dataframe(self): + dataframe = self.get_dataframe + return dataframe.head(10) + + def display_rating_data(self): + dataframe = self.get_rating_data + if dataframe is None: + return None + return dataframe.head(10) diff --git a/analysis/templates/analysis/clear_data.html b/core/templates/core/clear_data.html similarity index 100% rename from analysis/templates/analysis/clear_data.html rename to core/templates/core/clear_data.html diff --git a/core/templates/core/index.html b/core/templates/core/index.html index e155035..6fc4887 100644 --- a/core/templates/core/index.html +++ b/core/templates/core/index.html @@ -1,7 +1,142 @@ {% extends "base.html" %} {% block main %} -

Still just main Page...

+
+
+
+
+

Proximity Calculation

+ +
+
+
+

+
+ + + + +
+ +
+ +
+ +
+
+
+ {% endblock %} {% block results %} - ... +
+
+
+
+

Proximity results

+ +
+
+ +
+
+

Text results

+ +
+
+
+
{% endblock %} diff --git a/analysis/templates/analysis/load_data.html b/core/templates/core/load_data.html similarity index 100% rename from analysis/templates/analysis/load_data.html rename to core/templates/core/load_data.html diff --git a/core/templates/core/rating.html b/core/templates/core/rating.html new file mode 100644 index 0000000..fddf7c2 --- /dev/null +++ b/core/templates/core/rating.html @@ -0,0 +1,60 @@ +{% extends "base.html" %} +{% block main %} +

{{object.name}}

+

+ + + + + + + + + + + + + + + + +
NameColumns countRows countLast Total RatingLast Update
+ {{object.name}} + + {{object.columns_count}} + + {{object.rows_count}} + + {{object.total_rating}} % + + {{object.update}} +
+Total rating +Delete + +{% endblock %} +{% block results %} +

Top 10 results display

+ + {% with object.display_rating_data as data %} + + + {% for column in data.columns %} + + {% endfor %} + + {% for index, row in data.iterrows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} +
+ {{column}} +
+ {{cell}} +
+ {% endwith %} + +{% endblock %} diff --git a/core/templates/core/total_rating_form.html b/core/templates/core/total_rating_form.html new file mode 100644 index 0000000..8709d31 --- /dev/null +++ b/core/templates/core/total_rating_form.html @@ -0,0 +1,10 @@ +{% extends "base.html" %} +{% block main %} +
+ {% csrf_token %} + {{form.as_p}} + +
+{% endblock %} +{% block results %} +{% endblock %} diff --git a/core/templates/core/training_data.html b/core/templates/core/training_data.html new file mode 100644 index 0000000..8b87cdb --- /dev/null +++ b/core/templates/core/training_data.html @@ -0,0 +1,60 @@ +{% extends "base.html" %} +{% block main %} +

{{object.name}}

+

+ + + + + + + + + + + + + + + + +
NameColumns countRows countLast Total RatingLast Update
+ {{object.name}} + + {{object.columns_count}} + + {{object.rows_count}} + + + {{object.total_rating}} % + + + {{object.update}} +
+Total rating +Delete + +{% endblock %} +{% block results %} + {% with object.display_dataframe as data %} + + + {% for column in data.columns %} + + {% endfor %} + + {% for index, row in data.iterrows %} + + {% for cell in row %} + + {% endfor %} + + {% endfor %} +
+ {{column}} +
+ {{cell}} +
+ {% endwith %} + +{% endblock %} diff --git a/analysis/templates/analysis/training_data_delete_confirm.html b/core/templates/core/training_data_delete_confirm.html similarity index 100% rename from analysis/templates/analysis/training_data_delete_confirm.html rename to core/templates/core/training_data_delete_confirm.html diff --git a/core/templates/core/training_data_list.html b/core/templates/core/training_data_list.html new file mode 100644 index 0000000..66db196 --- /dev/null +++ b/core/templates/core/training_data_list.html @@ -0,0 +1,51 @@ +{% extends "base.html" %} +{% block main %} +

Training data list

+ Clear all training data +{% endblock %} +{% block results %} + + + + + + + + + + + + {% for object in object_list %} + + + + + + + + + + + {% endfor %} +
NameColumns countRows countLast Total RatingUpdatedDetailDelete
+ {{object.name}} + + {{object.columns_count}} + + {{object.rows_count}} + + + {{object.total_rating}} % + + Total rating + + {{object.update}} + + Detail + + Delete +
+

+New + +{% endblock %} diff --git a/analysis/tests/data/2x2.xlsx b/core/tests/data/2x2.xlsx similarity index 100% rename from analysis/tests/data/2x2.xlsx rename to core/tests/data/2x2.xlsx diff --git a/analysis/tests/data/__init__.py b/core/tests/data/__init__.py similarity index 80% rename from analysis/tests/data/__init__.py rename to core/tests/data/__init__.py index 3ec01da..0d33720 100644 --- a/analysis/tests/data/__init__.py +++ b/core/tests/data/__init__.py @@ -2,11 +2,11 @@ import os import pandas as pd from django.conf import settings -from analysis.functions import load_training_data +from core.core_functions import load_training_data def get_2x2_filepath(): - filepath = os.path.join(settings.BASE_DIR, 'analysis', 'tests', 'data', '2x2.xlsx') + filepath = os.path.join(settings.BASE_DIR, 'core', 'tests', 'data', '2x2.xlsx') return filepath diff --git a/core/tests/test_urls.py b/core/tests/test_urls.py index 059604e..7b78d1c 100644 --- a/core/tests/test_urls.py +++ b/core/tests/test_urls.py @@ -1,9 +1,11 @@ """ Test urls module """ -from django.test import SimpleTestCase +from django.test import SimpleTestCase, TestCase from django.urls import reverse +from core.tests.data import get_2x2_training_data + class TestUrls(SimpleTestCase): """ @@ -20,12 +22,66 @@ def test_reverse(self): 'url': 'index', 'reverse': '' }, + { + 'url': 'load_training_data', + 'reverse': 'load-training-data/', + }, + { + 'url': 'training_data_list', + 'reverse': 'training-data-list/', + }, + { + 'url': 'clear_training_data', + 'reverse': 'clear-training-data/', + }, ] for url in urls: app_url = f'{app_name}:{url["url"]}' - print("waF") - print(app_url) current_reverse = reverse(app_url) - print(current_reverse) true_reverse = f'/{url["reverse"]}' self.assertEqual(current_reverse, true_reverse) + + +class TestUrlsTestCase(TestCase): + """ + Test Urls Class With DB + """ + + def test_reverse(self): + """ + Test correct reverse + """ + + training_data = get_2x2_training_data() + + 
app_name = 'core' + + urls = [ + { + 'url': 'training_data', + 'kwargs': { + 'pk': training_data.pk + }, + 'reverse': f'training-data/{training_data.pk}/', + }, + { + 'url': 'delete_training_data', + 'kwargs': { + 'pk': training_data.pk + }, + 'reverse': f'delete-training-data/{training_data.pk}/', + }, + { + 'url': 'rating', + 'kwargs': { + 'pk': training_data.pk + }, + 'reverse': f'rating/{training_data.pk}/', + }, + ] + for url in urls: + app_url = f'{app_name}:{url["url"]}' + current_reverse = reverse(app_url, kwargs=url['kwargs']) + true_reverse = f'/{url["reverse"]}' + with self.subTest(msg=app_url): + self.assertEqual(current_reverse, true_reverse) diff --git a/core/tests/tests_core_functions.py b/core/tests/tests_core_functions.py new file mode 100644 index 0000000..e0d0369 --- /dev/null +++ b/core/tests/tests_core_functions.py @@ -0,0 +1,358 @@ +import numpy as np +from django.test import SimpleTestCase, TestCase +from find_similar import TokenText # pylint: disable=import-error +from core.models import TrainingData +from core.tests.data import get_2x2_filepath, get_2x2_expected_data +from core.core_functions import ( + to_matrix, + str_to_token_text, + tokenize_vector, + matrix_to_list, + find_similar_vector, + reshape_results, + reshape_results_vector, + get_matrix_head, + compare, calculate_total_rating, load_training_data, +) + +def eq(self, other): + # if other is None: + # return False + return self.text == other.text + +# def lt(self, other): +# return self.cos < other.cos + +TokenText.__eq__ = eq +# TokenText.__lt__ = lt + + +class CoreFunctionsSimpleTestCase(SimpleTestCase): + + def setUp(self): + self.first_str = 'one two' + self.one_one = ['one two'] + self.one_two = [['one two', 'one']] + self.two_two = [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + + self.not_exact = [ + ['1', '1 1'], + ['2', '3'], + ['4', '2 2'], + ] + + self.with_empty_values = [ + ['1', None, '1 1'], + ['2', '3', None], + ['4', '2 2', '4 4'], + ] + + def 
test_to_matrix(self): + params = [ + { + 'data': self.one_one, + 'shape': (1,1) + }, + { + 'data': self.one_two, + 'shape': (1, 2) + }, + { + 'data': self.two_two, + 'shape': (2, 2) + }, + # { + # 'data': self.with_empty_values, + # 'shape': (3, 3) + # } + ] + for param in params: + matrix = to_matrix(param['data']) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, param['shape']) + + def test_str_to_token_text(self): + token_text = str_to_token_text(self.first_str) + self.assertIsInstance(token_text, TokenText) + self.assertEqual(len(token_text.tokens), 2) + + # token_text = str_to_token_text(None) + # self.assertIsNone(token_text) + + def test_tokenize_matrix(self): + params = [ + { + 'data': self.one_one, + }, + { + 'data': self.one_two, + }, + { + 'data': self.two_two, + }, + # { + # 'data': self.with_empty_values, + # }, + ] + for param in params: + old = to_matrix(param['data']) + new = tokenize_vector(old) + self.assertIsInstance(new, np.matrix) + self.assertTrue(new.dtype, TokenText) + self.assertEqual(new.shape, old.shape) + self.assertEqual(new[0, 0].text, old[0, 0]) + + def test_matrix_to_list(self): + params = [ + { + 'data': self.one_one, + 'value': ['one two'] + }, + { + 'data': self.one_two, + 'value': ['one two', 'one'] + }, + { + 'data': self.two_two, + 'value': ['one 1984', '1984', 'two 50', '50'], + }, + # { + # 'data': self.with_empty_values, + # # 'value': ['1', None, '1 1', '2', '3', None, '4', '2 2', '4 4'], + # 'value': ['1', '1 1', '2', '3', '4', '2 2', '4 4'], + # }, + ] + for param in params: + old = to_matrix(param['data']) + new = matrix_to_list(old) + self.assertIsInstance(new, list) + x, y = old.shape + count = x * y + self.assertEqual(len(new), count) + self.assertEqual(new, param['value']) + + def test_find_similar_vector(self): + params = [ + { + 'data': self.one_one, + }, + { + 'data': self.one_two, + }, + { + 'data': self.two_two, + }, + # { + # 'data': self.with_empty_values, + # }, + ] + for param in 
params: + old = to_matrix(param['data']) + old = tokenize_vector(old) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + + self.assertIsInstance(new, np.matrix) + self.assertIsInstance(new[0, 0], list) + self.assertEqual(new[0, 0][0].text, old[0, 0].text) + self.assertEqual(new.shape, old.shape) + + def test_reshape_results(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + # { + # 'data': self.with_empty_values, + # 'expected': np.matrix( + # [ + # ['1', None, '1 1'], + # ['2', '3', None], + # ['4', '2 2', '4 4'], + # ] + # ), + # }, + ] + for param in params: + old = to_matrix(param['data']) + texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + # first + results = new[0, 0] + matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + results = new[0, 1] + matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertFalse(np.array_equal(matrix, expected_matrix)) + + def test_reshape_results_vector(self): + params = [ + # { + # 'data': self.one_one, + # 'expected': np.matrix( + # ['one two'] + # ), + # }, + { + 'data': self.one_two, + 'expected': np.matrix( + [['one two', 'one']] + ), + }, + { + 'data': self.two_two, + 'expected': np.matrix( + [ + ['one 1984', '1984'], + ['two 50', '50'], + ] + ), + }, + ] + for param in params: + old = to_matrix(param['data']) 
+ texts = matrix_to_list(old) + new = find_similar_vector(text_to_check=old, texts=texts, count=len(texts)) + new = reshape_results_vector(results=new, shape=new.shape) + # first + matrix = new[0, 0] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertTrue(np.array_equal(matrix, expected_matrix)) + + # second + matrix = new[0, 1] + # matrix = reshape_results(results, old.shape) + self.assertIsInstance(matrix, np.matrix) + self.assertEqual(matrix.shape, old.shape) + expected_matrix = tokenize_vector(param['expected']) + self.assertFalse(np.array_equal(matrix, expected_matrix)) + + def test_get_matrix_head(self): + lines = 1 + old = to_matrix(self.one_one) + head = get_matrix_head(old, lines) + self.assertIsInstance(head, np.matrix) + self.assertTrue(np.array_equal(old, head)) + + old = to_matrix(self.two_two) + head = get_matrix_head(old, lines) + self.assertFalse(np.array_equal(old, head)) + self.assertEqual(head.shape, (1, 2)) + + old = to_matrix(self.two_two) + head = get_matrix_head(old, 2) + self.assertTrue(np.array_equal(old, head)) + + def test_compare(self): + training_data = to_matrix(self.two_two) + training_data = tokenize_vector(training_data) + texts = matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + + report = compare(results, training_data, 1) + + self.assertIsInstance(report, np.matrix) + self.assertEqual(report.shape, training_data.shape) + self.assertEqual(report.shape, results.shape) + + self.assertEqual(report[(0, 0)], 100) + + total_rating = calculate_total_rating(report) + self.assertEqual(total_rating, 100) + + # Bad finding + training_data = to_matrix(self.not_exact) + training_data = tokenize_vector(training_data) + texts = 
matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + + report = compare(results, training_data, 1) + self.assertEqual(report[(1, 1)], 0) + + total_rating = calculate_total_rating(report) + self.assertTrue(33 < total_rating < 34) + + # Here we can check several lines + report = compare(results, training_data, 2) + self.assertEqual(report[(1, 1)], 100) + self.assertEqual(report[(2, 1)], 0) + + total_rating = calculate_total_rating(report) + self.assertTrue(49 < total_rating < 51) + + report = compare(results, training_data, 3) + self.assertEqual(report[(2, 1)], 100) + + total_rating = calculate_total_rating(report) + self.assertEqual(total_rating, 100) + + +class TestingPrinter: + """ + Save prints to variable. To check the results + """ + + def __init__(self): + """ + Init printer + """ + self.results = [] + + def __call__(self, text, *args, **kwargs): + self.results.append(str(text)) + +class FunctionsTestCase(TestCase): + + def setUp(self): + self.testing_printer = TestingPrinter() + + def test_load_testing_data(self): + filepath = get_2x2_filepath() + expected = get_2x2_expected_data() + result = load_training_data('first', filepath, sheet_name=0, printer=self.testing_printer) + self.assertTrue(isinstance(result, TrainingData)) + self.assertTrue(expected.equals(result.get_dataframe)) + + # prints + expected_prints = [ + 'Start', + f'Loading data from "{filepath}"...', + 'Done:', + str(result), + 'End', + ] + self.assertEqual(self.testing_printer.results, expected_prints) diff --git a/core/tests/tests_forms.py b/core/tests/tests_forms.py new file mode 100644 index 0000000..8351fda --- /dev/null +++ b/core/tests/tests_forms.py @@ -0,0 +1,55 @@ +""" +Tests for forms +""" +from django import forms +from dry_tests.testcases import SimpleTestCase +from dry_tests.models import Fields, TrueForm +from core.forms import ( + 
LoadTrainingDataForm, + TotalRatingForm, +) + + +class LoadTrainingDataFormSimpleTestCase(SimpleTestCase): + """ + Load traning data test + """ + + def test_fields(self): + """ + Test available fields + """ + true_form = TrueForm( + fields=Fields( + count=3, + types={ + 'name': forms.CharField, + 'excel_file': forms.FileField, + 'sheet_name': forms.IntegerField, + } + ) + ) + + current_form = LoadTrainingDataForm() + self.assertTrueForm(current_form, true_form) + + +class TotalRatingFormTestCase(SimpleTestCase): + + def test_fields(self): + """ + Test available fields + """ + true_form = TrueForm( + fields=Fields( + count=3, + types={ + 'language': forms.CharField, + 'remove_stopwords': forms.BooleanField, + 'precision': forms.IntegerField, + } + ) + ) + + current_form = TotalRatingForm() + self.assertTrueForm(current_form, true_form) diff --git a/analysis/tests/tests_loaders.py b/core/tests/tests_loaders.py similarity index 79% rename from analysis/tests/tests_loaders.py rename to core/tests/tests_loaders.py index 6d3b10b..0daa191 100644 --- a/analysis/tests/tests_loaders.py +++ b/core/tests/tests_loaders.py @@ -1,12 +1,10 @@ """ Test load functions module """ -import os -from django.conf import settings import pandas as pd from django.test import SimpleTestCase -from analysis.loaders import load_from_excel -from analysis.tests.data import get_2x2_filepath, get_2x2_expected_data +from core.loaders import load_from_excel +from core.tests.data import get_2x2_filepath, get_2x2_expected_data class LoadersTestCase(SimpleTestCase): diff --git a/analysis/tests/tests_models.py b/core/tests/tests_models.py similarity index 56% rename from analysis/tests/tests_models.py rename to core/tests/tests_models.py index 21bab9e..3c499ad 100644 --- a/analysis/tests/tests_models.py +++ b/core/tests/tests_models.py @@ -1,7 +1,10 @@ import pandas as pd -from django.test import TestCase, SimpleTestCase -from analysis.models import TrainingData, to_list -from analysis.tests.data import 
get_2x2_expected_data +from django.test import TestCase +from core.models import ( + TrainingData, + # to_list, +) +from core.tests.data import get_2x2_expected_data class TrainingDataTestCase(TestCase): @@ -21,22 +24,10 @@ def test_save(self): self.assertTrue(self.dataframe.equals(get_data)) def test_data_from_json(self): - self.assertTrue(self.dataframe.equals(self.training_data.get_dataframe())) + self.assertTrue(self.dataframe.equals(self.training_data.get_dataframe)) def test_count(self): - self.assertEqual(len(self.training_data.get_dataframe().columns), 2) - self.assertEqual(len(self.training_data.get_dataframe().index), 2) + self.assertEqual(len(self.training_data.get_dataframe.columns), 2) + self.assertEqual(len(self.training_data.get_dataframe.index), 2) self.assertEqual(self.training_data.columns_count, 2) self.assertEqual(self.training_data.rows_count, 2) - - -class FunctionsSimpleTestCase(SimpleTestCase): - - def test_to_list(self): - dataframe = pd.DataFrame( - [ - [1, 2], - [3, 4], - ] - ) - self.assertEqual(to_list(dataframe), [1, 3, 2, 4]) diff --git a/core/tests/tests_views.py b/core/tests/tests_views.py index eb7ef2f..6f9f161 100644 --- a/core/tests/tests_views.py +++ b/core/tests/tests_views.py @@ -1,14 +1,34 @@ """ Tests for views """ +from mixer.backend.django import mixer +from django.core.files.uploadedfile import SimpleUploadedFile from django.urls import reverse from dry_tests import ( SimpleTestCase, + TestCase, Request, TrueResponse, + Context, + ContentValue, + POST, ) + +from core.tests.data import get_2x2_filepath, get_2x2_training_data +from core.forms import LoadTrainingDataForm, TotalRatingForm +from core.models import TrainingData from core.urls import app_name +FORM_CONTENT_VALUES = [ + ContentValue( + value='
', + count=1, + ), + ContentValue( + value='
', + count=1, + ), + ] class TestIndexView(SimpleTestCase): """ @@ -31,8 +51,323 @@ def test_view(self): true_response = TrueResponse( status_code=200, content_values=[ - '

Still just main Page...

' + 'Main' ] ) current_response = request.get_response(self.client) self.assertTrueResponse(current_response, true_response) + + +class LoadTrainingDataViewTestCase(TestCase): + + def setUp(self): + self.url = reverse('core:load_training_data') + + def test_get(self): + request = Request( + url=self.url, + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['form'], + types={ + 'form': LoadTrainingDataForm + }, + ), + content_values=[ + ContentValue( + value='
', + count=1, + ), + ContentValue( + value='
', + count=1, + ), + ], + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + filepath = get_2x2_filepath() + excel_file = SimpleUploadedFile(filepath, open(filepath, 'rb').read()) # pylint: disable=consider-using-with + name = 'first' + data = { + 'name': name, + 'excel_file': excel_file, + 'sheet_name': 0, + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + true_response = TrueResponse( + status_code=302, + ) + + self.assertFalse(TrainingData.objects.filter(name=name).exists()) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + # true model has been created + + self.assertTrue(TrainingData.objects.filter(name=name).exists()) + + training_data = TrainingData.objects.get(name=name) + redirect_url = reverse('core:training_data', kwargs={'pk': training_data.pk}) + true_response = TrueResponse( + redirect_url=redirect_url, + ) + self.assertTrueResponse(current_response, true_response) + + +class TrainingDataDetailViewTestCase(TestCase): + + def setUp(self): + self.training_data = get_2x2_training_data() + self.url = reverse('core:training_data', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + content_values = [ + self.training_data.name, + ] + + dataframe = self.training_data.get_dataframe + + # add headers + columns = dataframe.columns + for column in columns: + content_values.append(column) + data_list = dataframe[column].values.tolist() + content_values += data_list + + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object'], + items={ + 'object': self.training_data + } + ), + content_values=content_values + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class TrainingDataListViewTestCase(TestCase): + + def setUp(self): + 
self.url = reverse('core:training_data_list') + self.training_data_list = [get_2x2_training_data('first'), get_2x2_training_data('second')] + + def test_get(self): + request = Request( + url=self.url + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object_list'], + ), + content_values=[item.name for item in self.training_data_list] + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + self.assertQuerySetEqual( + current_response.context['object_list'], + self.training_data_list, + ordered=False + ) + + +class TrainingDataDeleteView(TestCase): + + def setUp(self): + self.training_data = get_2x2_training_data() + self.url = reverse('core:delete_training_data', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + content_values = [ + self.training_data.name, + ] + + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['object'], + items={ + 'object': self.training_data + } + ), + content_values=content_values + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + request = Request( + url=self.url, + method=POST, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=reverse('core:training_data_list') + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class ClearTrainingData(TestCase): + + def setUp(self): + self.url = reverse('core:clear_training_data') + + def test_get(self): + request = Request( + url=self.url, + ) + + true_response = TrueResponse( + status_code=200, + content_values=FORM_CONTENT_VALUES + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + request = Request( + url=self.url, + method=POST, + ) + + 
true_response = TrueResponse( + status_code=302, + redirect_url='/training-data-list/' + ) + + # db state before + mixer.cycle(2).blend(TrainingData, data={}) + self.assertTrue(TrainingData.objects.all().exists()) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + # db state after + self.assertFalse(TrainingData.objects.all().exists()) + + +class RatingViewTestCase(TestCase): + + def setUp(self): + self.training_data = get_2x2_training_data() + + self.training_data.total_rating = 78.0 + self.training_data.save() + + self.url = reverse('core:rating', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'object': self.training_data + }, + ), + content_values=[self.training_data.total_rating] + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class TotalRatingFormViewTestCase(TestCase): + + def setUp(self): + self.training_data: TrainingData = get_2x2_training_data() + self.url = reverse('core:total_rating_form', kwargs={'pk': self.training_data.pk}) + + def test_get(self): + request = Request( + url=self.url, + ) + + true_response = TrueResponse( + status_code=200, + context=Context( + types={ + 'form': TotalRatingForm, + } + ), + ) + + current_response = request.get_response(self.client) + + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + data = { + 'language': 'english', + 'remove_stopwords': True, + 'precision': 1, + } + + request = Request( + url=self.url, + method=POST, + data=data, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=f'/rating/{self.training_data.pk}/' + ) + + # db before + self.assertIsNone(self.training_data.total_rating) + self.assertIsNone(self.training_data.rating_data) + + current_response = 
request.get_response(self.client) + + self.training_data.refresh_from_db() + # db after + self.assertIsNotNone(self.training_data.total_rating) + self.assertIsNotNone(self.training_data.rating_data) + self.assertEqual(self.training_data.total_rating, 50) + self.assertTrueResponse(current_response, true_response) + + data = { + 'language': 'english', + 'remove_stopwords': True, + 'precision': 2, + } + + request = Request( + url=self.url, + method=POST, + data=data, + ) + + request.get_response(self.client) + self.training_data.refresh_from_db() + self.assertEqual(self.training_data.total_rating, 100) diff --git a/core/urls.py b/core/urls.py index e133a86..74ab289 100644 --- a/core/urls.py +++ b/core/urls.py @@ -8,4 +8,19 @@ urlpatterns = [ path('', views.IndexView.as_view(), name="index"), + path('load-training-data/', views.LoadTrainingDataView.as_view(), name="load_training_data"), + path('training-data//', views.TrainingDataDetailView.as_view(), name="training_data"), + path( + 'delete-training-data//', + views.TrainingDataDeleteView.as_view(), + name="delete_training_data" + ), + path('training-data-list/', views.TrainingDataListView.as_view(), name="training_data_list"), + path('clear-training-data/', views.clear_training_data, name="clear_training_data"), + path( + 'total-rating-form//', + views.TotalRatingFormView.as_view(), + name="total_rating_form" + ), + path('rating//', views.RatingView.as_view(), name="rating"), ] diff --git a/core/views.py b/core/views.py index 76aa628..5b7e1c1 100644 --- a/core/views.py +++ b/core/views.py @@ -1,7 +1,32 @@ """ Core package views """ -from django.views.generic import TemplateView +import os +import numpy as np +import pandas as pd +from django.http import HttpResponseRedirect +from django.shortcuts import render, get_object_or_404 +from django.urls import reverse, reverse_lazy +from django.views.generic import ( + TemplateView, + ListView, + DeleteView, + DetailView, + FormView, +) +from django.conf import settings + 
+from core.core_functions import ( + load_training_data, + tokenize_vector, + matrix_to_list, + find_similar_vector, + reshape_results_vector, + compare, + calculate_total_rating, +) +from core.forms import LoadTrainingDataForm, TotalRatingForm +from core.models import TrainingData class IndexView(TemplateView): @@ -9,3 +34,113 @@ class IndexView(TemplateView): Main page view """ template_name = 'core/index.html' + + +class LoadTrainingDataView(FormView): + form_class = LoadTrainingDataForm + template_name = 'core/load_data.html' + + def handle_uploaded_file(self, f): + uploaded_path = os.path.join(settings.BASE_DIR, 'uploads', 'loaddata.xlsx') + with open(uploaded_path, 'wb+') as destination: + for chunk in f.chunks(): + destination.write(chunk) + return uploaded_path + + def form_valid(self, form): + data = form.cleaned_data + excel_file = form.cleaned_data['excel_file'] + uploaded_path = self.handle_uploaded_file(excel_file) + name = data['name'] + sheet_name = data.get('sheet_name', 0) + self.training_data = load_training_data( + name=name, + filepath=uploaded_path, + sheet_name=sheet_name + ) + return super().form_valid(form) + + def get_success_url(self): + return reverse('core:training_data', kwargs={'pk': self.training_data.pk}) + + +class TrainingDataDetailView(DetailView): + model = TrainingData + template_name = 'core/training_data.html' + + +class TrainingDataListView(ListView): + model = TrainingData + template_name = 'core/training_data_list.html' + ordering = '-update' + + +class TrainingDataDeleteView(DeleteView): + model = TrainingData + template_name = 'core/training_data_delete_confirm.html' + success_url = reverse_lazy('core:training_data_list') + + +def clear_training_data(request): + if request.method == 'POST': + TrainingData.objects.all().delete() + return HttpResponseRedirect(reverse('core:training_data_list')) + return render(request, 'core/clear_data.html', context={'model_name': 'Training Data'}) + + +class TotalRatingFormView(FormView): 
+ form_class = TotalRatingForm + template_name = 'core/total_rating_form.html' + # success_url = reverse_lazy('core:training_data_list') + + def dispatch(self, request, *args, **kwargs): + pk = kwargs['pk'] + self.object = get_object_or_404(TrainingData, pk=pk) + return super().dispatch(request, *args, **kwargs) + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context['object'] = self.object + return context + + def form_valid(self, form): + # Get cleaned data from FindSimilarForm + data = form.cleaned_data + + language = data['language'] + remove_stopwords = data['remove_stopwords'] + precision = data['precision'] + + # Get or create TextToken model + dataframe = self.object.get_dataframe + arr = dataframe.to_numpy() + training_data = np.asmatrix(arr) + + training_data = tokenize_vector( + training_data, + language=language, + remove_stopwords=remove_stopwords + ) + texts = matrix_to_list(training_data) + similars = find_similar_vector(text_to_check=training_data, texts=texts, count=len(texts)) + results = reshape_results_vector(results=similars, shape=training_data.shape) + + report = compare(results, training_data, precision) + + report_df = pd.DataFrame(report) + self.object.rating_data = report_df.to_json() + # May be will be better to save as feather report_df.to_feather('save.feather') + + total_rating = calculate_total_rating(report) + self.object.total_rating = total_rating + self.object.save() + # save results to the database + return super().form_valid(form) + + def get_success_url(self): + return reverse('core:rating', kwargs={'pk': self.object.pk}) + + +class RatingView(DetailView): + model = TrainingData + template_name = 'core/rating.html' diff --git a/examples/__init__.py b/examples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/apps.py b/examples/apps.py new file mode 100644 index 0000000..d6f5703 --- /dev/null +++ b/examples/apps.py @@ -0,0 +1,6 @@ +from django.apps import 
AppConfig + + +class ExamplesConfig(AppConfig): + default_auto_field = 'django.db.models.BigAutoField' + name = 'examples' diff --git a/examples/forms.py b/examples/forms.py new file mode 100644 index 0000000..4d55f09 --- /dev/null +++ b/examples/forms.py @@ -0,0 +1,10 @@ +from django import forms + + +class OneTextForm(forms.Form): + """ + Form with one text + """ + text = forms.CharField(max_length=128, widget=forms.TextInput(attrs={ + 'class': 'form-control' + })) diff --git a/examples/functions.py b/examples/functions.py new file mode 100644 index 0000000..1a5a6be --- /dev/null +++ b/examples/functions.py @@ -0,0 +1,12 @@ +from find_similar.examples.analyze import frequency_analysis # pylint: disable=import-error +from utils.decorators import Printer + + +@Printer(title=lambda example, **kwargs: f'Analyze "{example}"...') +def example_frequency_analysis(example): + """ + Example Frequency analysis + :example: Example name + """ + result = frequency_analysis(example) + return result diff --git a/examples/management/__init__.py b/examples/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/management/commands/__init__.py b/examples/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analysis/management/commands/example_frequency_analysis.py b/examples/management/commands/example_frequency_analysis.py similarity index 92% rename from analysis/management/commands/example_frequency_analysis.py rename to examples/management/commands/example_frequency_analysis.py index 92b0f96..b5b1596 100644 --- a/analysis/management/commands/example_frequency_analysis.py +++ b/examples/management/commands/example_frequency_analysis.py @@ -2,7 +2,7 @@ Command to analyze one example to frequency """ from django.core.management.base import BaseCommand -from analysis.functions import example_frequency_analysis +from examples.functions import example_frequency_analysis class Command(BaseCommand): diff --git 
a/examples/migrations/__init__.py b/examples/migrations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analysis/templates/analysis/example_frequency.html b/examples/templates/examples/example_frequency.html similarity index 77% rename from analysis/templates/analysis/example_frequency.html rename to examples/templates/examples/example_frequency.html index 10c7d13..08d43bb 100644 --- a/analysis/templates/analysis/example_frequency.html +++ b/examples/templates/examples/example_frequency.html @@ -1,5 +1,10 @@ {% extends "base.html" %} {% block main %} + + + Example list + +

{% csrf_token %} {{form.as_p}} diff --git a/examples/templates/examples/list.html b/examples/templates/examples/list.html new file mode 100644 index 0000000..df5a45b --- /dev/null +++ b/examples/templates/examples/list.html @@ -0,0 +1,20 @@ +{% extends "base.html" %} +{% block main %} +

FindSimilar examples

+ + + Example frequency + +{% endblock %} +{% block results %} + +
    + {% for example in object_list %} +
  • + {{example}} +
  • + {% endfor %} +
+ + +{% endblock %} diff --git a/examples/tests/__init__.py b/examples/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/examples/tests/tests_function.py b/examples/tests/tests_function.py new file mode 100644 index 0000000..6111261 --- /dev/null +++ b/examples/tests/tests_function.py @@ -0,0 +1,59 @@ +""" +Tests for Analysis functions +""" +from django.test import SimpleTestCase + +from examples.functions import ( + example_frequency_analysis, +) + + +class TestingPrinter: + """ + Save prints to variable. To check the results + """ + + def __init__(self): + """ + Init printer + """ + self.results = [] + + def __call__(self, text, *args, **kwargs): + self.results.append(str(text)) + + +class FunctionsSimpleTestCase(SimpleTestCase): + """ + Class for test all functions + """ + def setUp(self): + self.one = 'one' + self.two = 'two' + self.one_two = 'one two' + self.printer = print + + def mock_printer(*args, **kwargs): # pylint: disable=unused-argument + """ + This is mock printer. 
This printer do nothing + """ + + self.mock_printer = mock_printer + + self.testing_printer = TestingPrinter() + + def test_example_frequency_analysis(self): + """ + Test for example_frequency_analysis + """ + example_name = 'mock' + expected_result = (('mock', 2), + ('example', 2), + ('for', 2), + ('tests', 2), + ('this', 1), + ('is', 1)) + self.assertEqual(example_frequency_analysis( # pylint: disable=unexpected-keyword-arg + example_name, + printer=self.testing_printer + ), expected_result) diff --git a/examples/tests/tests_urls.py b/examples/tests/tests_urls.py new file mode 100644 index 0000000..ba8e0e3 --- /dev/null +++ b/examples/tests/tests_urls.py @@ -0,0 +1,33 @@ +""" +Test urls module +""" +from django.test import SimpleTestCase +from django.urls import reverse + + +class TestUrlsSimpleTestCase(SimpleTestCase): + """ + Test Urls Class + """ + + def test_reverse(self): + """ + Test correct reverse + """ + app_name = 'examples' + urls = [ + { + 'url': 'example_frequency', + 'reverse': 'example-frequency/', + }, + { + 'url': 'example_list', + 'reverse': 'list/', + }, + ] + for url in urls: + app_url = f'{app_name}:{url["url"]}' + current_reverse = reverse(app_url) + true_reverse = f'/{app_name}/{url["reverse"]}' + with self.subTest(msg=app_url): + self.assertEqual(current_reverse, true_reverse) diff --git a/examples/tests/tests_views.py b/examples/tests/tests_views.py new file mode 100644 index 0000000..1c41e78 --- /dev/null +++ b/examples/tests/tests_views.py @@ -0,0 +1,162 @@ +from django.urls import reverse +from dry_tests import SimpleTestCase, Request, TrueResponse, POST, Context, ContentValue +from find_similar.examples import examples_set # pylint: disable=import-error +from examples.forms import OneTextForm + + +FORM_CONTENT_VALUES = [ + ContentValue( + value='', + count=1, + ), + ContentValue( + value='
', + count=1, + ), + ] + + +class TestExampleFrequencyView(SimpleTestCase): + """ + Test Example Frequency View + """ + + def setUp(self): + """ + SetUp Test Data + """ + self.text = 'mock' + self.url = reverse('examples:example_frequency') + self.result = (('mock', 2), ('example', 2), + ('for', 2), ('tests', 2), ('this', 1), ('is', 1)) + expected_url_params = [] + for key, value in self.result: + expected_url_params.append(f'{key}={value}') + self.expected_url_params = f'?text={self.text}&{"&".join(expected_url_params)}' + self.redirect_url=f'{self.url}{self.expected_url_params}' + + def test_get(self): + """ + Test get + """ + request = Request( + url=self.url + ) + true_response = TrueResponse( + status_code=200, + context=Context( + keys=['form'], + types={'form': OneTextForm}, + ), + content_values=FORM_CONTENT_VALUES + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + request = Request( + url=self.redirect_url + ) + + content_values = [self.text] + for key, value in self.result: + content_values.append(key) + content_values.append(value) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'text': self.text, + 'result': self.result, + } + ), + content_values=content_values + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + # Error + request = Request( + url=f'{self.url}?text={self.text}&error=some error' + ) + + true_response = TrueResponse( + status_code=200, + context=Context( + items={ + 'text': self.text, + 'error': 'some error', + } + ), + content_values=[ + 'Some Error' + ] + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post(self): + """ + Test post + """ + data = { + 'text': self.text + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + + true_response = TrueResponse( + 
status_code=302, + redirect_url=self.redirect_url + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + def test_post_error_example(self): + """ + Test post with error example + """ + data = { + 'text': 'unknown example value' + } + request = Request( + url=self.url, + method=POST, + data=data, + ) + + true_response = TrueResponse( + status_code=302, + redirect_url=f'{self.url}?text=unknown example value&error=example not found' + ) + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) + + +class ExampleListSimpleTestCase(SimpleTestCase): + + def setUp(self): + self.url = reverse('examples:example_list') + + def test_get(self): + request = Request( + url=self.url, + ) + + examples = examples_set() + + true_response = TrueResponse( + status_code=200, + context=Context( + # keys=['object_list'] + items={ + 'object_list': examples + } + ) + ) + + current_response = request.get_response(self.client) + self.assertTrueResponse(current_response, true_response) diff --git a/examples/urls.py b/examples/urls.py new file mode 100644 index 0000000..4f89772 --- /dev/null +++ b/examples/urls.py @@ -0,0 +1,12 @@ +""" +Analysis app urls +""" +from django.urls import path +from . 
import views + +app_name = 'examples' + +urlpatterns = [ + path('example-frequency/', views.ExampleFrequencyAnalysis.as_view(), name="example_frequency"), + path('list/', views.ExampleList.as_view(), name="example_list"), +] diff --git a/examples/views.py b/examples/views.py new file mode 100644 index 0000000..34831a8 --- /dev/null +++ b/examples/views.py @@ -0,0 +1,58 @@ +from django.views.generic import FormView, TemplateView +from django.urls import reverse +from find_similar.examples import examples_set # pylint: disable=import-error +from .forms import OneTextForm +from .functions import example_frequency_analysis + + +class ExampleFrequencyAnalysis(FormView): + """ + Example Frequency Analysis + """ + form_class = OneTextForm + template_name = 'examples/example_frequency.html' + + def form_valid(self, form): + self.text = form.cleaned_data['text'] + try: + self.result = example_frequency_analysis(self.text) + self.error = None + except FileNotFoundError: + self.error = 'example not found' + return super().form_valid(form) + + def get_context_data(self, **kwargs): + context = super().get_context_data() + data = self.request.GET.dict() + text = data.pop('text', '') + context['text'] = text + error = data.get('error', None) + if error: + context['error'] = error + else: + result = [] + for key, value in data.items(): + result.append((key, int(value))) + context['result'] = tuple(result) + return context + + def get_success_url(self): + reverse_url = reverse("examples:example_frequency") + if self.error: + url = f'{reverse_url}?text={self.text}&error={self.error}' + else: + url_params = [] + for key, value in self.result: + url_params.append(f'{key}={value}') + url_params = f'?text={self.text}&{"&".join(url_params)}' + url = f'{reverse_url}{url_params}' + return url + + +class ExampleList(TemplateView): + template_name = 'examples/list.html' + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) + context['object_list'] = 
examples_set() + return context diff --git a/laboratory/settings.py b/laboratory/settings.py index a7d5fb4..7ccf3b8 100644 --- a/laboratory/settings.py +++ b/laboratory/settings.py @@ -15,10 +15,10 @@ import sys sys.path.append("../find-similar") -from find_similar import find_similar # pylint: disable=wrong-import-position -from find_similar.tokenize import tokenize # pylint: disable=wrong-import-position -from find_similar.calc_functions import calc_cosine_similarity_opt # pylint: disable=wrong-import-position -from find_similar.examples.analyze import frequency_analysis # pylint: disable=wrong-import-position +from find_similar import find_similar # pylint: disable=import-error,wrong-import-position +from find_similar.tokenize import tokenize # pylint: disable=import-error,wrong-import-position +from find_similar.calc_functions import calc_cosine_similarity_opt # pylint: disable=import-error,wrong-import-position +from find_similar.examples.analyze import frequency_analysis # pylint: disable=import-error,wrong-import-position FIND_SIMILAR = find_similar TOKENIZE = tokenize @@ -52,9 +52,12 @@ "django.contrib.staticfiles", # others 'django_find_similar', + "debug_toolbar", # My + 'examples', 'core', 'analysis', + 'utils', ] MIDDLEWARE = [ @@ -62,6 +65,7 @@ "django.contrib.sessions.middleware.SessionMiddleware", "django.middleware.common.CommonMiddleware", "django.middleware.csrf.CsrfViewMiddleware", + "debug_toolbar.middleware.DebugToolbarMiddleware", "django.contrib.auth.middleware.AuthenticationMiddleware", "django.contrib.messages.middleware.MessageMiddleware", "django.middleware.clickjacking.XFrameOptionsMiddleware", @@ -140,3 +144,9 @@ # https://docs.djangoproject.com/en/4.2/ref/settings/#default-auto-field DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" + +INTERNAL_IPS = [ + # ... + "127.0.0.1", + # ... 
+] diff --git a/laboratory/urls.py b/laboratory/urls.py index 1e2c8d0..87710d7 100644 --- a/laboratory/urls.py +++ b/laboratory/urls.py @@ -6,6 +6,8 @@ urlpatterns = [ path("admin/", admin.site.urls), + path('examples/', include('examples.urls')), path('', include('core.urls')), path('analysis/', include('analysis.urls')), + path("__debug__/", include("debug_toolbar.urls")), ] diff --git a/requirements.txt b/requirements.txt index c7877b8..f0a99ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,9 @@ Django==4.2.6 django-dry-tests==1.0.0 -django-find-similar==1.2.0 +django-find-similar==1.3.0 pandas==2.1.1 openpyxl==3.1.2 coverage==7.3.2 -mixer==7.2.2 \ No newline at end of file +mixer==7.2.2 +django-debug-toolbar==4.2.0 +pylint==3.0.2 \ No newline at end of file diff --git a/templates/base.html b/templates/base.html index 8c40e93..1f31e91 100644 --- a/templates/base.html +++ b/templates/base.html @@ -49,29 +49,23 @@