diff --git a/.travis.yml b/.travis.yml
index e4fe0906..7fce146f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,27 +8,21 @@ env:
   - TOXENV=django22
   - TOXENV=quality

-addons:
-  apt:
-    packages:
-      - openjdk-8-jdk
+services:
+  - docker

 # Cache the pip directory. "cache: pip" doesn't work due to install override. See https://github.com/travis-ci/travis-ci/issues/3239.
 cache:
   directories:
     - $HOME/.cache/pip

-before_install:
-  - export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-amd64
-  - export PATH=${PATH/\/usr\/local\/lib\/jvm\/openjdk11\/bin/$JAVA_HOME\/bin}
-  - curl -O https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.5.2.zip
-  - unzip elasticsearch-1.5.2.zip
-  - elasticsearch-1.5.2/bin/elasticsearch -d
+before_install: make test.start_elasticsearch

 install:
   - pip install -r requirements/travis.txt

-script: tox
+# sleep is needed to wait for Elasticsearch to start accepting connections
+script: sleep 10 && tox

 after_success: codecov
diff --git a/Makefile b/Makefile
index 9032e835..a84f1651 100644
--- a/Makefile
+++ b/Makefile
@@ -23,6 +23,16 @@ requirements:
 validate: clean
 	tox

+test.start_elasticsearch:
+	docker-compose up -d
+
+test.stop_elasticsearch:
+	docker-compose stop
+
+test_with_es: clean test.start_elasticsearch
+	coverage run --source='.' manage.py test
+	make test.stop_elasticsearch
+
 upgrade: export CUSTOM_COMPILE_COMMAND=make upgrade
 upgrade: ## update the requirements/*.txt files with the latest packages satisfying requirements/*.in
 	pip install -qr requirements/pip-tools.txt
diff --git a/README.md b/README.md
index efa6395b..9dccb655 100644
--- a/README.md
+++ b/README.md
@@ -252,3 +252,17 @@ Users of the `views.do_search` view can choose to override the SearchResultProcessor
 In particular, the base SearchResultProcessor adds in each python property alongside the properties within the results. So, the override class simply defines properties that it desires to be exposed alongside the results - for example, the LMS includes an `LmsSearchResultProcessor` that defines properties `url` and `location` which are used to infer the url / location for each search result. In this fashion, the search client can blend in information that it infers from the result fields, but it knows how to format / calculate.

 Also, SearchResultProcessor overriders can override the member `should_remove` which allows the client app to determine if access should be excluded to the search result - for example, the LMS includes an implementation of this member that calls the LMS `has_access` as a last safety resort in case the end user does not have access to the result returned.
+
+#### Testing
+Tests use an Elasticsearch Docker container. To run the tests locally, use:
+```
+make test_with_es
+```
+To simply run the container without starting the tests, run:
+```
+make test.start_elasticsearch
+```
+To stop the Elasticsearch Docker container, run:
+```
+make test.stop_elasticsearch
+```
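The `make test_with_es` target above runs the whole suite. To run only part of it against the container, the underlying Django test runner can be invoked directly between the start/stop targets (a sketch; the test label is illustrative, any dotted module path works):
```
make test.start_elasticsearch
python manage.py test search.tests.test_course_discovery
make test.stop_elasticsearch
```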
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..5ad8701c
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,25 @@
+version: '2.2'
+services:
+
+  test_elasticsearch:
+    image: elasticsearch:7.8.0
+    container_name: test_elasticsearch
+    environment:
+      - node.name=test_elasticsearch
+      - cluster.name=docker-cluster
+      - cluster.initial_master_nodes=test_elasticsearch
+      - bootstrap.memory_lock=true
+      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
+      - http.port=9200
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+    volumes:
+      - data01:/usr/share/elasticsearch/data
+    ports:
+      - "9200:9200"
+
+volumes:
+  data01:
+    driver: local
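Since the Travis config above relies on a fixed `sleep 10` for readiness, a more reliable check against this container is to poll Elasticsearch's standard cluster health endpoint on the mapped port until it responds, for example:
```
curl http://localhost:9200/_cluster/health
```
Waiting until this returns a `yellow` or `green` status is more robust than a fixed sleep.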
diff --git a/requirements/base.in b/requirements/base.in
index f77069de..63d7508a 100644
--- a/requirements/base.in
+++ b/requirements/base.in
@@ -12,5 +12,5 @@
 Django  # Web application framework
-elasticsearch>=1.0.0,<2.0.0
+elasticsearch>=7.8.0,<8.0.0
 event-tracking
diff --git a/requirements/base.txt b/requirements/base.txt
index c73bf849..c8b8cc1e 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -4,11 +4,12 @@
 #
 #    make upgrade
 #
-django==2.2.14  # via -r requirements/base.in, event-tracking
-elasticsearch==1.9.0  # via -r requirements/base.in
+certifi==2020.6.20  # via elasticsearch
+django==2.2.15  # via -r requirements/base.in, event-tracking
+elasticsearch==7.8.1  # via -r requirements/base.in
 event-tracking==0.3.3  # via -r requirements/base.in
-pymongo==3.10.1  # via event-tracking
+pymongo==3.11.0  # via event-tracking
 pytz==2020.1  # via django, event-tracking
 six==1.15.0  # via event-tracking
 sqlparse==0.3.1  # via django
-urllib3==1.25.9  # via elasticsearch
+urllib3==1.25.10  # via elasticsearch
diff --git a/requirements/constraints.txt b/requirements/constraints.txt
index 4ea178f0..232a5ae8 100644
--- a/requirements/constraints.txt
+++ b/requirements/constraints.txt
@@ -12,7 +12,7 @@
 ddt < 1.4.0  # newer version is causing failure

-tox-battery==0.5.2
+tox-battery==0.6.1

 coverage<5.1
\ No newline at end of file
diff --git a/requirements/dev.txt b/requirements/dev.txt
index 0d0386ab..578eb852 100644
--- a/requirements/dev.txt
+++ b/requirements/dev.txt
@@ -7,7 +7,7 @@
 appdirs==1.4.4  # via -r requirements/travis.txt, virtualenv
 astroid==2.3.3  # via -r requirements/quality.txt, pylint, pylint-celery
 attrs==19.3.0  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest
-certifi==2020.6.20  # via -r requirements/travis.txt, requests
+certifi==2020.6.20  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, elasticsearch, requests
 chardet==3.0.4  # via -r requirements/travis.txt, requests
 click-log==0.3.2  # via -r requirements/quality.txt, edx-lint
 click==7.1.2  # via -r requirements/pip-tools.txt, -r requirements/quality.txt, click-log, edx-lint, pip-tools
@@ -15,14 +15,15 @@
 codecov==2.1.8  # via -r requirements/travis.txt
 coverage==5.0.4  # via -c requirements/constraints.txt, -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, codecov, pytest-cov
 ddt==1.3.1  # via -c requirements/constraints.txt, -r requirements/quality.txt, -r requirements/testing.txt
 distlib==0.3.1  # via -r requirements/travis.txt, virtualenv
-django==2.2.14  # via -r requirements/quality.txt, -r requirements/testing.txt, event-tracking
+django==2.2.15  # via -r requirements/quality.txt, -r requirements/testing.txt, event-tracking
 edx-lint==1.5.0  # via -r requirements/quality.txt
-elasticsearch==1.9.0  # via -r requirements/quality.txt, -r requirements/testing.txt
+elasticsearch==7.8.1  # via -r requirements/quality.txt, -r requirements/testing.txt
 event-tracking==0.3.3  # via -r requirements/quality.txt, -r requirements/testing.txt
 filelock==3.0.12  # via -r requirements/travis.txt, tox, virtualenv
 idna==2.10  # via -r requirements/travis.txt, requests
 importlib-metadata==1.7.0  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, pluggy, pytest, tox, virtualenv
 importlib-resources==3.0.0  # via -r requirements/travis.txt, virtualenv
+iniconfig==1.0.1  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest
 isort==4.3.21  # via -r requirements/quality.txt, pylint
 lazy-object-proxy==1.4.3  # via -r requirements/quality.txt, astroid
 mccabe==0.6.1  # via -r requirements/quality.txt, pylint
@@ -30,7 +31,7 @@
 mock==3.0.5  # via -r requirements/quality.txt, -r requirements/testing.txt
 more-itertools==8.4.0  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest
 packaging==20.4  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, pytest, tox
 pathlib2==2.3.5  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest
-pip-tools==5.2.1  # via -r requirements/pip-tools.txt
+pip-tools==5.3.1  # via -r requirements/pip-tools.txt
 pluggy==0.13.1  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, pytest, tox
 py==1.9.0  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, pytest, tox
 pycodestyle==2.6.0  # via -r requirements/quality.txt
@@ -38,21 +39,20 @@
 pylint-celery==0.3  # via -r requirements/quality.txt, edx-lint
 pylint-django==2.0.11  # via -r requirements/quality.txt, edx-lint
 pylint-plugin-utils==0.6  # via -r requirements/quality.txt, pylint-celery, pylint-django
 pylint==2.4.4  # via -r requirements/quality.txt, edx-lint, pylint-celery, pylint-django, pylint-plugin-utils
-pymongo==3.10.1  # via -r requirements/quality.txt, -r requirements/testing.txt, event-tracking
+pymongo==3.11.0  # via -r requirements/quality.txt, -r requirements/testing.txt, event-tracking
 pyparsing==2.4.7  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, packaging
 pytest-cov==2.10.0  # via -r requirements/quality.txt, -r requirements/testing.txt
-pytest==5.4.3  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest-cov
+pytest==6.0.1  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest-cov
 pytz==2020.1  # via -r requirements/quality.txt, -r requirements/testing.txt, django, event-tracking
 requests==2.24.0  # via -r requirements/travis.txt, codecov
 six==1.15.0  # via -r requirements/pip-tools.txt, -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, astroid, edx-lint, event-tracking, mock, packaging, pathlib2, pip-tools, tox, virtualenv
 sqlparse==0.3.1  # via -r requirements/quality.txt, -r requirements/testing.txt, django
-toml==0.10.1  # via -r requirements/travis.txt, tox
-tox-battery==0.5.2  # via -c requirements/constraints.txt, -r requirements/travis.txt
-tox==3.17.1  # via -r requirements/travis.txt, tox-battery
+toml==0.10.1  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, pytest, tox
+tox-battery==0.6.1  # via -c requirements/constraints.txt, -r requirements/travis.txt
+tox==3.18.1  # via -r requirements/travis.txt, tox-battery
 typed-ast==1.4.1  # via -r requirements/quality.txt, astroid
-urllib3==1.25.9  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, elasticsearch, requests
-virtualenv==20.0.27  # via -r requirements/travis.txt, tox
-wcwidth==0.2.5  # via -r requirements/quality.txt, -r requirements/testing.txt, pytest
+urllib3==1.25.10  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, elasticsearch, requests
+virtualenv==20.0.29  # via -r requirements/travis.txt, tox
 wrapt==1.11.2  # via -r requirements/quality.txt, astroid
 zipp==1.2.0  # via -r requirements/quality.txt, -r requirements/testing.txt, -r requirements/travis.txt, importlib-metadata, importlib-resources
diff --git a/requirements/pip-tools.txt b/requirements/pip-tools.txt
index 1ae2c19f..4b50eae7 100644
--- a/requirements/pip-tools.txt
+++ b/requirements/pip-tools.txt
@@ -5,7 +5,7 @@
 #    make upgrade
 #
 click==7.1.2  # via pip-tools
-pip-tools==5.2.1  # via -r requirements/pip-tools.in
+pip-tools==5.3.1  # via -r requirements/pip-tools.in
 six==1.15.0  # via pip-tools

 # The following packages are considered to be unsafe in a requirements file:
diff --git a/requirements/quality.txt b/requirements/quality.txt
index 6a3c363b..b34cd254 100644
--- a/requirements/quality.txt
+++ b/requirements/quality.txt
@@ -6,15 +6,17 @@
 #
 astroid==2.3.3  # via pylint, pylint-celery
 attrs==19.3.0  # via -r requirements/testing.txt, pytest
+certifi==2020.6.20  # via -r requirements/testing.txt, elasticsearch
 click-log==0.3.2  # via edx-lint
 click==7.1.2  # via click-log, edx-lint
 coverage==5.0.4  # via -c requirements/constraints.txt, -r requirements/quality.in, -r requirements/testing.txt, pytest-cov
 ddt==1.3.1  # via -c requirements/constraints.txt, -r requirements/testing.txt
-django==2.2.14  # via -r requirements/testing.txt, event-tracking
+django==2.2.15  # via -r requirements/testing.txt, event-tracking
 edx-lint==1.5.0  # via -r requirements/quality.in
-elasticsearch==1.9.0  # via -r requirements/testing.txt
+elasticsearch==7.8.1  # via -r requirements/testing.txt
 event-tracking==0.3.3  # via -r requirements/testing.txt
 importlib-metadata==1.7.0  # via -r requirements/testing.txt, pluggy, pytest
+iniconfig==1.0.1  # via -r requirements/testing.txt, pytest
 isort==4.3.21  # via pylint
 lazy-object-proxy==1.4.3  # via astroid
 mccabe==0.6.1  # via pylint
@@ -29,15 +31,15 @@
 pylint-celery==0.3  # via edx-lint
 pylint-django==2.0.11  # via edx-lint
 pylint-plugin-utils==0.6  # via pylint-celery, pylint-django
 pylint==2.4.4  # via edx-lint, pylint-celery, pylint-django, pylint-plugin-utils
-pymongo==3.10.1  # via -r requirements/testing.txt, event-tracking
+pymongo==3.11.0  # via -r requirements/testing.txt, event-tracking
 pyparsing==2.4.7  # via -r requirements/testing.txt, packaging
 pytest-cov==2.10.0  # via -r requirements/testing.txt
-pytest==5.4.3  # via -r requirements/testing.txt, pytest-cov
+pytest==6.0.1  # via -r requirements/testing.txt, pytest-cov
 pytz==2020.1  # via -r requirements/testing.txt, django, event-tracking
 six==1.15.0  # via -r requirements/testing.txt, astroid, edx-lint, event-tracking, mock, packaging, pathlib2
 sqlparse==0.3.1  # via -r requirements/testing.txt, django
+toml==0.10.1  # via -r requirements/testing.txt, pytest
 typed-ast==1.4.1  # via astroid
-urllib3==1.25.9  # via -r requirements/testing.txt, elasticsearch
-wcwidth==0.2.5  # via -r requirements/testing.txt, pytest
+urllib3==1.25.10  # via -r requirements/testing.txt, elasticsearch
 wrapt==1.11.2  # via astroid
 zipp==1.2.0  # via -r requirements/testing.txt, importlib-metadata
diff --git a/requirements/testing.txt b/requirements/testing.txt
index 18e34de9..d34b454b 100644
--- a/requirements/testing.txt
+++ b/requirements/testing.txt
@@ -5,24 +5,26 @@
 #    make upgrade
 #
 attrs==19.3.0  # via pytest
+certifi==2020.6.20  # via -r requirements/base.txt, elasticsearch
 coverage==5.0.4  # via -c requirements/constraints.txt, -r requirements/testing.in, pytest-cov
 ddt==1.3.1  # via -c requirements/constraints.txt, -r requirements/testing.in
-elasticsearch==1.9.0  # via -r requirements/base.txt
+elasticsearch==7.8.1  # via -r requirements/base.txt
 event-tracking==0.3.3  # via -r requirements/base.txt
 importlib-metadata==1.7.0  # via pluggy, pytest
+iniconfig==1.0.1  # via pytest
 mock==3.0.5  # via -r requirements/testing.in
 more-itertools==8.4.0  # via pytest
 packaging==20.4  # via pytest
 pathlib2==2.3.5  # via pytest
 pluggy==0.13.1  # via pytest
 py==1.9.0  # via pytest
-pymongo==3.10.1  # via -r requirements/base.txt, event-tracking
+pymongo==3.11.0  # via -r requirements/base.txt, event-tracking
 pyparsing==2.4.7  # via packaging
 pytest-cov==2.10.0  # via -r requirements/testing.in
-pytest==5.4.3  # via pytest-cov
+pytest==6.0.1  # via pytest-cov
 pytz==2020.1  # via -r requirements/base.txt, django, event-tracking
 six==1.15.0  # via -r requirements/base.txt, event-tracking, mock, packaging, pathlib2
 sqlparse==0.3.1  # via -r requirements/base.txt, django
-urllib3==1.25.9  # via -r requirements/base.txt, elasticsearch
-wcwidth==0.2.5  # via pytest
+toml==0.10.1  # via pytest
+urllib3==1.25.10  # via -r requirements/base.txt, elasticsearch
 zipp==1.2.0  # via importlib-metadata
diff --git a/requirements/travis.txt b/requirements/travis.txt
index 3362359b..4414a120 100644
--- a/requirements/travis.txt
+++ b/requirements/travis.txt
@@ -21,8 +21,8 @@
 pyparsing==2.4.7  # via packaging
 requests==2.24.0  # via codecov
 six==1.15.0  # via packaging, tox, virtualenv
 toml==0.10.1  # via tox
-tox-battery==0.5.2  # via -c requirements/constraints.txt, -r requirements/travis.in
-tox==3.17.1  # via -r requirements/travis.in, tox-battery
-urllib3==1.25.9  # via requests
-virtualenv==20.0.27  # via tox
+tox-battery==0.6.1  # via -c requirements/constraints.txt, -r requirements/travis.in
+tox==3.18.1  # via -r requirements/travis.in, tox-battery
+urllib3==1.25.10  # via requests
+virtualenv==20.0.29  # via tox
 zipp==1.2.0  # via importlib-metadata, importlib-resources
diff --git a/search/api.py b/search/api.py
index d5ad71e0..81c26090 100644
--- a/search/api.py
+++ b/search/api.py
@@ -14,26 +14,30 @@


 def course_discovery_filter_fields():
-    """ look up the desired list of course discovery filter fields """
+    """
+    Look up the desired list of course discovery filter fields.
+    """
     return getattr(settings, "COURSE_DISCOVERY_FILTERS", DEFAULT_FILTER_FIELDS)


-def course_discovery_facets():
-    """ Discovery facets to include, by default we specify each filter field with unspecified size attribute """
-    return getattr(settings, "COURSE_DISCOVERY_FACETS", {field: {} for field in course_discovery_filter_fields()})
-
-
-class NoSearchEngineError(Exception):
-    """ NoSearchEngineError exception to be thrown if no search engine is specified """
+def course_discovery_aggregations():
+    """
+    Aggregation (bucket) names to include in course discovery results.
+
+    By default we specify each filter field with an unspecified size attribute.
+    """
+    return getattr(
+        settings,
+        "COURSE_DISCOVERY_AGGREGATIONS",
+        {field: {} for field in course_discovery_filter_fields()}
+    )


-class QueryParseError(Exception):
-    """QueryParseError will be thrown if the query is malformed.
-
-    If a query has mismatched quotes (e.g. '"some phrase', return a
-    more specific exception so the view can provide a more helpful
-    error message to the user.
+class NoSearchEngineError(Exception):
+    """
+    NoSearchEngineError exception.
+
+    Raised if no search engine is specified.
     """


@@ -43,7 +47,9 @@ def perform_search(
         size=10,
         from_=0,
         course_id=None):
-    """ Call the search engine with the appropriate parameters """
+    """
+    Call the search engine with the appropriate parameters.
+    """
     # field_, filter_ and exclude_dictionary(s) can be overridden by calling application
     # field_dictionary includes course if course_id provided
     (field_dictionary, filter_dictionary, exclude_dictionary) = SearchFilterGenerator.generate_field_filters(
@@ -51,7 +57,9 @@
         course_id=course_id
     )

-    searcher = SearchEngine.get_search_engine(getattr(settings, "COURSEWARE_INDEX_NAME", "courseware_index"))
+    searcher = SearchEngine.get_search_engine(
+        getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content")
+    )
     if not searcher:
         raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE")

@@ -62,7 +70,6 @@
         exclude_dictionary=exclude_dictionary,
         size=size,
         from_=from_,
-        doc_type="courseware_content",
     )

     # post-process the result
@@ -83,20 +90,23 @@
     # dictionary, and use our own logic upon enrollment dates for these
     use_search_fields = ["org"]
     (search_fields, _, exclude_dictionary) = SearchFilterGenerator.generate_field_filters()
-    use_field_dictionary = {}
-    use_field_dictionary.update({field: search_fields[field] for field in search_fields if field in use_search_fields})
+    use_field_dictionary = {
+        field: search_fields[field]
+        for field in search_fields if field in use_search_fields
+    }
     if field_dictionary:
         use_field_dictionary.update(field_dictionary)
     if not getattr(settings, "SEARCH_SKIP_ENROLLMENT_START_DATE_FILTERING", False):
         use_field_dictionary["enrollment_start"] = DateRange(None, datetime.utcnow())

-    searcher = SearchEngine.get_search_engine(getattr(settings, "COURSEWARE_INDEX_NAME", "courseware_index"))
+    searcher = SearchEngine.get_search_engine(
+        getattr(settings, "COURSEWARE_INFO_INDEX_NAME", "course_info")
+    )
     if not searcher:
         raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE")

     results = searcher.search(
         query_string=search_term,
-        doc_type="course_info",
         size=size,
         from_=from_,
         # only show when enrollment start IS provided and is before now
@@ -104,7 +114,7 @@
         # show if no enrollment end is provided and has not yet been reached
         filter_dictionary={"enrollment_end": DateRange(datetime.utcnow(), None)},
         exclude_dictionary=exclude_dictionary,
-        facet_terms=course_discovery_facets(),
+        aggregation_terms=course_discovery_aggregations(),
     )

     return results
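`course_discovery_aggregations()` makes the aggregation buckets configurable from Django settings. A sketch of an override (field names and `size` values are illustrative):
```
# settings.py
COURSE_DISCOVERY_AGGREGATIONS = {
    "org": {},               # use the engine's default bucket size
    "modes": {"size": 20},   # cap the number of mode buckets returned
    "language": {},
}
```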
+ """ + return getattr( + settings, + "COURSE_DISCOVERY_AGGREGATIONS", + {field: {} for field in course_discovery_filter_fields()} + ) -class QueryParseError(Exception): - """QueryParseError will be thrown if the query is malformed. - If a query has mismatched quotes (e.g. '"some phrase', return a - more specific exception so the view can provide a more helpful - error message to the user. +class NoSearchEngineError(Exception): + """ + NoSearchEngineError exception. + It is thrown if no search engine is specified. """ @@ -43,7 +47,9 @@ def perform_search( size=10, from_=0, course_id=None): - """ Call the search engine with the appropriate parameters """ + """ + Call the search engine with the appropriate parameters + """ # field_, filter_ and exclude_dictionary(s) can be overridden by calling application # field_dictionary includes course if course_id provided (field_dictionary, filter_dictionary, exclude_dictionary) = SearchFilterGenerator.generate_field_filters( @@ -51,7 +57,9 @@ def perform_search( course_id=course_id ) - searcher = SearchEngine.get_search_engine(getattr(settings, "COURSEWARE_INDEX_NAME", "courseware_index")) + searcher = SearchEngine.get_search_engine( + getattr(settings, "COURSEWARE_CONTENT_INDEX_NAME", "courseware_content") + ) if not searcher: raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE") @@ -62,7 +70,6 @@ def perform_search( exclude_dictionary=exclude_dictionary, size=size, from_=from_, - doc_type="courseware_content", ) # post-process the result @@ -83,20 +90,23 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary # dictionary, and use our own logic upon enrollment dates for these use_search_fields = ["org"] (search_fields, _, exclude_dictionary) = SearchFilterGenerator.generate_field_filters() - use_field_dictionary = {} - use_field_dictionary.update({field: search_fields[field] for field in search_fields if field in use_search_fields}) + use_field_dictionary = { + field: search_fields[field] + for field in search_fields if field in use_search_fields + } if field_dictionary: use_field_dictionary.update(field_dictionary) if not getattr(settings, "SEARCH_SKIP_ENROLLMENT_START_DATE_FILTERING", False): use_field_dictionary["enrollment_start"] = DateRange(None, datetime.utcnow()) - searcher = SearchEngine.get_search_engine(getattr(settings, "COURSEWARE_INDEX_NAME", "courseware_index")) + searcher = SearchEngine.get_search_engine( + getattr(settings, "COURSEWARE_INFO_INDEX_NAME", "course_info") + ) if not searcher: raise NoSearchEngineError("No search engine specified in settings.SEARCH_ENGINE") results = searcher.search( query_string=search_term, - doc_type="course_info", size=size, from_=from_, # only show when enrollment start IS provided and is before now @@ -104,7 +114,7 @@ def course_discovery_search(search_term=None, size=20, from_=0, field_dictionary # show if no enrollment end is provided and has not yet been reached filter_dictionary={"enrollment_end": DateRange(datetime.utcnow(), None)}, exclude_dictionary=exclude_dictionary, - facet_terms=course_discovery_facets(), + aggregation_terms=course_discovery_aggregations(), ) return results diff --git a/search/elastic.py b/search/elastic.py index f9e80796..87b4e474 100644 --- a/search/elastic.py +++ b/search/elastic.py @@ -1,5 +1,6 @@ -""" Elastic Search implementation for courseware search index """ - +""" +Elastic Search implementation for courseware search index +""" import copy import logging @@ -8,12 +9,11 @@ from elasticsearch import 
Elasticsearch, exceptions from elasticsearch.helpers import bulk, BulkIndexError -from search.api import QueryParseError from search.search_engine_base import SearchEngine from search.utils import ValueRange, _is_iterable # log appears to be standard name used for logger -log = logging.getLogger(__name__) # pylint: disable=invalid-name +log = logging.getLogger(__name__) # These are characters that may have special meaning within Elasticsearch. # We _may_ want to use these for their special uses for certain queries, @@ -23,252 +23,307 @@ def _translate_hits(es_response): - """ Provide resultset in our desired format from elasticsearch results """ + """ + Provide result set in our desired format from elasticsearch results. + + { + 'aggs': { + 'language': {'other': 0, 'terms': {}, 'total': 0.0}, + 'modes': {'other': 0, 'terms': {'honor': 3, 'other': 1, 'verified': 2}, 'total': 6.0}, + 'org': {'other': 0, 'terms': {'OrgA': 2, 'OrgB': 2}, 'total': 4.0} + }, + 'max_score': 1.0, + 'results': [ + { + '_id': 'edX/DemoX/Demo_Course_1', + '_index': 'test_index', + '_type': '_doc', + 'data': { + 'content': { + 'display_name': 'edX Demonstration Course', + 'number': 'DemoX', + 'overview': 'Long overview page', + 'short_description': 'Short description' + }, + 'course': 'edX/DemoX/Demo_Course', + 'effort': '5:30', + 'enrollment_start': '2014-01-01T00:00:00', + 'id': 'edX/DemoX/Demo_Course_1', + 'image_url': '/c4x/edX/DemoX/asset/images_course_image.jpg', + 'modes': ['honor', 'verified'], + 'number': 'DemoX', + 'org': 'OrgA', + 'start': '2014-02-01T00:00:00' + }, + 'score': 1.0 + }, + { + '_id': 'edX/DemoX/Demo_Course_2', + '_index': 'test_index', + '_type': '_doc', + 'data': { + 'content': { + 'display_name': 'edX Demonstration Course', + 'number': 'DemoX', + 'overview': 'Long overview page', + 'short_description': 'Short description' + }, + 'course': 'edX/DemoX/Demo_Course', + 'effort': '5:30', + 'enrollment_start': '2014-01-01T00:00:00', + 'id': 'edX/DemoX/Demo_Course_2', + 'image_url': '/c4x/edX/DemoX/asset/images_course_image.jpg', + 'modes': ['honor'], + 'number': 'DemoX', + 'org': 'OrgA', + 'start': '2014-02-01T00:00:00' + }, + 'score': 1.0 + }, + ], + 'took': 2, + 'total': 5 + } + + """ def translate_result(result): - """ Any conversion from ES result syntax into our search engine syntax """ + """ + Any conversion from ES result syntax into our search engine syntax + """ translated_result = copy.copy(result) - data = translated_result.pop("_source") + translated_result["data"] = translated_result.pop("_source") + translated_result["score"] = translated_result.pop("_score") + return translated_result - translated_result.update({ - "data": data, - "score": translated_result["_score"] - }) + def translate_agg_bucket(bucket, agg_result): + """ + Any conversion from ES aggregations result into our search engine syntax - return translated_result + agg_result argument needs for getting total number of + documents per bucket. 
- def translate_facet(result): - """ Any conversion from ES facet syntax into our search engine sytax """ - terms = {term["term"]: term["count"] for term in result["terms"]} + :param bucket: string + :param agg_result: dict + :return: dict + """ + agg_item = agg_result[bucket] + terms = { + bucket["key"]: bucket["doc_count"] + for bucket in agg_item["buckets"] + } + total_docs = ( + agg_result[_get_total_doc_key(bucket)]["value"] + + agg_item["sum_other_doc_count"] + + agg_item["doc_count_error_upper_bound"] + ) return { "terms": terms, - "total": result["total"], - "other": result["other"], + "total": total_docs, + "other": agg_item["sum_other_doc_count"], } - results = [translate_result(hit) for hit in es_response["hits"]["hits"]] + results = list(map(translate_result, es_response["hits"]["hits"])) response = { "took": es_response["took"], - "total": es_response["hits"]["total"], + "total": es_response["hits"]["total"]["value"], "max_score": es_response["hits"]["max_score"], "results": results, } - - if "facets" in es_response: - response["facets"] = {facet: translate_facet(es_response["facets"][facet]) for facet in es_response["facets"]} + if "aggregations" in es_response: + response["aggs"] = { + bucket: translate_agg_bucket(bucket, es_response["aggregations"]) + for bucket in es_response["aggregations"] + if "total_" not in bucket + } return response def _get_filter_field(field_name, field_value): - """ Return field to apply into filter, if an array then use a range, otherwise look for a term match """ - filter_field = None + """ + Return field to apply into filter. + + If an array then use a range, otherwise look for a term match. + """ + filter_query_field = {"term": {field_name: field_value}} if isinstance(field_value, ValueRange): range_values = {} if field_value.lower: - range_values.update({"gte": field_value.lower_string}) + range_values["gte"] = field_value.lower_string if field_value.upper: - range_values.update({"lte": field_value.upper_string}) - filter_field = { + range_values["lte"] = field_value.upper_string + filter_query_field = { "range": { field_name: range_values } } elif _is_iterable(field_value): - filter_field = { + filter_query_field = { "terms": { field_name: field_value - } - } - else: - filter_field = { - "term": { - field_name: field_value - } + }, } - return filter_field + return filter_query_field def _process_field_queries(field_dictionary): """ - We have a field_dictionary - we want to match the values for an elasticsearch "match" query - This is only potentially useful when trying to tune certain search operations + Prepare ES query which must be in the ES record set. """ - def field_item(field): - """ format field match as "match" item for elasticsearch query """ - return { - "match": { - field: field_dictionary[field] - } - } + return [ + _get_filter_field(field, field_value) + for field, field_value in field_dictionary.items() + ] - return [field_item(field) for field in field_dictionary] - -def _process_field_filters(field_dictionary): +def _process_filters(filter_dictionary): """ - We have a field_dictionary - we match the values using a "term" filter in elasticsearch + Build list for filtering. + + Match records where filtered fields may not exists. 
""" - return [_get_filter_field(field, field_value) for field, field_value in field_dictionary.items()] + for field, value in filter_dictionary.items(): + if value: + yield _get_filter_field(field, value) + yield { + "bool": { + "must_not": {"exists": {"field": field}}, + }, + } -def _process_filters(filter_dictionary): +def _process_exclude_dictionary(exclude_dictionary): """ - We have a filter_dictionary - this means that if the field is included - and matches, then we can include, OR if the field is undefined, then we - assume it is safe to include + Build a list of term fields which will be excluded from result set. """ - def filter_item(field): - """ format elasticsearch filter to pass if value matches OR field is not included """ - if filter_dictionary[field] is not None: - return { - "or": [ - _get_filter_field(field, filter_dictionary[field]), - { - "missing": { - "field": field - } - } - ] - } + for exclude_property, exclude_values in exclude_dictionary.items(): + if not isinstance(exclude_values, list): + exclude_values = (exclude_values,) + yield from ( + {"term": {exclude_property: exclude_value}} + for exclude_value in exclude_values + ) - return { - "missing": { - "field": field - } - } - return [filter_item(field) for field in filter_dictionary] +def _get_total_doc_key(bucket_name): + """ + Returns additional bucket name for passed bucket. + + Additional buckets are needed for the subsequent counting of + documents per bucket. + :param bucket_name: string + :return: string + """ + return "total_{}_docs".format(bucket_name) -def _process_exclude_dictionary(exclude_dictionary): +def _process_aggregation_terms(aggregation_terms): """ - Based on values in the exclude_dictionary generate a list of term queries that - will filter out unwanted results. + We have a list of terms with which we return aggregated result. """ - # not_properties will hold the generated term queries. - not_properties = [] - for exclude_property in exclude_dictionary: - exclude_values = exclude_dictionary[exclude_property] - if not isinstance(exclude_values, list): - exclude_values = [exclude_values] - not_properties.extend([{"term": {exclude_property: exclude_value}} for exclude_value in exclude_values]) - - # Returning a query segment with an empty list freaks out ElasticSearch, - # so just return an empty segment. 
- if not not_properties: - return {} - - return { - "not": { - "filter": { - "or": not_properties - } + elastic_aggs = {} + for bucket, options in aggregation_terms.items(): + agg_term = {agg_option: options[agg_option] for agg_option in options} + agg_term["field"] = bucket + elastic_aggs[bucket] = { + "terms": agg_term } - } - -def _process_facet_terms(facet_terms): - """ We have a list of terms with which we return facets """ - elastic_facets = {} - for facet in facet_terms: - facet_term = {"field": facet} - if facet_terms[facet]: - for facet_option in facet_terms[facet]: - facet_term[facet_option] = facet_terms[facet][facet_option] - - elastic_facets[facet] = { - "terms": facet_term + # creates sum bucket which stores total number of documents per bucket + elastic_aggs[_get_total_doc_key(bucket)] = { + "sum_bucket": { + "buckets_path": bucket + "._count" + } } - return elastic_facets + return elastic_aggs class ElasticSearchEngine(SearchEngine): - - """ ElasticSearch implementation of SearchEngine abstraction """ + """ + ElasticSearch implementation of SearchEngine abstraction + """ @staticmethod - def get_cache_item_name(index_name, doc_type): - """ name-formatter for cache_item_name """ - return "elastic_search_mappings_{}_{}".format( - index_name, - doc_type - ) + def get_cache_item_name(index_name): + """ + Name-formatter for cache_item_name + """ + return "elastic_search_mappings_{}".format(index_name) @classmethod - def get_mappings(cls, index_name, doc_type): - """ fetch mapped-items structure from cache """ - return cache.get(cls.get_cache_item_name(index_name, doc_type), {}) + def get_mappings(cls, index_name): + """ + Fetch mapped-items structure from cache + """ + return cache.get(cls.get_cache_item_name(index_name), {}) @classmethod - def set_mappings(cls, index_name, doc_type, mappings): - """ set new mapped-items structure into cache """ - cache.set(cls.get_cache_item_name(index_name, doc_type), mappings) + def set_mappings(cls, index_name, mappings): + """ + Set new mapped-items structure into cache + """ + cache.set(cls.get_cache_item_name(index_name), mappings) @classmethod def log_indexing_error(cls, indexing_errors): - """ Logs indexing errors and raises a general ElasticSearch Exception""" - indexing_errors_log = [] - for indexing_error in indexing_errors: - indexing_errors_log.append(str(indexing_error)) - raise exceptions.ElasticsearchException(', '.join(indexing_errors_log)) + """ + Logs indexing errors and raises a general ElasticSearch Exception + """ + raise exceptions.ElasticsearchException(', '.join(map(str, indexing_errors))) - def _get_mappings(self, doc_type): + @property + def mappings(self): """ - Interfaces with the elasticsearch mappings for the index - prevents multiple loading of the same mappings from ES when called more than once + Get mapping of current index. 
Mappings format in elasticsearch is as follows: { - "doc_type": { - "properties": { - "nested_property": { + "properties": { + "nested_property": { "properties": { - "an_analysed_property": { - "type": "string" - }, - "another_analysed_property": { - "type": "string" - } + "an_analysed_property": { + "type": "string" + }, + "another_analysed_property": { + "type": "string" + } } - }, - "a_not_analysed_property": { + }, + "a_not_analysed_property": { "type": "string", - "index": "not_analyzed" - }, - "a_date_property": { + }, + "a_date_property": { "type": "date" - } - } - } + } + } } - We cache the properties of each doc_type, if they are not available, we'll load them again from Elasticsearch + We cache the properties of each index, if they are not available, + we'll load them again from Elasticsearch """ # Try loading the mapping from the cache. - mapping = ElasticSearchEngine.get_mappings(self.index_name, doc_type) + mapping = ElasticSearchEngine.get_mappings(self.index_name) # Fall back to Elasticsearch if not mapping: mapping = self._es.indices.get_mapping( - index=self.index_name, - doc_type=doc_type, - ).get(self.index_name, {}).get('mappings', {}).get(doc_type, {}) - + index=self.index_name + ).get(self.index_name, {}).get("mappings", {}) # Cache the mapping, if one was retrieved if mapping: - ElasticSearchEngine.set_mappings( - self.index_name, - doc_type, - mapping - ) + ElasticSearchEngine.set_mappings(self.index_name, mapping) return mapping - def _clear_mapping(self, doc_type): - """ Remove the cached mappings, so that they get loaded from ES next time they are requested """ - ElasticSearchEngine.set_mappings(self.index_name, doc_type, {}) + def _clear_mapping(self): + """ + Remove the cached mappings. + + Next time ES mappings is are requested. + """ + ElasticSearchEngine.set_mappings(self.index_name, {}) def __init__(self, index=None): super(ElasticSearchEngine, self).__init__(index) @@ -277,12 +332,16 @@ def __init__(self, index=None): if not self._es.indices.exists(index=self.index_name): self._es.indices.create(index=self.index_name) - def _check_mappings(self, doc_type, body): + def _check_mappings(self, body): """ - We desire to index content so that anything we want to be textually searchable(and therefore needing to be - analysed), but the other fields are designed to be filters, and only require an exact match. So, we want to - set up the mappings for these fields as "not_analyzed" - this will allow our filters to work faster because - they only have to work off exact matches + Put mapping to the index. + + We desire to index content so that anything we want to be textually + searchable(and therefore needing to be analysed), but the other fields + are designed to be filters, and only require an exact match. So, we want + to set up the mappings for these fields as "not_analyzed" - this will + allow our filters to work faster because they only have to work off + exact matches. """ # Make fields other than content be indexed as unanalyzed terms - content @@ -292,151 +351,121 @@ def _check_mappings(self, doc_type, body): def field_property(field_name, field_value): """ - Prepares field as property syntax for providing correct mapping desired for field + Build fields as ES syntax. + + Prepares field as property syntax for providing correct + mapping desired for field. 
Mappings format in elasticsearch is as follows: { - "doc_type": { - "properties": { - "nested_property": { + "properties": { + "nested_property": { "properties": { - "an_analysed_property": { - "type": "string" - }, - "another_analysed_property": { - "type": "string" - } + "an_analysed_property": { + "type": "string" + }, + "another_analysed_property": { + "type": "string" + } } - }, - "a_not_analysed_property": { - "type": "string", - "index": "not_analyzed" - }, - "a_date_property": { + }, + "a_not_analysed_property": { + "type": "string" + }, + "a_date_property": { "type": "date" - } - } - } + } + } } We can only add new ones, but the format is the same """ - prop_val = None + prop_val = {"type": "keyword"} if field_name in field_properties: prop_val = field_properties[field_name] elif isinstance(field_value, dict): props = {fn: field_property(fn, field_value[fn]) for fn in field_value} prop_val = {"properties": props} - else: - prop_val = { - "type": "string", - "index": "not_analyzed", - } return prop_val new_properties = { field: field_property(field, value) for field, value in body.items() - if (field not in exclude_fields) and (field not in self._get_mappings(doc_type).get('properties', {})) + if (field not in exclude_fields) and (field not in self.mappings.get("properties", {})) } if new_properties: self._es.indices.put_mapping( index=self.index_name, - doc_type=doc_type, - body={ - doc_type: { - "properties": new_properties, - } - } + body={"properties": new_properties} ) - self._clear_mapping(doc_type) + self._clear_mapping() - def index(self, doc_type, sources, **kwargs): + def index(self, sources, **kwargs): """ - Implements call to add documents to the ES index - Note the call to _check_mappings which will setup fields with the desired mappings + Implements call to add documents to the ES index. + + Note the call to _check_mappings which will setup fields with + the desired mappings. """ try: actions = [] for source in sources: - self._check_mappings(doc_type, source) - id_ = source['id'] if 'id' in source else None - log.debug("indexing %s object with id %s", doc_type, id_) # lint-amnesty, pylint: disable=unicode-format-string + self._check_mappings(source) + id_ = source.get("id") + log.debug("indexing object with id %s", id_) action = { "_index": self.index_name, - "_type": doc_type, "_id": id_, "_source": source } actions.append(action) # bulk() returns a tuple with summary information - # number of successfully executed actions and number of errors if stats_only is set to True. - _, indexing_errors = bulk( - self._es, - actions, - **kwargs - ) + # number of successfully executed actions and number of errors + # if stats_only is set to True. 
+ _, indexing_errors = bulk(self._es, actions, **kwargs) if indexing_errors: ElasticSearchEngine.log_indexing_error(indexing_errors) # Broad exception handler to protect around bulk call - except Exception as ex: - # log information and re-raise - log.exception("error while indexing - %s", str(ex)) # lint-amnesty, pylint: disable=unicode-format-string + except exceptions.ElasticsearchException as ex: + log.exception("Error during ES bulk operation.") raise - def remove(self, doc_type, doc_ids, **kwargs): - """ Implements call to remove the documents from the index """ + def remove(self, doc_ids, **kwargs): + """ + Implements call to remove the documents from the index + """ try: - # ignore is flagged as an unexpected-keyword-arg; ES python client documents that it can be used - # pylint: disable=unexpected-keyword-arg actions = [] for doc_id in doc_ids: - log.debug("Removing document of type %s and index %s", doc_type, doc_id) # lint-amnesty, pylint: disable=unicode-format-string + log.debug("Removing document with id %s", doc_id) action = { - '_op_type': 'delete', + "_op_type": "delete", "_index": self.index_name, - "_type": doc_type, "_id": doc_id } actions.append(action) bulk(self._es, actions, **kwargs) except BulkIndexError as ex: - valid_errors = [error for error in ex.errors if error['delete']['status'] != 404] + valid_errors = [error for error in ex.errors if error["delete"]["status"] != 404] if valid_errors: - log.exception("An error occurred while removing documents from the index.") + log.exception("An error occurred while removing documents from the index: %r", valid_errors) raise - # A few disabled pylint violations here: - # This procedure takes each of the possible input parameters and builds the query with each argument - # I tried doing this in separate steps, but IMO it makes it more difficult to follow instead of less - # So, reasoning: - # - # too-many-arguments: We have all these different parameters to which we - # wish to pay attention, it makes more sense to have them listed here - # instead of burying them within kwargs - # - # too-many-locals: I think this counts all the arguments as well, but - # there are some local variables used herein that are there for transient - # purposes and actually promote the ease of understanding - # - # too-many-branches: There's a lot of logic on the 'if I have this - # optional argument then...'. Reasoning goes back to its easier to read - # the (somewhat linear) flow rather than to jump up to other locations in code def search(self, query_string=None, field_dictionary=None, filter_dictionary=None, exclude_dictionary=None, - facet_terms=None, + aggregation_terms=None, exclude_ids=None, use_field_match=False, - **kwargs): # pylint: disable=too-many-arguments, too-many-locals, too-many-branches, arguments-differ, unicode-format-string - """ # lint-amnesty, pylint: disable=unicode-format-string + **kwargs): # pylint: disable=arguments-differ, unused-argument + """ Implements call to search the index for the desired content. 
Args: @@ -456,10 +485,10 @@ def search(self, documents which have any of these fields and for which the value matches one of the specified values shall be filtered out of the result set - facet_terms (dict): dictionary of terms to include within search - facets list - key is the term desired to facet upon, and the value is a + aggregation_terms (dict): dictionary of terms to include within search + aggregation list - key is the term desired to aggregate upon, and the value is a dictionary of extended information to include. Supported right now is a - size specification for a cap upon how many facet results to return (can + size specification for a cap upon how many aggregation results to return (can be an empty dictionary to use default size for underlying engine): e.g. @@ -468,7 +497,7 @@ def search(self, "modes": {} } - use_field_match (bool): flag to indicate whether to use elastic + (deprecated) use_field_match (bool): flag to indicate whether to use elastic filtering or elastic matching for field matches - this is nothing but a potential performance tune for certain queries @@ -478,40 +507,48 @@ def search(self, Returns: dict object with results in the desired format { + "_shards": {"failed": 0, "skipped": 0, "successful": 1, "total": 1}, "took": 3, - "total": 4, - "max_score": 2.0123, - "results": [ - { - "score": 2.0123, - "data": { - ... - } - }, - { - "score": 0.0983, - "data": { - ... - } - } - ], - "facets": { + "timed_out": False, + "hits": { + "hits": [ + {"_id": "FAKE_ID_1", + "_index": "test_index", + "_type": "_doc", + "data": {"id": "FAKE_ID_1", + "org": "edX", + "subject": "mathematics"}, + "score": 1.0}, + {"_id": "FAKE_ID_2", + "_index": "test_index", + "_type": "_doc", + "data": {"id": "FAKE_ID_2", + "org": "MIT", + "subject": "mathematics"}, + "score": 1.0}, + ], + "max_score": 1.0, + "total": {"relation": "eq", "value": 7} + }, + "aggregation": { "org": { - "total": total_count, - "other": 1, - "terms": { - "MITx": 25, - "HarvardX": 18 - } + "buckets": [ + {"doc_count": 4, "key": "Harvard"}, + {"doc_count": 2, "key": "MIT"} + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 1 }, - "modes": { - "total": modes_count, - "other": 15, - "terms": { - "honor": 58, - "verified": 44, - } - } + "subject": { + "buckets": [ + {"doc_count": 3, "key": "mathematics"}, + {"doc_count": 2, "key": "physics"} + ], + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 1 + }, + "total_org_docs": {"value": 6.0}, + "total_subject_docs": {"value": 5.0}}, } } @@ -530,14 +567,18 @@ def search(self, ) """ - log.debug("searching index with %s", query_string) # lint-amnesty, pylint: disable=unicode-format-string + log.debug("searching index with %s", query_string) elastic_queries = [] elastic_filters = [] - # We have a query string, search all fields for matching text within the "content" node + # We have a query string, search all fields for matching text + # within the "content" node if query_string: - query_string = query_string.translate(query_string.maketrans('', '', RESERVED_CHARACTERS)) + query_string = query_string.translate( + query_string.maketrans("", "", RESERVED_CHARACTERS) + ) + elastic_queries.append({ "query_string": { "fields": ["content.*"], @@ -546,10 +587,8 @@ def search(self, }) if field_dictionary: - if use_field_match: - elastic_queries.extend(_process_field_queries(field_dictionary)) - else: - elastic_filters.extend(_process_field_filters(field_dictionary)) + # strict match of transferred fields + 
elastic_queries.extend(_process_field_queries(field_dictionary)) if filter_dictionary: elastic_filters.extend(_process_filters(filter_dictionary)) @@ -562,12 +601,10 @@ def search(self, exclude_dictionary["_id"] = [] exclude_dictionary["_id"].extend(exclude_ids) - if exclude_dictionary: - elastic_filters.append(_process_exclude_dictionary(exclude_dictionary)) - query_segment = { "match_all": {} } + if elastic_queries: query_segment = { "bool": { @@ -579,35 +616,35 @@ def search(self, if elastic_filters: filter_segment = { "bool": { - "must": elastic_filters + "should": elastic_filters } } query = { - "filtered": { - "query": query_segment, + "bool": { + "must": query_segment, "filter": filter_segment, } } + if exclude_dictionary: + excluded_fields = list(_process_exclude_dictionary(exclude_dictionary)) + if query.get("bool"): + query["bool"]["must_not"] = excluded_fields + else: + query = { + "bool": { + "must_not": excluded_fields + } + } + body = {"query": query} - if facet_terms: - facet_query = _process_facet_terms(facet_terms) - if facet_query: - body["facets"] = facet_query + if aggregation_terms: + body["aggs"] = _process_aggregation_terms(aggregation_terms) try: - es_response = self._es.search( - index=self.index_name, - body=body, - **kwargs - ) + es_response = self._es.search(index=self.index_name, body=body, **kwargs) except exceptions.ElasticsearchException as ex: - message = str(ex) - if 'QueryParsingException' in message: - log.exception("Malformed search query: %s", message) # lint-amnesty, pylint: disable=unicode-format-string - raise QueryParseError('Malformed search query.') - # log information and re-raise - log.exception("error while searching index - %s", str(message)) # lint-amnesty, pylint: disable=unicode-format-string + log.exception("error while searching index - %r", ex) raise return _translate_hits(es_response) diff --git a/search/search_engine_base.py b/search/search_engine_base.py index 87b337ec..a1f34985 100644 --- a/search/search_engine_base.py +++ b/search/search_engine_base.py @@ -7,8 +7,9 @@ class SearchEngine: - - """ Base abstract SearchEngine object """ + """ + Base abstract SearchEngine object. + """ index_name = "courseware" @@ -16,12 +17,16 @@ def __init__(self, index=None): if index: self.index_name = index - def index(self, doc_type, sources, **kwargs): - """ This operation is called to add documents of given type to the search index """ + def index(self, sources, **kwargs): + """ + Add documents to the search index. + """ raise NotImplementedError - def remove(self, doc_type, doc_ids, **kwargs): - """ This operation is called to remove documents of given type from the search index """ + def remove(self, doc_ids, **kwargs): + """ + Remove documents by ids from the search index. + """ raise NotImplementedError def search(self, @@ -29,23 +34,29 @@ def search(self, field_dictionary=None, filter_dictionary=None, exclude_dictionary=None, - facet_terms=None, + aggregation_terms=None, **kwargs): # pylint: disable=too-many-arguments - """ This operation is called to search for matching documents within the search index """ + """ + Search for matching documents within the search index. + """ raise NotImplementedError def search_string(self, query_string, **kwargs): - """ Helper function when primary search is for a query string """ + """ + Helper function when primary search is for a query string. 
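For review purposes: the construction above now produces a single boolean query. For a call combining `query_string`, `field_dictionary`, `filter_dictionary`, `exclude_dictionary` and `aggregation_terms`, the body sent to Elasticsearch has roughly this shape (a sketch; values illustrative):
```
{
    "query": {
        "bool": {
            "must": {"bool": {"must": [{"query_string": ...}, {"term": ...}]}},
            "filter": {"bool": {"should": [{"term": ...}, {"bool": {"must_not": {"exists": {"field": ...}}}}]}},
            "must_not": [{"term": ...}],
        }
    },
    "aggs": {
        "org": {"terms": {"field": "org"}},
        "total_org_docs": {"sum_bucket": {"buckets_path": "org._count"}},
    },
}
```
Note that the `filter` clause uses `should`, so a document passes when it either matches the filter value or lacks the field entirely, preserving the old `or`/`missing` semantics of ES 1.x.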
+ """ return self.search(query_string=query_string, **kwargs) def search_fields(self, field_dictionary, **kwargs): - """ Helper function when primary search is for a set of matching fields """ + """ + Helper function when primary search is for a set of matching fields. + """ return self.search(field_dictionary=field_dictionary, **kwargs) @staticmethod def get_search_engine(index=None): """ - Returns the desired implementor (defined in settings) + Returns the desired implementor (defined in settings). """ search_engine_class = _load_class(getattr(settings, "SEARCH_ENGINE", None), None) return search_engine_class(index=index) if search_engine_class else None diff --git a/search/tests/mock_search_engine.py b/search/tests/mock_search_engine.py index ab35430f..1711563b 100644 --- a/search/tests/mock_search_engine.py +++ b/search/tests/mock_search_engine.py @@ -1,6 +1,7 @@ """ Implementation of search interface to be used for tests where ElasticSearch is unavailable """ import copy +from collections import defaultdict from datetime import datetime import json import os @@ -149,15 +150,15 @@ def _process_exclude_dictionary(documents_to_search, exclude_dictionary): return documents_to_search -def _count_facet_values(documents, facet_terms): +def _count_aggregated_values(documents, aggregation_terms): """ - Calculate the counts for the facets provided: + Calculate the counts for the aggregations provided: - For each facet, count up the number of hits for each facet value, so + For each aggregation, count up the number of hits for each aggregation value, so that we can report back the breakdown of how many of each value there - exist. Notice that the total is the total number of facet matches that + exist. Notice that the total is the total number of aggregation matches that we receive - a single document will get counted multiple times in the - total if the facet field is multi-valued: + total if the aggregation field is multi-valued: e.g. a course may have a value for modes as ["honor", "validated"], and so the document will count as 1 towards the honor count, 1 towards the @@ -165,38 +166,42 @@ def _count_facet_values(documents, facet_terms): surprising but matches the behaviour that elasticsearch presents) """ - facets = {} - - def process_facet(facet): - """ Find the values for this facet """ - faceted_documents = [facet_document for facet_document in documents if facet in facet_document] - terms = {} - - def add_facet_value(facet_value): - """ adds the discovered value to the counts for the selected facet """ - if isinstance(facet_value, list): - for individual_value in facet_value: - add_facet_value(individual_value) + aggregations = {} + + def process_aggregation(aggregate): + """ + Find the values for this aggregation + """ + aggregated_documents = [ + agg_document for agg_document in documents if aggregate in agg_document + ] + terms = defaultdict(int) + + def add_agg_value(agg_value): + """ + Adds the discovered value to the counts for the selected aggregation. 
+ """ + if isinstance(agg_value, list): + for individual_value in agg_value: + add_agg_value(individual_value) else: - if facet_value not in terms: - terms[facet_value] = 0 - terms[facet_value] += 1 + terms[agg_value] += 1 - for document in faceted_documents: - add_facet_value(document[facet]) + for document in aggregated_documents: + add_agg_value(document[aggregate]) total = sum([terms[term] for term in terms]) return total, terms - for facet in facet_terms: - total, terms = process_facet(facet) - facets[facet] = { + for agg in aggregation_terms: + total, terms = process_aggregation(agg) + aggregations[agg] = { "total": total, "terms": terms, } - return facets + return aggregations class MockSearchEngine(SearchEngine): @@ -281,35 +286,26 @@ def load_index(cls, index_name): """ load the index, if necessary from the backed file """ cls._load_from_file() if index_name not in cls._mock_elastic: - cls._mock_elastic[index_name] = {} + cls._mock_elastic[index_name] = [] cls._write_to_file() return cls._mock_elastic[index_name] @classmethod - def load_doc_type(cls, index_name, doc_type): - """ load the documents of type doc_type, if necessary loading from the backed file """ - index = cls.load_index(index_name) - if doc_type not in index: - index[doc_type] = [] - cls._write_to_file() - - return index[doc_type] - - @classmethod - def add_documents(cls, index_name, doc_type, sources): - """ add documents of specific type to index """ - cls.load_doc_type(index_name, doc_type).extend(sources) + def add_documents(cls, index_name, sources): + """ + Add documents to index. + """ + cls.load_index(index_name).extend(sources) cls._write_to_file() @classmethod - def remove_documents(cls, index_name, doc_type, doc_ids): - """ remove documents by id of specific type to index """ + def remove_documents(cls, index_name, doc_ids): + """ + Remove documents by id from index. + """ index = cls.load_index(index_name) - if doc_type not in index: - return - - index[doc_type] = [d for d in index[doc_type] if "id" not in d or d["id"] not in doc_ids] + cls._mock_elastic[index_name] = [d for d in index if "id" not in d or d["id"] not in doc_ids] cls._write_to_file() @classmethod @@ -322,26 +318,32 @@ def __init__(self, index=None): super(MockSearchEngine, self).__init__(index) MockSearchEngine.load_index(self.index_name) - def index(self, doc_type, sources): # pylint: disable=arguments-differ - """ Add/update documents of given type to the index """ + def index(self, sources): # pylint: disable=arguments-differ + """ + Add/update documents to the index. + """ if not MockSearchEngine._disabled: doc_ids = [s["id"] for s in sources if "id" in s] - MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids) - MockSearchEngine.add_documents(self.index_name, doc_type, sources) + MockSearchEngine.remove_documents(self.index_name, doc_ids) + MockSearchEngine.add_documents(self.index_name, sources) - def remove(self, doc_type, doc_ids): # pylint: disable=arguments-differ - """ Remove documents of type with given ids from the index """ + def remove(self, doc_ids): # pylint: disable=arguments-differ + """ + Remove documents with given ids from the index. 
+ """ if not MockSearchEngine._disabled: - MockSearchEngine.remove_documents(self.index_name, doc_type, doc_ids) + MockSearchEngine.remove_documents(self.index_name, doc_ids) def search(self, query_string=None, field_dictionary=None, filter_dictionary=None, exclude_dictionary=None, - facet_terms=None, + aggregation_terms=None, **kwargs): # pylint: disable=too-many-arguments - """ Perform search upon documents within index """ + """ + Perform search upon documents within index. + """ if MockSearchEngine._disabled: return { "took": 10, @@ -351,12 +353,8 @@ def search(self, } documents_to_search = [] - if "doc_type" in kwargs: - documents_to_search = MockSearchEngine.load_doc_type(self.index_name, kwargs["doc_type"]) - else: - index = MockSearchEngine.load_index(self.index_name) - for doc_type in index: - documents_to_search.extend(index[doc_type]) + index = MockSearchEngine.load_index(self.index_name) + documents_to_search.extend(index) if field_dictionary: documents_to_search = _filter_intersection(documents_to_search, field_dictionary) @@ -414,7 +412,7 @@ def score_documents(documents_to_search): "results": results } - if facet_terms: - response["facets"] = _count_facet_values(documents_to_search, facet_terms) + if aggregation_terms: + response["aggs"] = _count_aggregated_values(documents_to_search, aggregation_terms) return response diff --git a/search/tests/test_course_discovery.py b/search/tests/test_course_discovery.py index 2a64c63f..c87ac931 100644 --- a/search/tests/test_course_discovery.py +++ b/search/tests/test_course_discovery.py @@ -16,7 +16,7 @@ from elasticsearch import Elasticsearch from search.api import course_discovery_search, NoSearchEngineError -from search.elastic import ElasticSearchEngine, QueryParseError +from search.elastic import ElasticSearchEngine from search.tests.utils import SearcherMixin, TEST_INDEX_NAME from .mock_search_engine import MockSearchEngine @@ -67,7 +67,7 @@ def reset_count(cls): @staticmethod def index(searcher, course_info): """ Adds course info dictionary to the index """ - searcher.index(doc_type="course_info", sources=course_info) + searcher.index(sources=course_info) @classmethod def get_and_index(cls, searcher, update_dict=None, remove_fields=None): @@ -82,7 +82,8 @@ def get_and_index(cls, searcher, update_dict=None, remove_fields=None): "enrollment_end": {"type": "date"} }) @override_settings(MOCK_SEARCH_BACKING_FILE=None) -@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_CONTENT_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME) # Any class that inherits from TestCase will cause too-many-public-methods pylint error class TestMockCourseDiscoverySearch(TestCase, SearcherMixin): # pylint: disable=too-many-public-methods """ @@ -226,8 +227,13 @@ def test_multivalue_field_matching(self): results = course_discovery_search(field_dictionary={"modes": "verified"}) self.assertEqual(results["total"], 1) - def test_faceting(self): - """ Test that facet results are incorporated (org and modes are default facets) """ + def test_aggregating(self): + """ + Tests bucket aggregation results. + + Test that aggregation results are incorporated (org and modes are + default aggregations). 
+ """ DemoCourse.get_and_index(self.searcher, {"org": "OrgA", "modes": ["honor", "verified"]}) DemoCourse.get_and_index(self.searcher, {"org": "OrgA", "modes": ["honor"]}) DemoCourse.get_and_index(self.searcher, {"org": "OrgB", "modes": ["honor"]}) @@ -236,22 +242,25 @@ def test_faceting(self): results = course_discovery_search() self.assertEqual(results["total"], 5) - self.assertIn("facets", results) + self.assertIn("aggs", results) - self.assertIn("org", results["facets"]) - self.assertEqual(results["facets"]["org"]["total"], 4) - self.assertEqual(results["facets"]["org"]["terms"]["OrgA"], 2) - self.assertEqual(results["facets"]["org"]["terms"]["OrgB"], 2) + self.assertIn("org", results["aggs"]) + self.assertEqual(results["aggs"]["org"]["total"], 4) + self.assertEqual(results["aggs"]["org"]["terms"]["OrgA"], 2) + self.assertEqual(results["aggs"]["org"]["terms"]["OrgB"], 2) - self.assertIn("modes", results["facets"]) - self.assertEqual(results["facets"]["modes"]["total"], 6) - self.assertEqual(results["facets"]["modes"]["terms"]["honor"], 3) - self.assertEqual(results["facets"]["modes"]["terms"]["verified"], 2) - self.assertEqual(results["facets"]["modes"]["terms"]["other"], 1) + self.assertIn("modes", results["aggs"]) + self.assertEqual(results["aggs"]["modes"]["total"], 6) + self.assertEqual(results["aggs"]["modes"]["terms"]["honor"], 3) + self.assertEqual(results["aggs"]["modes"]["terms"]["verified"], 2) + self.assertEqual(results["aggs"]["modes"]["terms"]["other"], 1) @override_settings(COURSE_DISCOVERY_FILTERS=["test_name", "modes"]) - def test_faceting_filters(self): - """ Test that facet under consideration can be specified by virtue of filters being overriden """ + def test_aggregating_filters(self): + """ + Test that aggregation under consideration can be specified + by virtue of filters being overriden + """ DemoCourse.get_and_index(self.searcher, {"test_name": "Test Name 1", "modes": ["honor", "verified"]}) DemoCourse.get_and_index(self.searcher, {"test_name": "Test Name 1", "modes": ["honor"]}) DemoCourse.get_and_index(self.searcher, {"test_name": "Test Name 2", "modes": ["honor"]}) @@ -260,24 +269,27 @@ def test_faceting_filters(self): results = course_discovery_search() self.assertEqual(results["total"], 5) - self.assertIn("facets", results) + self.assertIn("aggs", results) - self.assertNotIn("org", results["facets"]) + self.assertNotIn("org", results["aggs"]) - self.assertIn("modes", results["facets"]) - self.assertEqual(results["facets"]["modes"]["total"], 6) - self.assertEqual(results["facets"]["modes"]["terms"]["honor"], 3) - self.assertEqual(results["facets"]["modes"]["terms"]["verified"], 2) - self.assertEqual(results["facets"]["modes"]["terms"]["other"], 1) + self.assertIn("modes", results["aggs"]) + self.assertEqual(results["aggs"]["modes"]["total"], 6) + self.assertEqual(results["aggs"]["modes"]["terms"]["honor"], 3) + self.assertEqual(results["aggs"]["modes"]["terms"]["verified"], 2) + self.assertEqual(results["aggs"]["modes"]["terms"]["other"], 1) - self.assertIn("test_name", results["facets"]) - self.assertEqual(results["facets"]["test_name"]["total"], 4) - self.assertEqual(results["facets"]["test_name"]["terms"]["Test Name 1"], 2) - self.assertEqual(results["facets"]["test_name"]["terms"]["Test Name 2"], 2) + self.assertIn("test_name", results["aggs"]) + self.assertEqual(results["aggs"]["test_name"]["total"], 4) + self.assertEqual(results["aggs"]["test_name"]["terms"]["Test Name 1"], 2) + self.assertEqual(results["aggs"]["test_name"]["terms"]["Test Name 2"], 2) 
- @override_settings(COURSE_DISCOVERY_FACETS={"subject": {}, "lang": {}})
- def test_faceting_override(self):
- """ Test that facet under consideration can be specified with custom setting """
+ @override_settings(COURSE_DISCOVERY_AGGREGATIONS={"subject": {}, "lang": {}})
+ def test_aggregating_override(self):
+ """
+ Test that the aggregations under consideration can be specified
+ with a custom setting.
+ """
 DemoCourse.get_and_index(self.searcher, {"subject": "Mathematics", "lang": ["en", "fr"]})
 DemoCourse.get_and_index(self.searcher, {"subject": "Mathematics", "lang": ["en"]})
 DemoCourse.get_and_index(self.searcher, {"subject": "History", "lang": ["en"]})
@@ -286,21 +298,21 @@ def test_faceting_override(self):
 results = course_discovery_search()
 self.assertEqual(results["total"], 5)

- self.assertIn("facets", results)
+ self.assertIn("aggs", results)

- self.assertNotIn("org", results["facets"])
- self.assertNotIn("modes", results["facets"])
+ self.assertNotIn("org", results["aggs"])
+ self.assertNotIn("modes", results["aggs"])

- self.assertIn("subject", results["facets"])
- self.assertEqual(results["facets"]["subject"]["total"], 4)
- self.assertEqual(results["facets"]["subject"]["terms"]["Mathematics"], 2)
- self.assertEqual(results["facets"]["subject"]["terms"]["History"], 2)
+ self.assertIn("subject", results["aggs"])
+ self.assertEqual(results["aggs"]["subject"]["total"], 4)
+ self.assertEqual(results["aggs"]["subject"]["terms"]["Mathematics"], 2)
+ self.assertEqual(results["aggs"]["subject"]["terms"]["History"], 2)

- self.assertIn("lang", results["facets"])
- self.assertEqual(results["facets"]["lang"]["total"], 6)
- self.assertEqual(results["facets"]["lang"]["terms"]["en"], 3)
- self.assertEqual(results["facets"]["lang"]["terms"]["fr"], 2)
- self.assertEqual(results["facets"]["lang"]["terms"]["de"], 1)
+ self.assertIn("lang", results["aggs"])
+ self.assertEqual(results["aggs"]["lang"]["total"], 6)
+ self.assertEqual(results["aggs"]["lang"]["terms"]["en"], 3)
+ self.assertEqual(results["aggs"]["lang"]["terms"]["fr"], 2)
+ self.assertEqual(results["aggs"]["lang"]["terms"]["de"], 1)

 @override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine")
@@ -308,10 +320,6 @@ def test_faceting_override(self):
 class TestElasticCourseDiscoverySearch(TestMockCourseDiscoverySearch):
 """ version of tests that use Elastic Backed index instead of mock """
- def setUp(self):
- super(TestElasticCourseDiscoverySearch, self).setUp()
- self.searcher.index("doc_type_that_is_meaninless_to_bootstrap_index", [{"test_doc_type": "bootstrap"}])
-
 def test_course_matching_empty_index(self):
 """ Check for empty result count before indexing """
 results = course_discovery_search("defensive")
@@ -358,12 +366,6 @@ def test_course_matching(self, term, result_count):
 results = course_discovery_search(term)
 self.assertEqual(results["total"], result_count)
-
- def test_malformed_query_handling(self):
- """Make sure that mismatched quotes produce a specific exception.
""" - - with self.assertRaises(QueryParseError): - course_discovery_search('"missing quote') - @override_settings(SEARCH_ENGINE=None) class TestNone(TestCase): diff --git a/search/tests/test_course_discovery_views.py b/search/tests/test_course_discovery_views.py index 1458961a..b773b7cd 100644 --- a/search/tests/test_course_discovery_views.py +++ b/search/tests/test_course_discovery_views.py @@ -17,7 +17,8 @@ "enrollment_end": {"type": "date"} }) @override_settings(SEARCH_ENGINE="search.tests.mock_search_engine.MockSearchEngine") -@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_CONTENT_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME) class DiscoveryUrlTest(MockSearchUrlTest): """ Make sure that requests to the url get routed to the correct view handler diff --git a/search/tests/test_engines.py b/search/tests/test_engines.py index a461ee38..9bad1373 100644 --- a/search/tests/test_engines.py +++ b/search/tests/test_engines.py @@ -29,7 +29,7 @@ class ElasticSearchTests(MockSearchTests): def test_reserved_characters(self): """ Make sure that we handle when reserved characters were passed into query_string """ test_string = "What the ! is this?" - self.searcher.index("test_doc", [{"content": {"name": test_string}}]) + self.searcher.index([{"content": {"name": test_string}}]) response = self.searcher.search_string(test_string) self.assertEqual(response["total"], 1) @@ -45,39 +45,38 @@ def test_reserved_characters(self): response = self.searcher.search_string(char) self.assertEqual(response["total"], 0) - def test_facet_options(self): + def test_aggregation_options(self): """ - Test that facet options work alongside facets - notice unsupported in mock for now - size - is the only option for now + Test that aggregate options work alongside aggregations - notice + unsupported in mock for now size - is the only option for now """ - self._index_for_facets() + self._index_for_aggs() response = self.searcher.search() self.assertEqual(response["total"], 7) - self.assertNotIn("facets", response) + self.assertNotIn("aggs", response) - facet_terms = { + aggregation_terms = { "subject": {"size": 2}, "org": {"size": 2} } - response = self.searcher.search(facet_terms=facet_terms) + response = self.searcher.search(aggregation_terms=aggregation_terms) self.assertEqual(response["total"], 7) - self.assertIn("facets", response) - facet_results = response["facets"] - - self.assertEqual(facet_results["subject"]["total"], 6) - subject_term_counts = facet_results["subject"]["terms"] + self.assertIn("aggs", response) + aggregation_results = response["aggs"] + self.assertEqual(aggregation_results["subject"]["total"], 6) + subject_term_counts = aggregation_results["subject"]["terms"] self.assertEqual(subject_term_counts["mathematics"], 3) self.assertEqual(subject_term_counts["physics"], 2) self.assertNotIn("history", subject_term_counts) - self.assertEqual(facet_results["subject"]["other"], 1) + self.assertEqual(aggregation_results["subject"]["other"], 1) - self.assertEqual(facet_results["org"]["total"], 7) - org_term_counts = facet_results["org"]["terms"] + self.assertEqual(aggregation_results["org"]["total"], 7) + org_term_counts = aggregation_results["org"]["terms"] self.assertEqual(org_term_counts["Harvard"], 4) self.assertEqual(org_term_counts["MIT"], 2) self.assertNotIn("edX", org_term_counts) - self.assertEqual(facet_results["org"]["other"], 1) + self.assertEqual(aggregation_results["org"]["other"], 1) 
@override_settings(MOCK_SEARCH_BACKING_FILE="./testfile.pkl") @@ -99,17 +98,17 @@ def test_file_value_formats(self): # json serialization removes microseconds part of the datetime object, so # we strip it at the beginning to allow equality comparison to be correct this_moment = datetime.utcnow().replace(microsecond=0) - test_object = { - "content": { - "name": "How did 11 of 12 balls get deflated during the game" + self.searcher.index([ + { + "content": { + "name": "How did 11 of 12 balls get deflated during the game" + }, + "my_date_value": this_moment, + "my_integer_value": 172, + "my_float_value": 57.654, + "my_string_value": "If the officials just blew it, would they come out and admit it?" }, - "my_date_value": this_moment, - "my_integer_value": 172, - "my_float_value": 57.654, - "my_string_value": "If the officials just blew it, would they come out and admit it?" - } - - self.searcher.index("test_doc", [test_object]) + ]) # now search should be successful response = self.searcher.search(query_string="deflated") @@ -117,7 +116,10 @@ def test_file_value_formats(self): # and values should be what we desire returned_result = response["results"][0]["data"] - self.assertEqual(json_date_to_datetime(returned_result["my_date_value"]), this_moment) + self.assertEqual( + json_date_to_datetime(returned_result["my_date_value"]), + this_moment + ) self.assertEqual(returned_result["my_integer_value"], 172) self.assertEqual(returned_result["my_float_value"], 57.654) self.assertEqual( @@ -130,19 +132,19 @@ def test_disabled_index(self): Make sure that searchengine operations are shut down when mock engine has a filename, but file does not exist - this is helpful for test scenarios where we essentially want to not slow anything down """ - this_moment = datetime.utcnow() - test_object = { - "id": "FAKE_ID", - "content": { - "name": "How did 11 of 12 balls get deflated during the game" + this_moment = datetime.utcnow().replace(microsecond=0) + self.searcher.index([ + { + "id": "FAKE_ID", + "content": { + "name": "How did 11 of 12 balls get deflated during the game" + }, + "my_date_value": this_moment, + "my_integer_value": 172, + "my_float_value": 57.654, + "my_string_value": "If the officials just blew it, would they come out and admit it?" }, - "my_date_value": this_moment, - "my_integer_value": 172, - "my_float_value": 57.654, - "my_string_value": "If the officials just blew it, would they come out and admit it?" 
- } - - self.searcher.index("test_doc", [test_object]) + ]) response = self.searcher.search(query_string="deflated") self.assertEqual(response["total"], 1) @@ -155,13 +157,13 @@ def test_disabled_index(self): response = self.searcher.search(query_string="ABC") self.assertEqual(response["total"], 0) - self.searcher.index("test_doc", [{"content": {"name": "ABC"}}]) + self.searcher.index([{"content": {"name": "ABC"}}]) # now search should be unsuccessful because file does not exist response = self.searcher.search(query_string="ABC") self.assertEqual(response["total"], 0) # remove it, and then we'll reload file and it still should be there - self.searcher.remove("test_doc", ["FAKE_ID"]) + self.searcher.remove(["FAKE_ID"]) MockSearchEngine.create_test_file("fakefile.pkl", initial_file_content) @@ -169,11 +171,7 @@ def test_disabled_index(self): response = self.searcher.search(query_string="deflated") self.assertEqual(response["total"], 1) - self.searcher.remove("not_a_test_doc", ["FAKE_ID"]) - response = self.searcher.search(query_string="deflated") - self.assertEqual(response["total"], 1) - - self.searcher.remove("test_doc", ["FAKE_ID"]) + self.searcher.remove(["FAKE_ID"]) response = self.searcher.search(query_string="deflated") self.assertEqual(response["total"], 0) @@ -187,13 +185,13 @@ def test_index_failure_bulk(self): """ the index operation should fail """ with patch('search.elastic.bulk', return_value=[0, [exceptions.ElasticsearchException()]]): with self.assertRaises(exceptions.ElasticsearchException): - self.searcher.index("test_doc", [{"name": "abc test"}]) + self.searcher.index([{"name": "abc test"}]) def test_index_failure_general(self): """ the index operation should fail """ with patch('search.elastic.bulk', side_effect=Exception()): with self.assertRaises(Exception): - self.searcher.index("test_doc", [{"name": "abc test"}]) + self.searcher.index([{"name": "abc test"}]) def test_search_failure(self): """ the search operation should fail """ @@ -203,19 +201,18 @@ def test_search_failure(self): def test_remove_failure_bulk(self): """ the remove operation should fail """ doc_id = 'test_id' - doc_type = 'test_doc' error = {'delete': { - 'status': 500, '_type': doc_type, '_index': 'test_index', '_version': 1, 'found': True, '_id': doc_id + 'status': 500, '_index': 'test_index', '_version': 1, 'found': True, '_id': doc_id }} with patch('search.elastic.bulk', side_effect=BulkIndexError('Simulated error', [error])): with self.assertRaises(BulkIndexError): - self.searcher.remove("test_doc", ["test_id"]) + self.searcher.remove(["test_id"]) def test_remove_failure_general(self): """ the remove operation should fail """ with patch('search.elastic.bulk', side_effect=Exception()): with self.assertRaises(Exception): - self.searcher.remove("test_doc", ["test_id"]) + self.searcher.remove(["test_id"]) @override_settings(SEARCH_ENGINE=None) diff --git a/search/tests/test_mock_search_engine.py b/search/tests/test_mock_search_engine.py index 7ce34721..896a9558 100644 --- a/search/tests/test_mock_search_engine.py +++ b/search/tests/test_mock_search_engine.py @@ -95,10 +95,14 @@ def test_timezone_conversion(self): # first where index has no timezones, and query has low_date = datetime(2010, 1, 1) high_date = datetime(2100, 1, 1) - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "start_date": low_date}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2", "start_date": high_date}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", 
"start_date": low_date}, + {"id": "FAKE_ID_2", "test_value": "2", "start_date": high_date}, + ]) - response = self.searcher.search(field_dictionary={"start_date": low_date.replace(tzinfo=pytz.UTC)}) + response = self.searcher.search( + field_dictionary={"start_date": low_date.replace(tzinfo=pytz.UTC)} + ) self.assertEqual(response["total"], 1) response = self.searcher.search( @@ -112,12 +116,10 @@ def test_timezone_conversion(self): self.assertEqual(response["total"], 1) self.searcher.destroy() - self.searcher.index( - "test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "start_date": low_date.replace(tzinfo=pytz.UTC)}] - ) - self.searcher.index( - "test_doc", [{"id": "FAKE_ID_2", "test_value": "2", "start_date": high_date.replace(tzinfo=pytz.UTC)}] - ) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "start_date": low_date.replace(tzinfo=pytz.UTC)}, + {"id": "FAKE_ID_2", "test_value": "2", "start_date": high_date.replace(tzinfo=pytz.UTC)}, + ]) response = self.searcher.search(field_dictionary={"start_date": low_date}) self.assertEqual(response["total"], 1) diff --git a/search/tests/test_views.py b/search/tests/test_views.py index ce0b4cf6..27ce8be2 100644 --- a/search/tests/test_views.py +++ b/search/tests/test_views.py @@ -11,14 +11,15 @@ from search.search_engine_base import SearchEngine from search.tests.mock_search_engine import MockSearchEngine from search.tests.tests import TEST_INDEX_NAME -from search.tests.utils import post_discovery_request, post_request, SearcherMixin +from search.tests.utils import post_request, SearcherMixin # Any class that inherits from TestCase will cause too-many-public-methods pylint error # pylint: disable=too-many-public-methods @override_settings(SEARCH_ENGINE="search.tests.mock_search_engine.MockSearchEngine") @override_settings(ELASTIC_FIELD_MAPPINGS={"start_date": {"type": "date"}}) -@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_CONTENT_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME) class MockSearchUrlTest(TestCase, SearcherMixin): """ Make sure that requests to the url get routed to the correct view handler @@ -86,31 +87,28 @@ def test_url_resolution(self): def test_search_from_url(self): """ test searching using the url """ - self.searcher.index( - "courseware_content", - [ - { - "id": "FAKE_ID_1", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - }, - "test_date": datetime(2015, 1, 1), - "test_string": "ABC, It's easy as 123" + self.searcher.index([ + { + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling, it's been a long long lonely winter" + }, + "test_date": datetime(2015, 1, 1), + "test_string": "ABC, It's easy as 123" + }, + { + "id": "FAKE_ID_2", + "content": { + "text": "Little Darling, it's been a year since sun been gone" } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "id": "FAKE_ID_2", - "content": { - "text": "Little Darling, it's been a year since sun been gone" - } + }, + { + "id": "FAKE_ID_3", + "content": { + "text": "Here comes the sun" } - ] - ) - self.searcher.index("courseware_content", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}]) + }, + ]) # Test no events called yet after setup self.assert_no_events_were_emitted() @@ -151,42 +149,29 @@ def test_search_from_url(self): def test_course_search_url(self): """ test searching using the course url """ - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_1", 
- "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + self.searcher.index([ + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_2", - "content": { - "text": "Little Darling, it's been a year since you've been gone" - } + }, + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_2", + "content": { + "text": "Little Darling, it's been a year since you've been gone" } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "LMN/OPQ/RST", - "id": "FAKE_ID_3", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + }, + { + "course": "LMN/OPQ/RST", + "id": "FAKE_ID_3", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) + }, + ]) # Test no events called yet after setup self.assert_no_events_were_emitted() @@ -243,42 +228,29 @@ def test_empty_search_string(self): # pylint: disable=too-many-statements,wrong-assert-type def test_pagination(self): """ test searching using the course url """ - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC", - "id": "FAKE_ID_1", - "content": { - "text": "Little Darling Little Darling Little Darling, it's been a long long lonely winter" - } + self.searcher.index([ + { + "course": "ABC", + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling Little Darling Little Darling, it's been a long long lonely winter" } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC", - "id": "FAKE_ID_2", - "content": { - "text": "Little Darling Little Darling, it's been a year since you've been gone" - } + }, + { + "course": "ABC", + "id": "FAKE_ID_2", + "content": { + "text": "Little Darling Little Darling, it's been a year since you've been gone" } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "XYZ", - "id": "FAKE_ID_3", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + }, + { + "course": "XYZ", + "id": "FAKE_ID_3", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) + }, + ]) # Test no events called yet after setup self.assert_no_events_were_emitted() @@ -365,18 +337,15 @@ def test_pagination(self): def test_page_size_too_large(self): """ test searching with too-large page_size """ - self.searcher.index( - "test_doc", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_1", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + self.searcher.index([ + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) + }, + ]) code, results = post_request({"search_string": "Little Darling", "page_size": 101}) self.assertEqual(code, 500) @@ -385,7 +354,8 @@ def test_page_size_too_large(self): @override_settings(SEARCH_ENGINE="search.tests.utils.ErroringSearchEngine") @override_settings(ELASTIC_FIELD_MAPPINGS={"start_date": {"type": "date"}}) -@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_CONTENT_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME) class BadSearchTest(TestCase, SearcherMixin): """ Make sure that we can error message when there is a problem """ @@ -400,29 +370,26 @@ def tearDown(self): def test_search_from_url(self): """ ensure that we get the 
error back when the backend fails """ searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME) - searcher.index( - "courseware_content", - [ - { - "id": "FAKE_ID_1", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + searcher.index([ + { + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) - searcher.index( - "courseware_content", - [ - { - "id": "FAKE_ID_2", - "content": { - "text": "Little Darling, it's been a year since sun been gone" - } + }, + { + "id": "FAKE_ID_2", + "content": { + "text": "Little Darling, it's been a year since sun been gone" } - ] - ) - searcher.index("test_doc", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}]) + }, + { + "id": "FAKE_ID_3", + "content": { + "text": "Here comes the sun" + } + }, + ]) code, results = post_request({"search_string": "sun"}) self.assertGreater(code, 499) @@ -448,55 +415,49 @@ def test_search_from_url(self): """ ensure that we get the error back when the backend fails """ searcher = SearchEngine.get_search_engine(TEST_INDEX_NAME) with self.assertRaises(Exception): - searcher.index("courseware_content", [{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}]) + searcher.index([{"id": "FAKE_ID_3", "content": {"text": "Here comes the sun"}}]) @override_settings(SEARCH_ENGINE="search.tests.utils.ForceRefreshElasticSearchEngine") @override_settings(ELASTIC_FIELD_MAPPINGS={"start_date": {"type": "date"}}) -@override_settings(COURSEWARE_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_CONTENT_INDEX_NAME=TEST_INDEX_NAME) +@override_settings(COURSEWARE_INFO_INDEX_NAME=TEST_INDEX_NAME) @ddt.ddt class ElasticSearchUrlTest(TestCase, SearcherMixin): - """Elastic-specific tests""" + """ + Elastic-specific tests + """ + def setUp(self): super(ElasticSearchUrlTest, self).setUp() - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_1", - "content": { - "text": "It seems like k-means clustering would work in this context." - }, - "test_date": datetime(2015, 1, 1), - "test_string": "ABC, It's easy as 123" - } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_2", - "content": { - "text": "It looks like k-means clustering could work in this context." - } + patcher = patch('search.views.track') + self.mock_tracker = patcher.start() + self.addCleanup(patcher.stop) + self.searcher.index([ + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_1", + "content": { + "text": "It seems like k-means clustering would work in this context." + }, + "test_date": datetime(2015, 1, 1), + "test_string": "ABC, It's easy as 123" + }, + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_2", + "content": { + "text": "It looks like k-means clustering could work in this context." } - ] - ) - self.searcher.index( - "courseware_content", - [ - { - "course": "ABC/DEF/GHI", - "id": "FAKE_ID_3", - "content": { - "text": "It looks like k means something different in this context." - } + }, + { + "course": "ABC/DEF/GHI", + "id": "FAKE_ID_3", + "content": { + "text": "It looks like k means something different in this context." 
} - ] - ) + }, + ]) @ddt.data( # Quoted phrases @@ -513,19 +474,3 @@ def test_valid_search(self, query, course_id, result_count): code, results = post_request({"search_string": query}, course_id) self.assertTrue(199 < code < 300) self.assertEqual(results["total"], result_count) - - def test_malformed_query_handling(self): - # root - code, results = post_request({"search_string": "\"missing quote"}) - self.assertGreater(code, 499) - self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.') - - # course ID - code, results = post_request({"search_string": "\"missing quote"}, "ABC/DEF/GHI") - self.assertGreater(code, 499) - self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.') - - # course discovery - code, results = post_discovery_request({"search_string": "\"missing quote"}) - self.assertGreater(code, 499) - self.assertEqual(results["error"], 'Your query seems malformed. Check for unmatched quotes.') diff --git a/search/tests/tests.py b/search/tests/tests.py index e040adad..aaed817b 100644 --- a/search/tests/tests.py +++ b/search/tests/tests.py @@ -72,16 +72,16 @@ def test_abstract_impl(self): abstract = SearchEngine("test_index_name") test_string = "A test string" with self.assertRaises(NotImplementedError): - abstract.index("test_doc", [{"name": test_string}]) + abstract.index([{"name": test_string}]) with self.assertRaises(NotImplementedError): abstract.search(test_string) with self.assertRaises(NotImplementedError): - abstract.remove("test_doc", ["test_id"]) + abstract.remove(["test_id"]) def test_find_all(self): """ Make sure that null search finds everything in the index """ test_string = "A test string" - self.searcher.index("test_doc", [{"name": test_string}]) + self.searcher.index([{"name": test_string}]) # search everything response = self.searcher.search(None) @@ -89,7 +89,7 @@ def test_find_all(self): results = response["results"] self.assertEqual(results[0]["data"]["name"], test_string) - self.searcher.index("not_test_doc", [{"value": test_string}]) + self.searcher.index([{"value": test_string}]) response = self.searcher.search(None) self.assertEqual(response["total"], 2) @@ -99,27 +99,10 @@ def test_find_all(self): self.assertEqual(test_0["name"], test_string) self.assertEqual(test_1["value"], test_string) - def test_find_doctype(self): - """ Make sure that searches for specific doc_type only return requested doc_type """ - test_string = "A test string" - self.searcher.index("test_doc", [{"name": test_string}]) - - # search by doc_type - response = self.searcher.search(None, doc_type="test_doc") - self.assertEqual(response["total"], 1) - - response = self.searcher.search(None, doc_type="not_test_doc") - self.assertEqual(response["total"], 0) - - self.searcher.index("not_test_doc", [{"value": test_string}]) - - response = self.searcher.search(None, doc_type="not_test_doc") - self.assertEqual(response["total"], 1) - def test_find_string(self): """ Find a string within the object "content" node """ test_string = "A test string" - self.searcher.index("test_doc", [{"content": {"name": test_string}}]) + self.searcher.index([{"content": {"name": test_string}}]) # search string response = self.searcher.search(test_string) @@ -128,18 +111,15 @@ def test_find_string(self): response = self.searcher.search_string(test_string) self.assertEqual(response["total"], 1) - self.searcher.index("not_test_doc", [{"content": {"value": test_string}}]) + self.searcher.index([{"content": {"value": test_string}}]) response = 
self.searcher.search_string(test_string) self.assertEqual(response["total"], 2) - response = self.searcher.search_string(test_string, doc_type="test_doc") - self.assertEqual(response["total"], 1) - response = self.searcher.search_string("something else") self.assertEqual(response["total"], 0) - self.searcher.index("test_doc", [{"content": {"deep": {"down": test_string}}}]) + self.searcher.index([{"content": {"deep": {"down": test_string}}}]) response = self.searcher.search_string(test_string) self.assertEqual(response["total"], 3) @@ -155,7 +135,7 @@ def test_field(self): "fieldX": "valueY", "id": "12345" } - self.searcher.index("test_doc", [test_object]) + self.searcher.index([test_object]) # search tags response = self.searcher.search_fields({"tags.tag_one": "one"}) @@ -189,7 +169,7 @@ def test_search_string_and_field(self): "course": "A/B/C", "abc": "xyz", } - self.searcher.index("test_doc", [test_object]) + self.searcher.index([test_object]) response = self.searcher.search(query_string="find me") self.assertEqual(response["total"], 1) @@ -222,7 +202,7 @@ def test_search_tags(self): "taste": "sour", } test_object["tags"] = tags - self.searcher.index("test_doc", [test_object]) + self.searcher.index([test_object]) response = self.searcher.search_fields({"tags.color": "red"}) self.assertEqual(response["total"], 1) @@ -261,10 +241,6 @@ def test_search_tags(self): response = self._searcher.search(field_dictionary={"tags.shape": "square", "tags.color": "blue"}) self.assertEqual(response["total"], 0) - response = self._searcher.search( - field_dictionary={"tags.shape": "square", "tags.color": "blue"}, use_field_match=True) - self.assertEqual(response["total"], 0) - def test_search_array(self): """ test nested object array """ test_object1 = { @@ -277,8 +253,7 @@ def test_search_array(self): "course_id": "C/D/E", "array": ["a", "b", "c"] } - self.searcher.index("test_doc", [test_object1]) - self.searcher.index("test_doc", [test_object2]) + self.searcher.index([test_object1, test_object2]) response = self.searcher.search(field_dictionary={"array": "x"}) self.assertEqual(response["total"], 1) @@ -305,8 +280,7 @@ def test_search_any(self): "name": "Anthony Rizzo", "course": "C/D/E" } - self.searcher.index("test_doc", [test_object1]) - self.searcher.index("test_doc", [test_object2]) + self.searcher.index([test_object1, test_object2]) response = self.searcher.search(field_dictionary={"course": ["x", "y"]}) self.assertEqual(response["total"], 0) @@ -323,7 +297,7 @@ def test_search_any(self): def test_extended_characters(self): """ Make sure that extended character searches work """ test_string = u"قضايـا هامـة" - self.searcher.index("test_doc", [{"content": {"name": test_string}}]) + self.searcher.index([{"content": {"name": test_string}}]) # search string response = self.searcher.search(test_string) @@ -332,7 +306,7 @@ def test_extended_characters(self): response = self.searcher.search_string(test_string) self.assertEqual(response["total"], 1) - self.searcher.index("not_test_doc", [{"content": {"value": test_string}}]) + self.searcher.index([{"content": {"value": test_string}}]) response = self.searcher.search_string(test_string) self.assertEqual(response["total"], 2) @@ -340,42 +314,41 @@ def test_extended_characters(self): def test_delete_item(self): """ make sure that we can remove an item from the index """ test_string = "This is a test of the emergency broadcast system" - self.searcher.index("test_doc", [{"id": "FAKE_ID", "content": {"name": test_string}}]) + self.searcher.index([{"id": 
"FAKE_ID", "content": {"name": test_string}}]) response = self.searcher.search(test_string) self.assertEqual(response["total"], 1) - - self.searcher.remove("test_doc", [response["results"][0]["data"]["id"]]) + self.searcher.remove([response["results"][0]["data"]["id"]]) response = self.searcher.search(test_string) self.assertEqual(response["total"], 0) def test_delete_item_slashes(self): """ make sure that we can remove an item from the index with complex id """ test_string = "This is a test of the emergency broadcast system" - self.searcher.index( - "test_doc", [ - { - "id": "i4x://edX/DemoX/google-document/e3369ea4aa0749a7ba29c461d1c819a4", - "content": {"name": test_string} - } - ] - ) + self.searcher.index([ + { + "id": "i4x://edX/DemoX/google-document/e3369ea4aa0749a7ba29c461d1c819a4", + "content": {"name": test_string} + }, + ]) response = self.searcher.search(test_string) self.assertEqual(response["total"], 1) - self.searcher.remove("test_doc", ["i4x://edX/DemoX/google-document/e3369ea4aa0749a7ba29c461d1c819a4"]) + self.searcher.remove(["i4x://edX/DemoX/google-document/e3369ea4aa0749a7ba29c461d1c819a4"]) response = self.searcher.search(test_string) self.assertEqual(response["total"], 0) def test_delete_item_not_present(self): """ make sure that we get no error removing an item that does not exist """ - self.searcher.remove("test_doc", ["TOTALLY_FAKE_ID"]) + self.searcher.remove(["TOTALLY_FAKE_ID"]) def test_filter_items(self): """ Make sure that filters work """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "filter_field": "my_filter_value"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2"}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "filter_field": "my_filter_value"}, + {"id": "FAKE_ID_2", "test_value": "2"}, + ]) response = self.searcher.search(filter_dictionary={"filter_field": "my_filter_value"}) self.assertEqual(response["total"], 2) @@ -383,7 +356,7 @@ def test_filter_items(self): response = self.searcher.search(field_dictionary={"filter_field": "my_filter_value"}) self.assertEqual(response["total"], 1) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "test_value": "3", "filter_field": "not_my_filter_value"}]) + self.searcher.index([{"id": "FAKE_ID_3", "test_value": "3", "filter_field": "not_my_filter_value"}]) response = self.searcher.search(filter_dictionary={"filter_field": "my_filter_value"}) self.assertEqual(response["total"], 2) @@ -394,9 +367,11 @@ def test_iterable_filters(self): """ Make sure that iterable filters works """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "filter_field": "orange"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "filter_field": ["orange", "blue"]}]) + self.searcher.index([ + {"id": "FAKE_ID_1"}, + {"id": "FAKE_ID_2", "filter_field": "orange"}, + {"id": "FAKE_ID_3", "filter_field": ["orange", "blue"]}, + ]) response = self.searcher.search(filter_dictionary={"filter_field": "orange"}) self.assertEqual(response["total"], 3) @@ -412,9 +387,11 @@ def test_filter_where_null(self): Make sure that filtering with `None` value finds only fields where item is not present or where explicitly None """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "filter_field": "my_filter_value"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "test_value": "3", "filter_field": 
"not_my_filter_value"}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "filter_field": "my_filter_value"}, + {"id": "FAKE_ID_2", "test_value": "2"}, + {"id": "FAKE_ID_3", "test_value": "3", "filter_field": "not_my_filter_value"}, + ]) response = self.searcher.search(filter_dictionary={"filter_field": "my_filter_value"}) self.assertEqual(response["total"], 2) @@ -422,14 +399,16 @@ def test_filter_where_null(self): response = self.searcher.search(filter_dictionary={"filter_field": None}) self.assertEqual(response["total"], 1) - self.searcher.index("test_doc", [{"id": "FAKE_ID_4", "test_value": "4", "filter_field": None}]) + self.searcher.index([{"id": "FAKE_ID_4", "test_value": "4", "filter_field": None}]) response = self.searcher.search(filter_dictionary={"filter_field": None}) self.assertEqual(response["total"], 2) def test_date_range(self): """ Make sure that date ranges can be searched """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "start_date": datetime(2010, 1, 1)}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2", "start_date": datetime(2100, 1, 1)}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "start_date": datetime(2010, 1, 1)}, + {"id": "FAKE_ID_2", "test_value": "2", "start_date": datetime(2100, 1, 1)}, + ]) response = self.searcher.search() self.assertEqual(response["total"], 2) @@ -448,9 +427,11 @@ def test_date_range(self): def test_numeric_range(self): """ Make sure that numeric ranges can be searched with both field and filter queries """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "age": 20}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2", "age": 30}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "test_value": "3", "age": 40}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "age": 20}, + {"id": "FAKE_ID_2", "test_value": "2", "age": 30}, + {"id": "FAKE_ID_3", "test_value": "3", "age": 40}, + ]) def test_age_range_field(begin, end, expect): """ repeated operations consolidated for tests """ @@ -481,7 +462,7 @@ def test_age_range_filter(begin, end, expect): test_age_range_filter(None, 29, 1) test_age_range_filter(39, None, 1) - self.searcher.index("test_doc", [{"id": "FAKE_ID_4", "test_value": "4", "age": 50}]) + self.searcher.index([{"id": "FAKE_ID_4", "test_value": "4", "age": 50}]) test_age_range_field(19, 29, 1) test_age_range_field(19, 39, 2) @@ -499,7 +480,7 @@ def test_age_range_filter(begin, end, expect): test_age_range_filter(None, 29, 1) test_age_range_filter(39, None, 2) - self.searcher.index("test_doc", [{"id": "FAKE_ID_5", "test_value": "5", "not_age": 50}]) + self.searcher.index([{"id": "FAKE_ID_5", "test_value": "5", "not_age": 50}]) test_age_range_filter(19, 29, 2) test_age_range_filter(19, 39, 3) test_age_range_filter(19, 49, 4) @@ -510,9 +491,11 @@ def test_age_range_filter(begin, end, expect): def test_range_filter(self): """ Make sure that ranges can be used in field_dictionary and filter_dictionary """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "test_value": "1", "age": 20}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "test_value": "2", "age": 30}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "test_value": "3", "not_age": 40}]) + self.searcher.index([ + {"id": "FAKE_ID_1", "test_value": "1", "age": 20}, + {"id": "FAKE_ID_2", "test_value": "2", "age": 30}, + {"id": "FAKE_ID_3", "test_value": "3", "not_age": 40}, + ]) response = 
self.searcher.search() self.assertEqual(response["total"], 3) @@ -525,42 +508,29 @@ def test_range_filter(self): def test_pagination(self): """ Test paging operation """ - self.searcher.index( - "test_doc", - [ - { - "course": "ABC", - "id": "FAKE_ID_1", - "content": { - "text": "Little Darling Little Darling Little Darling, it's been a long long lonely winter" - } + self.searcher.index([ + { + "course": "ABC", + "id": "FAKE_ID_1", + "content": { + "text": "Little Darling Little Darling Little Darling, it's been a long long lonely winter" } - ] - ) - self.searcher.index( - "test_doc", - [ - { - "course": "ABC", - "id": "FAKE_ID_2", - "content": { - "text": "Little Darling Little Darling, it's been a year since you've been gone" - } + }, + { + "course": "ABC", + "id": "FAKE_ID_2", + "content": { + "text": "Little Darling Little Darling, it's been a year since you've been gone" } - ] - ) - self.searcher.index( - "test_doc", - [ - { - "course": "XYZ", - "id": "FAKE_ID_3", - "content": { - "text": "Little Darling, it's been a long long lonely winter" - } + }, + { + "course": "XYZ", + "id": "FAKE_ID_3", + "content": { + "text": "Little Darling, it's been a long long lonely winter" } - ] - ) + }, + ]) response = self.searcher.search(query_string="Little Darling") self.assertEqual(response["total"], 3) @@ -604,12 +574,14 @@ def test_pagination(self): def test_exclude_ids(self): """ Test that ids that would normally be present in the resultset will not be present if in the exclude list """ - self.searcher.index("test_doc", [{"course": "ABC", "id": "FAKE_ID_1"}]) - self.searcher.index("test_doc", [{"course": "ABC", "id": "FAKE_ID_2"}]) - self.searcher.index("test_doc", [{"course": "ABC", "id": "FAKE_ID_3"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "id": "FAKE_ID_11"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "id": "FAKE_ID_12"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "id": "FAKE_ID_13"}]) + self.searcher.index([ + {"course": "ABC", "id": "FAKE_ID_1"}, + {"course": "ABC", "id": "FAKE_ID_2"}, + {"course": "ABC", "id": "FAKE_ID_3"}, + {"course": "XYZ", "id": "FAKE_ID_11"}, + {"course": "XYZ", "id": "FAKE_ID_12"}, + {"course": "XYZ", "id": "FAKE_ID_13"}, + ]) response = self.searcher.search() self.assertEqual(response["total"], 6) @@ -621,7 +593,9 @@ def test_exclude_ids(self): self.assertIn("FAKE_ID_12", result_ids) self.assertIn("FAKE_ID_13", result_ids) - response = self.searcher.search(exclude_ids=["FAKE_ID_1", "FAKE_ID_2", "FAKE_ID_11", "FAKE_ID_12"]) + response = self.searcher.search( + exclude_ids=["FAKE_ID_1", "FAKE_ID_2", "FAKE_ID_11", "FAKE_ID_12"] + ) self.assertEqual(response["total"], 2) result_ids = [r["data"]["id"] for r in response["results"]] self.assertNotIn("FAKE_ID_1", result_ids) @@ -653,9 +627,11 @@ def test_exclude_ids(self): def test_exclude_filter_single(self): """ Test that single entries present in the exclude filter are filtered out """ - self.searcher.index("test_doc", [{"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}]) - self.searcher.index("test_doc", [{"course": "LMN", "org": "MITX", "id": "FAKE_ID_3"}]) + self.searcher.index([ + {"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}, + {"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}, + {"course": "LMN", "org": "MITX", "id": "FAKE_ID_3"}, + ]) response = self.searcher.search() self.assertEqual(response["total"], 3) @@ -696,11 +672,13 @@ def test_exclude_filter_single(self): def 
test_exclude_filter_multiple(self): """ Test that multiple entries present in the exclude filter are filtered out """ - self.searcher.index("test_doc", [{"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}]) - self.searcher.index("test_doc", [{"course": "DEF", "org": "MITX", "id": "FAKE_ID_3"}]) - self.searcher.index("test_doc", [{"course": "GHI", "org": "HarvardX", "id": "FAKE_ID_4"}]) - self.searcher.index("test_doc", [{"course": "LMN", "org": "edX", "id": "FAKE_ID_5"}]) + self.searcher.index([ + {"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}, + {"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}, + {"course": "DEF", "org": "MITX", "id": "FAKE_ID_3"}, + {"course": "GHI", "org": "HarvardX", "id": "FAKE_ID_4"}, + {"course": "LMN", "org": "edX", "id": "FAKE_ID_5"}, + ]) response = self.searcher.search() self.assertEqual(response["total"], 5) @@ -756,11 +734,13 @@ def test_exclude_filter_multiple(self): def test_exclude_filter_empty(self): """ Test that search works when exclude filter is an empty list """ - self.searcher.index("test_doc", [{"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}]) - self.searcher.index("test_doc", [{"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}]) - self.searcher.index("test_doc", [{"course": "DEF", "org": "MITX", "id": "FAKE_ID_3"}]) - self.searcher.index("test_doc", [{"course": "GHI", "org": "HarvardX", "id": "FAKE_ID_4"}]) - self.searcher.index("test_doc", [{"course": "LMN", "org": "edX", "id": "FAKE_ID_5"}]) + self.searcher.index([ + {"course": "ABC", "org": "edX", "id": "FAKE_ID_1"}, + {"course": "XYZ", "org": "edX", "id": "FAKE_ID_2"}, + {"course": "DEF", "org": "MITX", "id": "FAKE_ID_3"}, + {"course": "GHI", "org": "HarvardX", "id": "FAKE_ID_4"}, + {"course": "LMN", "org": "edX", "id": "FAKE_ID_5"}, + ]) response = self.searcher.search(exclude_dictionary={"org": []}) self.assertEqual(response["total"], 5) @@ -783,83 +763,91 @@ def test_exclude_filter_empty(self): self.assertNotIn("FAKE_ID_4", result_ids) self.assertIn("FAKE_ID_5", result_ids) - def _index_for_facets(self): - """ Prepare index for facet tests """ - self.searcher.index("test_doc", [{"id": "FAKE_ID_1", "subject": "mathematics", "org": "edX"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_2", "subject": "mathematics", "org": "MIT"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_3", "subject": "physics", "org": "MIT"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_4", "subject": "history", "org": "Harvard"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_5", "subject": "mathematics", "org": "Harvard"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_6", "subject": "physics", "org": "Harvard"}]) - self.searcher.index("test_doc", [{"id": "FAKE_ID_7", "no_subject": "not_a_subject", "org": "Harvard"}]) - - def test_faceted_search(self): - """ Test that faceting works well """ - self._index_for_facets() + def _index_for_aggs(self): + """ Prepare index for aggregation tests """ + self.searcher.index([ + {"id": "FAKE_ID_1", "subject": "mathematics", "org": "edX"}, + {"id": "FAKE_ID_2", "subject": "mathematics", "org": "MIT"}, + {"id": "FAKE_ID_3", "subject": "physics", "org": "MIT"}, + {"id": "FAKE_ID_4", "subject": "history", "org": "Harvard"}, + {"id": "FAKE_ID_5", "subject": "mathematics", "org": "Harvard"}, + {"id": "FAKE_ID_6", "subject": "physics", "org": "Harvard"}, + {"id": "FAKE_ID_7", "no_subject": "not_a_subject", "org": "Harvard"}, + ]) + + def 
test_aggregation_search(self): + """ Test that aggregation works well """ + self._index_for_aggs() response = self.searcher.search() self.assertEqual(response["total"], 7) - self.assertNotIn("facets", response) + self.assertNotIn("aggs", response) - facet_terms = { + aggregation_terms = { "subject": {}, "org": {} } - response = self.searcher.search(facet_terms=facet_terms) + response = self.searcher.search(aggregation_terms=aggregation_terms) self.assertEqual(response["total"], 7) - self.assertIn("facets", response) - facet_results = response["facets"] + self.assertIn("aggs", response) + aggregation_results = response["aggs"] - self.assertEqual(facet_results["subject"]["total"], 6) - subject_term_counts = facet_results["subject"]["terms"] + self.assertEqual(aggregation_results["subject"]["total"], 6) + subject_term_counts = aggregation_results["subject"]["terms"] self.assertEqual(subject_term_counts["mathematics"], 3) self.assertEqual(subject_term_counts["history"], 1) self.assertEqual(subject_term_counts["physics"], 2) - self.assertEqual(facet_results["org"]["total"], 7) - org_term_counts = facet_results["org"]["terms"] + self.assertEqual(aggregation_results["org"]["total"], 7) + org_term_counts = aggregation_results["org"]["terms"] self.assertEqual(org_term_counts["edX"], 1) self.assertEqual(org_term_counts["MIT"], 2) self.assertEqual(org_term_counts["Harvard"], 4) - def test_filtered_faceted_search(self): - """ Test that faceting works well alongside filtered results """ - self._index_for_facets() + def test_filtered_aggregation_search(self): + """ Test that aggregation works well alongside filtered results """ + self._index_for_aggs() - facet_terms = { + aggregation_terms = { "subject": {}, "org": {} } - response = self.searcher.search(field_dictionary={"org": "Harvard"}, facet_terms=facet_terms) + response = self.searcher.search( + field_dictionary={"org": "Harvard"}, + aggregation_terms=aggregation_terms + ) self.assertEqual(response["total"], 4) - self.assertIn("facets", response) - facet_results = response["facets"] + self.assertIn("aggs", response) + aggregation_results = response["aggs"] - self.assertEqual(facet_results["subject"]["total"], 3) - subject_term_counts = facet_results["subject"]["terms"] + self.assertEqual(aggregation_results["subject"]["total"], 3) + subject_term_counts = aggregation_results["subject"]["terms"] self.assertEqual(subject_term_counts["mathematics"], 1) self.assertEqual(subject_term_counts["history"], 1) self.assertEqual(subject_term_counts["physics"], 1) - self.assertEqual(facet_results["org"]["total"], 4) - org_term_counts = facet_results["org"]["terms"] + self.assertEqual(aggregation_results["org"]["total"], 4) + org_term_counts = aggregation_results["org"]["terms"] self.assertNotIn("edX", org_term_counts) self.assertNotIn("MIT", org_term_counts) self.assertEqual(org_term_counts["Harvard"], 4) - response = self.searcher.search(field_dictionary={"subject": ["physics", "history"]}, facet_terms=facet_terms) + response = self.searcher.search( + field_dictionary={"subject": ["physics", "history"]}, + aggregation_terms=aggregation_terms + ) self.assertEqual(response["total"], 3) - self.assertIn("facets", response) - facet_results = response["facets"] + self.assertIn("aggs", response) + aggregation_results = response["aggs"] - self.assertEqual(facet_results["subject"]["total"], 3) - subject_term_counts = facet_results["subject"]["terms"] + self.assertEqual(aggregation_results["subject"]["total"], 3) + subject_term_counts = 
aggregation_results["subject"]["terms"] self.assertNotIn("mathematics", subject_term_counts) self.assertEqual(subject_term_counts["history"], 1) self.assertEqual(subject_term_counts["physics"], 2) - self.assertEqual(facet_results["org"]["total"], 3) - org_term_counts = facet_results["org"]["terms"] + self.assertEqual(aggregation_results["org"]["total"], 3) + org_term_counts = aggregation_results["org"]["terms"] self.assertNotIn("edX", org_term_counts) self.assertEqual(org_term_counts["MIT"], 1) self.assertEqual(org_term_counts["Harvard"], 2) diff --git a/search/tests/utils.py b/search/tests/utils.py index 06f500b2..a8feb102 100644 --- a/search/tests/utils.py +++ b/search/tests/utils.py @@ -12,8 +12,10 @@ def post_request(body, course_id=None): - """ Helper method to post the request and process the response """ - address = '/' if course_id is None else '/{}'.format(course_id) + """ + Helper method to post the request and process the response + """ + address = '/{}'.format(course_id if course_id else '') response = Client().post(address, body) return getattr(response, "status_code", 500), json.loads(getattr(response, "content", None).decode('utf-8')) @@ -48,17 +50,13 @@ class ForceRefreshElasticSearchEngine(ElasticSearchEngine): so that tests can relaibly search right afterward """ - def index(self, doc_type, sources, **kwargs): - kwargs.update({ - "refresh": True - }) - super(ForceRefreshElasticSearchEngine, self).index(doc_type, sources, **kwargs) + def index(self, sources, **kwargs): + kwargs["refresh"] = True + super(ForceRefreshElasticSearchEngine, self).index(sources, **kwargs) - def remove(self, doc_type, doc_ids, **kwargs): - kwargs.update({ - "refresh": True - }) - super(ForceRefreshElasticSearchEngine, self).remove(doc_type, doc_ids, **kwargs) + def remove(self, doc_ids, **kwargs): + kwargs["refresh"] = True + super(ForceRefreshElasticSearchEngine, self).remove(doc_ids, **kwargs) class ErroringSearchEngine(MockSearchEngine): @@ -75,7 +73,7 @@ def search(self, class ErroringIndexEngine(MockSearchEngine): """ Override to generate search engine error to test """ - def index(self, doc_type, sources, **kwargs): # pylint: disable=unused-argument, arguments-differ + def index(self, sources, **kwargs): # pylint: disable=unused-argument, arguments-differ raise Exception("There is a problem here") diff --git a/search/views.py b/search/views.py index a0509fce..a4bdcbfd 100644 --- a/search/views.py +++ b/search/views.py @@ -10,7 +10,7 @@ from django.views.decorators.http import require_POST from eventtracking import tracker as track -from .api import QueryParseError, perform_search, course_discovery_search, course_discovery_filter_fields +from .api import perform_search, course_discovery_search, course_discovery_filter_fields from .initializer import SearchInitializer # log appears to be standard name used for logger @@ -123,11 +123,6 @@ def do_search(request, course_id=None): } log.debug(str(invalid_err)) - except QueryParseError: - results = { - "error": _('Your query seems malformed. Check for unmatched quotes.') - } - # Allow for broad exceptions here - this is an entry point from external reference except Exception as err: # pylint: disable=broad-except results = { @@ -214,11 +209,6 @@ def course_discovery(request): } log.debug(str(invalid_err)) - except QueryParseError: - results = { - "error": _('Your query seems malformed. 
Check for unmatched quotes.') - } - # Allow for broad exceptions here - this is an entry point from external reference except Exception as err: # pylint: disable=broad-except results = { diff --git a/setup.py b/setup.py index e8f42ddb..5bac26bb 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def is_requirement(line): setup( name='edx-search', - version='1.4.1', + version='2.0.0', description='Search and index routines for index access', author='edX', author_email='oscm@edx.org',
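
Taken together, this change removes the Elasticsearch `doc_type` argument from `index`/`remove`/`search` and renames facets to aggregations (`facet_terms` becomes `aggregation_terms`, and responses carry `"aggs"` instead of `"facets"`), which is why `setup.py` jumps to 2.0.0. For downstream callers, a minimal before/after sketch, assuming only the call shapes exercised by the tests in this diff (the index name and documents are illustrative):

```python
from search.search_engine_base import SearchEngine

# Illustrative index name; any configured index works the same way.
searcher = SearchEngine.get_search_engine("my_test_index")

# edx-search 1.x (removed): every call named a doc_type first, e.g.
#   searcher.index("courseware_content", [{"id": "DOC_1", ...}])
#   searcher.remove("courseware_content", ["DOC_1"])
#   searcher.search("sun", facet_terms={"org": {}})

# edx-search 2.0.0: no doc_type; facet_terms becomes aggregation_terms.
searcher.index([{"id": "DOC_1", "org": "MIT", "content": {"text": "Here comes the sun"}}])
response = searcher.search(query_string="sun", aggregation_terms={"org": {}})

# Bucket counts come back under "aggs" (only when aggregation_terms was
# passed), keeping the old per-field {"total": ..., "terms": {...}} shape.
org_counts = response["aggs"]["org"]["terms"]  # e.g. {"MIT": 1}

searcher.remove(["DOC_1"])
```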