diff --git a/galaxy_ng/app/api/ui/views/search.py b/galaxy_ng/app/api/ui/views/search.py index 3d6b2b4105..1354c9772d 100644 --- a/galaxy_ng/app/api/ui/views/search.py +++ b/galaxy_ng/app/api/ui/views/search.py @@ -1,5 +1,5 @@ from django.contrib.postgres.aggregates import JSONBAgg -from django.contrib.postgres.search import SearchQuery, SearchVector +from django.contrib.postgres.search import SearchQuery from django.db.models import ( Exists, F, @@ -42,8 +42,7 @@ SORT_PARAM = "order_by" SORTABLE_FIELDS = ["name", "namespace_name", "download_count", "last_updated", "relevance"] SORTABLE_FIELDS += [f"-{item}" for item in SORTABLE_FIELDS] -DEFAULT_SORT = "-download_count" -DEFAULT_SEARCH_TYPE = "sql" # websearch,sql +DEFAULT_SEARCH_TYPE = "websearch" # websearch,sql QUERYSET_VALUES = [ "namespace_avatar", "content_list", @@ -61,67 +60,10 @@ "relevance", ] RANK_NORMALIZATION = 32 -EMPTY_QUERY = SearchQuery(Value(None)) class SearchListView(api_base.GenericViewSet, mixins.ListModelMixin): - """Lists Search results for Collections + Roles. - Aggregates search from Collections and Roles in the same results set. - - - ## filtering - - - **search_type:** ["sql", "websearch"] - - **keywords:** string - - queried against name,namespace,description,tags,platform - - when search_type is websearch allows operators e.g: "this OR that AND (A OR B) -notthis" - - when search_type is sql performs a SQL ilike on the same fields - - **type:** ["collection", "role"] - - **deprecated:** boolean - - **name:** string (iexact query) - - **namespace:** string (iexact query) - - **tags:** string[] (allows multiple &tags=..&tags=..) - - **platform:** string - - ## Sorting - - Sorting is performed by passing `order_by` parameter, optionally prefixed with `-` for DESC, - the allowed fields are: - - - name - - namespace_name - - download_count - - last_updated - - relevance (only when search_type is websearch) - - ## Pagination - - Pagination is based on `limit` and `offset` parameters. 
- - ## Results - - Results are embedded in the pagination serializer including - `meta:count` and `links:first,previous,next,last`. - - The `data` key contains the results in the format:: - - ```python - { - "name": "brunogphmzthghu", - "namespace": "brunovrhvjkdh", - "description": "Lorem ipsum dolor sit amet, consectetur adipisicing elit.", - "type": "role", - "latest_version": "1.4.9", - "avatar_url": "https://github.com/brunogphmzthghu.png, - "contents": [], - "download_count": 9999, - "last_updated": "2023-11-09T15:17:01.235457Z", - "deprecated": false, - "tags": ["development", "java", "python"], - "platforms": [{"name": "Ubuntu", "versions": ["jammy", "focal"]}] - } - ``` - """ + """Search collections and roles""" permission_classes = [AllowAny] serializer_class = SearchResultsSerializer @@ -146,7 +88,63 @@ class SearchListView(api_base.GenericViewSet, mixins.ListModelMixin): ] ) def list(self, *args, **kwargs): - """Override the default method just to provide extended schema""" + """Lists Search results for Collections + Roles. + Aggregates search from Collections and Roles in the same results set. + + + ## filtering + + - **search_type:** ["sql", "websearch"] + - **keywords:** string + - queried against name,namespace,description,tags,platform + - when search_type is websearch allows operators e.g: "this OR that AND (A OR B) -C" + - when search_type is sql performs a SQL ilike on the same fields + - **type:** ["collection", "role"] + - **deprecated:** boolean + - **name:** string (iexact query) + - **namespace:** string (iexact query) + - **tags:** string[] (allows multiple &tags=..&tags=..) + - **platform:** string + + ## Sorting + + Sorting is performed by passing `order_by` parameter, optionally prefixed with `-` for DESC, + the allowed fields are: + + - name + - namespace_name + - download_count + - last_updated + - relevance (only when search_type is websearch) + + ## Pagination + + Pagination is based on `limit` and `offset` parameters. 
+ + ## Results + + Results are embedded in the pagination serializer including + `meta:count` and `links:first,previous,next,last`. + + The `data` key contains the results in the format:: + + ```python + { + "name": "brunogphmzthghu", + "namespace": "brunovrhvjkdh", + "description": "Lorem ipsum dolor sit amet, consectetur adipisicing elit.", + "type": "role", + "latest_version": "1.4.9", + "avatar_url": "https://github.com/brunogphmzthghu.png", + "contents": [], + "download_count": 9999, + "last_updated": "2023-11-09T15:17:01.235457Z", + "deprecated": false, + "tags": ["development", "java", "python"], + "platforms": [{"name": "Ubuntu", "versions": ["jammy", "focal"]}] + } + ``` + """ return super().list(*args, **kwargs) def get_queryset(self): @@ -185,11 +183,15 @@ def get_filter_params(self, request): def get_sorting_param(self, request): """Validates the sorting parameter is valid.""" - sort = request.query_params.get(SORT_PARAM, DEFAULT_SORT) - if sort not in SORTABLE_FIELDS: - raise ValidationError(f"{SORT_PARAM} requires one of {SORTABLE_FIELDS}") - search_type = request.query_params.get("search_type", "sql") - if "relevance" in sort and search_type != "websearch": + search_type = request.query_params.get("search_type", DEFAULT_SEARCH_TYPE) + default_sort = "-download_count,-relevance" + if search_type == "sql": + default_sort = "-download_count,-last_updated" + sort = request.query_params.get(SORT_PARAM, default_sort).split(",") + for item in sort: + if item not in SORTABLE_FIELDS: + raise ValidationError(f"{SORT_PARAM} requires one of {SORTABLE_FIELDS}") + if ("relevance" in sort or "-relevance" in sort) and search_type != "websearch": raise ValidationError("'order_by=relevance' works only with 'search_type=websearch'") return sort @@ -203,12 +205,10 @@ def get_collection_queryset(self, query=None): ) namespace_qs = Namespace.objects.filter(name=OuterRef("namespace")) - vector = Value("") relevance = Value(0) if query: - vector = F("search_vector") 
relevance = Func( - F("search"), + F("search_vector"), query, RANK_NORMALIZATION, function="ts_rank", @@ -231,7 +231,7 @@ def get_collection_queryset(self, query=None): latest_version=F("version"), content_list=F("contents"), namespace_avatar=Subquery(namespace_qs.values("_avatar_url")), - search=vector, + search=F("search_vector"), relevance=relevance, ) .values(*QUERYSET_VALUES) @@ -241,18 +241,8 @@ def get_collection_queryset(self, query=None): def get_role_queryset(self, query=None): """Build the LegacyRole queryset from annotations.""" - vector = Value("") relevance = Value(0) if query: - # TODO: Build search_vector field in the LegacyRole model and update via trigger or - # hook during import. - vector = ( - SearchVector("name", weight="A") - + SearchVector("namespace_name", weight="B") - + SearchVector("description_text", weight="C") - + SearchVector("tag_names", weight="D") - + SearchVector("platform_names") - ) relevance = Func( F("search"), query, @@ -272,8 +262,8 @@ def get_role_queryset(self, query=None): download_count=Coalesce(F("legacyroledownloadcount__count"), Value(0)), latest_version=KT("full_metadata__versions__-1__version"), content_list=Value([], JSONField()), # There is no contents for roles - namespace_avatar=F("namespace__avatar_url"), - search=vector, + namespace_avatar=F("namespace__namespace___avatar_url"), # v3 namespace._avatar_url + search=F("legacyrolesearchvector__search_vector"), relevance=relevance, ).values(*QUERYSET_VALUES) return qs @@ -319,11 +309,11 @@ def filter_and_sort(self, collections, roles, filter_params, sort, type="", quer roles = roles.filter(query) if type.lower() == "role": - qs = roles.order_by(sort) + qs = roles.order_by(*sort) elif type.lower() == "collection": - qs = collections.order_by(sort) + qs = collections.order_by(*sort) else: - qs = collections.union(roles, all=True).order_by(sort) + qs = collections.union(roles, all=True).order_by(*sort) return qs @@ -334,9 +324,7 @@ def test(): print() print(f"{' 
START ':#^40}") s = SearchListView() - data = s.get_search_results( - {"type": "", "search_type": "websearch", "keywords": "java web"}, sort="-relevance" - ) + data = s.get_search_results({"type": "", "keywords": "java web"}, sort=["-relevance"]) print(f"{' SQLQUERY ':#^40}") print(data._query) print(f"{' COUNT ':#^40}") diff --git a/galaxy_ng/app/api/v1/models.py b/galaxy_ng/app/api/v1/models.py index 5b228a5903..adb6f4c979 100644 --- a/galaxy_ng/app/api/v1/models.py +++ b/galaxy_ng/app/api/v1/models.py @@ -1,4 +1,6 @@ from django.db import models +from django.contrib.postgres.search import SearchVectorField +from django.contrib.postgres.indexes import GinIndex from galaxy_ng.app.models import Namespace from galaxy_ng.app.models.auth import User @@ -185,6 +187,19 @@ class LegacyRoleDownloadCount(models.Model): count = models.IntegerField(default=0) +class LegacyRoleSearchVector(models.Model): + role = models.OneToOneField( + LegacyRole, + on_delete=models.CASCADE, + primary_key=True, + ) + search_vector = SearchVectorField(default="") + modified = models.DateTimeField(auto_now=True) + + class Meta: + indexes = (GinIndex(fields=["search_vector"]),) + + class LegacyRoleImport(models.Model): role = models.ForeignKey( 'LegacyRole', diff --git a/galaxy_ng/app/migrations/0046_legacyrolesearchvector.py b/galaxy_ng/app/migrations/0046_legacyrolesearchvector.py new file mode 100644 index 0000000000..b5db78fd11 --- /dev/null +++ b/galaxy_ng/app/migrations/0046_legacyrolesearchvector.py @@ -0,0 +1,38 @@ +# Generated by Django 4.2.7 on 2023-11-15 15:52 + +import django.contrib.postgres.indexes +import django.contrib.postgres.search +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + dependencies = [ + ("galaxy", "0045_setting"), + ] + + operations = [ + migrations.CreateModel( + name="LegacyRoleSearchVector", + fields=[ + ( + "role", + models.OneToOneField( + on_delete=django.db.models.deletion.CASCADE, 
primary_key=True, + serialize=False, + to="galaxy.legacyrole", + ), + ), + ("search_vector", django.contrib.postgres.search.SearchVectorField(default="")), + ("modified", models.DateTimeField(auto_now=True)), + ], + options={ + "indexes": [ + django.contrib.postgres.indexes.GinIndex( + fields=["search_vector"], name="galaxy_lega_search__13e661_gin" + ) + ], + }, + ), + ] diff --git a/galaxy_ng/app/migrations/0047_update_role_search_vector_trigger.py b/galaxy_ng/app/migrations/0047_update_role_search_vector_trigger.py new file mode 100755 index 0000000000..e8b686709f --- /dev/null +++ b/galaxy_ng/app/migrations/0047_update_role_search_vector_trigger.py @@ -0,0 +1,64 @@ +# Generated by Django 4.2.7 on 2023-11-15 15:55 + +from django.db import migrations + +REBUILD_ROLES_TS_VECTOR = """ +UPDATE galaxy_legacyrole SET name = name; +""" + +CREATE_ROLE_TS_VECTOR_TRIGGER = """ +CREATE OR REPLACE FUNCTION update_role_ts_vector() + RETURNS TRIGGER + AS $$ +DECLARE + _search_vector tsvector; + _namespace text; +BEGIN + _namespace := (select name from galaxy_legacynamespace WHERE id = NEW.namespace_id); + _search_vector := (((( + setweight(to_tsvector(COALESCE(_namespace, '')), 'A') + || setweight(to_tsvector(COALESCE(NEW."name", '')), 'A')) + || setweight(to_tsvector(COALESCE(((NEW."full_metadata"->'tags'))::text, '')), 'B')) + || setweight(to_tsvector(COALESCE(((NEW."full_metadata"->'platforms'))::text, '')), 'C')) + || setweight(to_tsvector(COALESCE((NEW."full_metadata"->>'description'), '')), 'D')); + + INSERT INTO galaxy_legacyrolesearchvector(role_id,search_vector,modified) + VALUES(new.id,_search_vector,current_timestamp) + ON CONFLICT (role_id) + DO UPDATE SET + search_vector = _search_vector, modified = current_timestamp; + RETURN NEW; +END; +$$ +LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS update_ts_vector ON galaxy_legacyrole; + +CREATE TRIGGER update_ts_vector + AFTER INSERT OR UPDATE + ON galaxy_legacyrole + FOR EACH ROW +EXECUTE PROCEDURE update_role_ts_vector(); 
+""" + +DROP_ROLE_TS_VECTOR_TRIGGER = """ +DROP TRIGGER IF EXISTS update_ts_vector ON galaxy_legacyrole; +DROP FUNCTION IF EXISTS update_role_ts_vector(); +""" + + +class Migration(migrations.Migration): + dependencies = [ + ("galaxy", "0046_legacyrolesearchvector"), + ] + + operations = [ + migrations.RunSQL( + sql=CREATE_ROLE_TS_VECTOR_TRIGGER, + reverse_sql=DROP_ROLE_TS_VECTOR_TRIGGER, + ), + migrations.RunSQL( + sql=REBUILD_ROLES_TS_VECTOR, + reverse_sql=migrations.RunSQL.noop, + ) + ] diff --git a/galaxy_ng/tests/integration/community/test_search.py b/galaxy_ng/tests/integration/community/test_search.py index 818dbaace3..d4cf4c460b 100644 --- a/galaxy_ng/tests/integration/community/test_search.py +++ b/galaxy_ng/tests/integration/community/test_search.py @@ -119,7 +119,9 @@ def test_namespace_with_sql_search(admin_client): @pytest.mark.deployment_community def test_name_with_sql_search(admin_client): """Test search.""" - name = admin_client(f"/api/_ui/v1/search/?namespace=ansible&name={COLLECTION_NAME}") + name = admin_client( + f"/api/_ui/v1/search/?search_type=sql&namespace=ansible&name={COLLECTION_NAME}" + ) assert name["meta"]["count"] == 1 assert name["data"][0]["name"] == COLLECTION_NAME assert name["data"][0]["namespace"] == NAMESPACE_NAME @@ -131,7 +133,7 @@ def test_name_with_sql_search(admin_client): @pytest.mark.deployment_community def test_tags_with_sql_search(admin_client): """Test search.""" - tag_url = "/api/_ui/v1/search/?namespace=ansible" + tag_url = "/api/_ui/v1/search/?search_type=sql&namespace=ansible" for tag in COLLECTION_TAGS: tag_url += f"&tags={tag}" tags = admin_client(tag_url) @@ -146,7 +148,7 @@ def test_tags_with_sql_search(admin_client): @pytest.mark.deployment_community def test_type_with_sql_search(admin_client): """Test search.""" - content_type = admin_client("/api/_ui/v1/search/?namespace=ansible&type=role") + content_type = admin_client("/api/_ui/v1/search/?search_type=sql&namespace=ansible&type=role") assert 
content_type["meta"]["count"] == 1 assert content_type["data"][0]["name"] == ROLE_NAME assert content_type["data"][0]["namespace"] == NAMESPACE_NAME @@ -158,7 +160,7 @@ def test_type_with_sql_search(admin_client): @pytest.mark.deployment_community def test_platform_with_sql_search(admin_client): """Test search.""" - platform = admin_client("/api/_ui/v1/search/?namespace=ansible&platform=fedora") + platform = admin_client("/api/_ui/v1/search/?search_type=sql&namespace=ansible&platform=fedora") assert platform["meta"]["count"] == 1 assert platform["data"][0]["name"] == ROLE_NAME assert platform["data"][0]["namespace"] == NAMESPACE_NAME @@ -170,14 +172,18 @@ def test_platform_with_sql_search(admin_client): @pytest.mark.deployment_community def test_deprecated_with_sql_search(admin_client): """Test search.""" - deprecated = admin_client("/api/_ui/v1/search/?namespace=ansible&deprecated=true") + deprecated = admin_client( + "/api/_ui/v1/search/?search_type=sql&namespace=ansible&deprecated=true" + ) assert deprecated["meta"]["count"] == 0 @pytest.mark.deployment_community def test_keywords_with_sql_search(admin_client): """Test search.""" - keywords = admin_client("/api/_ui/v1/search/?namespace=ansible&keywords=infinidash") + keywords = admin_client( + "/api/_ui/v1/search/?search_type=sql&namespace=ansible&keywords=infinidash" + ) assert keywords["meta"]["count"] == 1 assert keywords["data"][0]["name"] == COLLECTION_NAME assert keywords["data"][0]["namespace"] == NAMESPACE_NAME @@ -189,7 +195,9 @@ def test_keywords_with_sql_search(admin_client): @pytest.mark.deployment_community def test_sorting_with_sql_search(admin_client): """Test search.""" - sorting = admin_client("/api/_ui/v1/search/?namespace=ansible&order_by=-last_updated") + sorting = admin_client( + "/api/_ui/v1/search/?search_type=sql&namespace=ansible&order_by=-last_updated" + ) assert sorting["meta"]["count"] == 2 assert sorting["data"][0]["type"] == "role" assert sorting["data"][1]["type"] == "collection" 
@@ -198,7 +206,7 @@ def test_sorting_with_sql_search(admin_client): @pytest.mark.deployment_community def test_facets_with_web_search(admin_client): """Search using vector websearch""" - namespace = admin_client("/api/_ui/v1/search/?namespace=ansible&search_type=websearch") + namespace = admin_client("/api/_ui/v1/search/?namespace=ansible") assert namespace["meta"]["count"] == 2 # IMPORTANT: Keep filtering by namespace to avoid including content from other tests @@ -207,9 +215,7 @@ def test_facets_with_web_search(admin_client): @pytest.mark.deployment_community def test_name_with_web_search(admin_client): """Search using vector websearch""" - name = admin_client( - f"/api/_ui/v1/search/?namespace=ansible&search_type=websearch&name={COLLECTION_NAME}" - ) + name = admin_client(f"/api/_ui/v1/search/?namespace=ansible&name={COLLECTION_NAME}") assert name["meta"]["count"] == 1 assert name["data"][0]["name"] == COLLECTION_NAME assert name["data"][0]["namespace"] == NAMESPACE_NAME @@ -221,7 +227,7 @@ def test_name_with_web_search(admin_client): @pytest.mark.deployment_community def test_tags_with_web_search(admin_client): """Search using vector websearch""" - tag_url = "/api/_ui/v1/search/?namespace=ansible&search_type=websearch" + tag_url = "/api/_ui/v1/search/?namespace=ansible" for tag in COLLECTION_TAGS: tag_url += f"&tags={tag}" tags = admin_client(tag_url) @@ -236,9 +242,7 @@ def test_tags_with_web_search(admin_client): @pytest.mark.deployment_community def test_type_with_web_search(admin_client): """Search using vector websearch""" - content_type = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch&type=role" - ) + content_type = admin_client("/api/_ui/v1/search/?namespace=ansible&type=role") assert content_type["meta"]["count"] == 1 assert content_type["data"][0]["name"] == ROLE_NAME assert content_type["data"][0]["namespace"] == NAMESPACE_NAME @@ -250,9 +254,7 @@ def test_type_with_web_search(admin_client): 
@pytest.mark.deployment_community def test_platform_with_web_search(admin_client): """Search using vector websearch""" - platform = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch&platform=fedora" - ) + platform = admin_client("/api/_ui/v1/search/?namespace=ansible&platform=fedora") assert platform["meta"]["count"] == 1 assert platform["data"][0]["name"] == ROLE_NAME assert platform["data"][0]["namespace"] == NAMESPACE_NAME @@ -264,18 +266,14 @@ def test_platform_with_web_search(admin_client): @pytest.mark.deployment_community def test_deprecated_with_web_search(admin_client): """Search using vector websearch""" - deprecated = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch&deprecated=true" - ) + deprecated = admin_client("/api/_ui/v1/search/?namespace=ansible&deprecated=true") assert deprecated["meta"]["count"] == 0 @pytest.mark.deployment_community def test_keywords_with_web_search(admin_client): """Search using vector websearch""" - keywords = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch&keywords=infinidash" - ) + keywords = admin_client("/api/_ui/v1/search/?namespace=ansible&keywords=infinidash") assert keywords["meta"]["count"] == 1 assert keywords["data"][0]["name"] == COLLECTION_NAME assert keywords["data"][0]["namespace"] == NAMESPACE_NAME @@ -287,9 +285,7 @@ def test_keywords_with_web_search(admin_client): @pytest.mark.deployment_community def test_sorting_with_web_search(admin_client): """Search using vector websearch""" - sorting = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch&order_by=-last_updated" - ) + sorting = admin_client("/api/_ui/v1/search/?namespace=ansible&order_by=-last_updated") assert sorting["meta"]["count"] == 2 assert sorting["data"][0]["type"] == "role" assert sorting["data"][1]["type"] == "collection" @@ -304,9 +300,7 @@ def test_compound_query_with_web_search(admin_client): "infinidash%20OR%20java", 
"api%20-kubernetes", ]: - websearch = admin_client( - f"/api/_ui/v1/search/?namespace=ansible&search_type=websearch&keywords={term}" - ) + websearch = admin_client(f"/api/_ui/v1/search/?namespace=ansible&keywords={term}") assert websearch["meta"]["count"] == 1 assert websearch["data"][0]["name"] == COLLECTION_NAME assert websearch["data"][0]["namespace"] == NAMESPACE_NAME @@ -320,9 +314,7 @@ def test_compound_query_with_web_search(admin_client): "kubernetes%20OR%20java", "api%20-infinidash", ]: - websearch = admin_client( - f"/api/_ui/v1/search/?namespace=ansible&search_type=websearch&keywords={term}" - ) + websearch = admin_client(f"/api/_ui/v1/search/?namespace=ansible&keywords={term}") assert websearch["meta"]["count"] == 1 assert websearch["data"][0]["name"] == ROLE_NAME assert websearch["data"][0]["namespace"] == NAMESPACE_NAME @@ -336,8 +328,7 @@ def test_relevance_with_web_search(admin_client): """Search using vector websearch""" # Both has api tag and fedora term as a platform for role and description for collection keywords = admin_client( - "/api/_ui/v1/search/?namespace=ansible&search_type=websearch" - "&keywords=api%20AND%20fedora&order_by=-relevance" + "/api/_ui/v1/search/?namespace=ansible" "&keywords=api%20AND%20fedora&order_by=-relevance" ) assert keywords["meta"]["count"] == 2 assert keywords["data"][0]["name"] == ROLE_NAME