Skip to content

Commit

Permalink
Use pgroonga for search
Browse files Browse the repository at this point in the history
  • Loading branch information
ThirVondukr committed Feb 24, 2024
1 parent ef471c3 commit 7e54b9a
Show file tree
Hide file tree
Showing 17 changed files with 231 additions and 80 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ jobs:
runs-on: ubuntu-latest
services:
postgres:
image: postgres:16.1-bookworm
image: groonga/pgroonga:3.1.7-alpine-16-slim
ports:
- "5432:5432"
env:
Expand Down
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@ coverage.xml
.pdm-python
/.k8s/app/values.dev.yaml
/tiltfile

.local
8 changes: 8 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,11 @@ services:
build: .
entrypoint: ["alembic", "upgrade", "head"]
env_file: .env

postgres:
ports:
- "${POSTGRES_PORT:-5432}:5432"
image: groonga/pgroonga:3.1.7-debian-16
env_file: .env
volumes:
- ".local/postgres:/var/lib/postgresql/data"
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ omit = [
"src/app/db/migrations/*",
"src/app/adapters/cli/seed.py",
]
command_line = "-m pytest -v --reuse-db"
command_line = "-m pytest -vvv --reuse-db"
concurrency = ["greenlet"]
branch = true

Expand Down
12 changes: 12 additions & 0 deletions schema.graphql
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
type AltTitle {
id: ID!
language: LanguageEnum!
title: String!
}

type AuthMutations {
signUp(input: SignUpInput!): SignUpPayload!
signIn(input: SignInInput!): SignInPayload!
Expand All @@ -23,13 +29,19 @@ type InvalidCredentialsError implements Error {
message: String!
}

enum LanguageEnum {
eng
ukr
}

type Manga {
id: ID!
title: String!
titleSlug: String!
createdAt: DateTime!
updatedAt: DateTime!
tags: [MangaTag!]!
altTitles: [AltTitle!]!
}

union MangaCreateError = ValidationErrors
Expand Down
27 changes: 14 additions & 13 deletions src/app/adapters/cli/seed.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,25 @@

async def main() -> None:
async with async_session_factory.begin() as session:
for model in [MangaTag, Manga, AltTitle]:
for model in [AltTitle, MangaTag, Manga]:
await session.execute(delete(model))

async with async_session_factory.begin() as session:
tags = MangaTagFactory.build_batch(size=20)
session.add_all(tags)
mangas = MangaFactory.build_batch(size=1000)
for manga in mangas:
manga.tags = random.sample(
tags,
k=random.randint(1, 5), # noqa: S311
)
manga.alt_titles = MangaAltTitleFactory.build_batch(
size=random.randint(1, 3), # noqa: S311
)

session.add_all(mangas)
await session.flush()
for _ in range(1):
mangas = MangaFactory.build_batch(size=10_000)
for manga in mangas:
manga.tags = random.sample(
tags,
k=random.randint(1, 5), # noqa: S311
)
manga.alt_titles = MangaAltTitleFactory.build_batch(
size=random.randint(1, 3), # noqa: S311
)

session.add_all(mangas)
await session.flush()


if __name__ == "__main__":
Expand Down
23 changes: 7 additions & 16 deletions src/app/adapters/graphql/apps/manga/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from app.adapters.graphql.context import Info
from app.adapters.graphql.dto import DTOMixin
from app.adapters.graphql.types import LanguageGQL
from app.core.domain.manga.loaders import MangaTagLoader
from app.core.domain.manga.loaders import MangaAltTitleLoader, MangaTagLoader
from app.db.models import Manga
from app.db.models._manga import AltTitle, MangaTag

Expand Down Expand Up @@ -74,21 +74,12 @@ async def tags(
)
return MangaTagGQL.from_dto_list(tags)

# @strawberry.field
# async def infos(self, info: Info) -> Sequence[AltTitleGQL]:
# preferred_languages = preferred_languages or []
# preferred_languages = [Language[lang.value] for lang in preferred_languages]
# infos = await info.context.loaders.manga_info_by_manga_id.load(self.id)
# if preferred_languages:
# preferred_infos = [
# info for info in infos if info.language in preferred_languages
# ]
# preferred_infos.sort(
# key=lambda i: preferred_languages.index(i.language),
# )
# infos = preferred_infos or infos
#
# return AltTitleGQL.from_orm_list(infos)
@strawberry.field
async def alt_titles(self, info: Info) -> Sequence[AltTitleGQL]:
    """Resolve the alternative titles of this manga.

    The titles are fetched through the ``MangaAltTitleLoader`` obtained
    from the request context, keyed by the id of the wrapped instance,
    and converted to their GraphQL representation.
    """
    loader = info.context.loaders.map(MangaAltTitleLoader)
    manga_id = self._instance.id
    loaded_titles = await loader.load(manga_id)
    return AltTitleGQL.from_dto_list(loaded_titles)

#
# @strawberry.field
Expand Down
4 changes: 2 additions & 2 deletions src/app/core/di/_modules/manga.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from app.core.domain.manga.commands import MangaCreateCommand
from app.core.domain.manga.loaders import (
MangaInfoLoader,
MangaAltTitleLoader,
MangaLoader,
MangaTagLoader,
)
Expand All @@ -18,5 +18,5 @@
Scoped(MangaLoader),
Scoped(MangaCreateCommand),
Scoped(MangaTagLoader),
Scoped(MangaInfoLoader),
Scoped(MangaAltTitleLoader),
]
4 changes: 2 additions & 2 deletions src/app/core/domain/manga/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ async def execute(self, keys: Sequence[K]) -> Sequence[Sequence[V]]:
return [models[key] for key in keys]


class MangaInfoLoader(SQLAListLoader[UUID, AltTitle]):
class MangaAltTitleLoader(SQLAListLoader[UUID, AltTitle]):
column = Manga.id
stmt = (
select(Manga.id, AltTitle)
.join(AltTitle.manga)
.order_by(AltTitle.language)
.order_by(AltTitle.language, AltTitle.id)
)


Expand Down
21 changes: 6 additions & 15 deletions src/app/core/domain/manga/repositories.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from uuid import UUID

from sqlalchemy import Select, func, select
from sqlalchemy import Select, func, select, text
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.orm import aliased

Expand Down Expand Up @@ -29,6 +29,7 @@ async def paginate(
filter: MangaFilter,
pagination: PagePaginationParamsDTO,
) -> PagePaginationResultDTO[Manga]:
await self._session.execute(text("set enable_seqscan = off;"))
stmt = self._filter_stmt(filter=filter)
return await page_paginate(
session=self._session,
Expand All @@ -38,22 +39,12 @@ async def paginate(

def _filter_stmt(self, filter: MangaFilter) -> Select[tuple[Manga]]:
stmt = self._base_stmt.group_by(Manga.id).order_by(
Manga.created_at.desc(),
Manga.id.desc(),
Manga.title,
Manga.id,
)
if filter.search_term:
search_term = func.plainto_tsquery(filter.search_term)
stmt = (
stmt.join(Manga.alt_titles, isouter=True)
.group_by(AltTitle.search_ts_vector)
.where(AltTitle.search_ts_vector.op("@@")(search_term))
.order_by(None)
.order_by(
func.ts_rank_cd(
AltTitle.search_ts_vector,
search_term,
).desc(),
)
stmt = stmt.join(Manga.alt_titles, isouter=True).where(
AltTitle.title.op("&@~")(filter.search_term),
)
if filter.tags.include:
include_alias = aliased(MangaTag, name="tags_include")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""
Create pgroonga extension
Revision ID: d49f687766a0
Revises: 7f9bdf00c25c
Create Date: 2024-02-24 21:54:10.518816
"""

from alembic import op

# revision identifiers, used by Alembic.
revision = "d49f687766a0"
down_revision: str | None = "7f9bdf00c25c"
branch_labels: str | None = None
depends_on: str | None = None


def upgrade() -> None:
    """Install the pgroonga extension unless it is already present."""
    create_extension_sql = "create extension if not exists pgroonga;"
    op.execute(create_extension_sql)


def downgrade() -> None:
    """Remove the pgroonga extension.

    ``if exists`` keeps the downgrade idempotent, mirroring the
    ``create extension if not exists`` statement issued by ``upgrade``:
    re-running it against a database where the extension is already gone
    is a no-op instead of an error.
    """
    op.execute("drop extension if exists pgroonga;")
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
Replace tsvector search columns and GIN index with a pgroonga index on title
Revision ID: cf103393a814
Revises: d49f687766a0
Create Date: 2024-02-24 22:10:46.166577
"""

import sqlalchemy as sa
from alembic import op
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = "cf103393a814"
down_revision: str | None = "d49f687766a0"
branch_labels: str | None = None
depends_on: str | None = None


def upgrade() -> None:
    """Switch ``manga_alt_title`` search from tsvector/GIN to pgroonga.

    Drops the GIN index on ``search_ts_vector``, creates a pgroonga index
    on ``title``, then removes the two columns that only served the
    tsvector-based search.
    """
    table = "manga_alt_title"

    # Retire the old full-text index.
    op.drop_index(
        "ix_manga_info_search_ts_vector",
        table_name=table,
        postgresql_using="gin",
    )

    # Index the raw title with pgroonga instead.
    op.create_index(
        "ix_manga_info_title_pgroonga",
        table,
        ["title"],
        unique=False,
        postgresql_using="pgroonga",
    )

    # ``search_ts_vector`` is dropped first, then ``language_regconfig``.
    for obsolete_column in ("search_ts_vector", "language_regconfig"):
        op.drop_column(table, obsolete_column)


def downgrade() -> None:
    """Restore the tsvector-based search on ``manga_alt_title``.

    Re-adds the ``language_regconfig`` and computed ``search_ts_vector``
    columns, drops the pgroonga index on ``title`` and recreates the GIN
    index on ``search_ts_vector``.
    """
    # NOTE(review): both columns are added as NOT NULL without a server
    # default or backfill; PostgreSQL rejects that when the table already
    # contains rows. Confirm whether a backfill of ``language_regconfig``
    # is needed before relying on this downgrade with existing data.
    op.add_column(
        "manga_alt_title",
        sa.Column(
            "language_regconfig",
            # The autogenerated ``sa.NullType()`` cannot be rendered to DDL;
            # use the concrete PostgreSQL ``regconfig`` type instead.
            postgresql.REGCONFIG(),
            autoincrement=False,
            nullable=False,
        ),
    )
    op.add_column(
        "manga_alt_title",
        sa.Column(
            "search_ts_vector",
            postgresql.TSVECTOR(),
            # Stored generated column derived from the title and its
            # per-row text search configuration.
            sa.Computed(
                "to_tsvector(language_regconfig, (COALESCE(title, ''::character varying))::text)",
                persisted=True,
            ),
            autoincrement=False,
            nullable=False,
        ),
    )
    op.drop_index(
        "ix_manga_info_title_pgroonga",
        table_name="manga_alt_title",
        postgresql_using="pgroonga",
    )
    op.create_index(
        "ix_manga_info_search_ts_vector",
        "manga_alt_title",
        ["search_ts_vector"],
        unique=False,
        postgresql_using="gin",
    )
29 changes: 3 additions & 26 deletions src/app/db/models/_manga.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,12 @@

from sqlalchemy import (
Column,
Computed,
ForeignKey,
Index,
String,
Table,
UniqueConstraint,
)
from sqlalchemy.dialects.postgresql import REGCONFIG, TSVECTOR
from sqlalchemy.engine.default import DefaultExecutionContext
from sqlalchemy.orm import (
Mapped,
MappedAsDataclass,
Expand All @@ -25,7 +22,6 @@
HasPrivate,
HasTimestamps,
PkUUID,
RegConfigLanguage,
str_title,
)
from lib.types import Language
Expand Down Expand Up @@ -139,12 +135,6 @@ class MangaPage(PkUUID, MappedAsDataclass, Base, kw_only=True):
image_path: Mapped[str] = mapped_column(String(250))


def _regconfig_default(ctx: DefaultExecutionContext) -> str:
return RegConfigLanguage[
ctx.current_parameters["language"].value # type: ignore[index]
].value


class AltTitle(
PkUUID,
HasTimestamps,
Expand All @@ -155,9 +145,9 @@ class AltTitle(
__tablename__ = "manga_alt_title"
__table_args__ = (
Index(
"ix_manga_info_search_ts_vector",
"search_ts_vector",
postgresql_using="gin",
"ix_manga_info_title_pgroonga",
"title",
postgresql_using="pgroonga",
),
)

Expand All @@ -171,17 +161,4 @@ class AltTitle(
default=None,
)
language: Mapped[Language]
language_regconfig: Mapped[str] = mapped_column(
REGCONFIG,
insert_default=_regconfig_default,
init=False,
)
title: Mapped[str] = mapped_column(String(250))
search_ts_vector: Mapped[str] = mapped_column(
TSVECTOR,
Computed(
"to_tsvector(language_regconfig, coalesce(title, '')",
persisted=True,
),
init=False,
)
Loading

0 comments on commit 7e54b9a

Please sign in to comment.