diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2ca6b5e..d70fbfe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,7 @@ repos: # https://github.com/pre-commit/pre-commit-hooks#pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 + rev: v4.6.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -9,12 +9,10 @@ repos: - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.0 # Ruff version + rev: v0.5.4 # Ruff version hooks: + # Run the linter - id: ruff - args: [--fix, --exit-non-zero-on-fix] - - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.0 # Ruff version - hooks: + args: [--fix, --exit-non-zero-on-fix, --select, I] + # Run the formatter - id: ruff-format diff --git a/README.md b/README.md index 9ad7d3f..ed0d73a 100644 --- a/README.md +++ b/README.md @@ -115,6 +115,12 @@ uvicorn api:app --reload ## Lint and format the code +Before contributing to the repository, it is necessary to initialize the pre-commit hooks: +```bash +pre-commit install +``` +Once this is done, code formatting and linting, as well as import sorting, will be automatically checked before each commit. + Lint with: ```bash rye lint --fix diff --git a/api.py b/api.py index ef67106..ed9ba1d 100644 --- a/api.py +++ b/api.py @@ -34,7 +34,7 @@ DESCRIPTION: str = config["project"]["description"] VERSION: str = config["project"]["version"] -if ENVIRONMENT != "local": +if ENVIRONMENT not in ["local", "test"]: sentry_sdk.init( dsn=SENTRY_DSN, release=f"{APP_NAME}@{VERSION}", @@ -43,6 +43,7 @@ # of transactions for performance monitoring. # Sentry recommend adjusting this value in production, traces_sample_rate=1.0, + profiles_sample_rate=1.0, # Experimental profiling _experiments={ "profiles_sample_rate": 1.0, @@ -96,9 +97,7 @@ async def generate_word(request: Request, lang: str) -> dict[str, str | None] | @app.get("/{lang}/word/get", tags=["word"]) @limiter.limit("20/minute") -async def get_random_word_from_db( - request: Request, lang: str -) -> dict[str, str | None] | None: +async def get_random_word_from_db(request: Request, lang: str) -> dict[str, str | None] | None: """ Get a random generated word from DB. """ @@ -141,9 +140,7 @@ async def generate_definition(request: Request, lang: str) -> dict[str, str] | N @app.get("/{lang}/definition/get", tags=["definition"]) @limiter.limit("20/minute") -async def get_random_definition_from_db( - request: Request, lang: str -) -> dict[str, str] | None: +async def get_random_definition_from_db(request: Request, lang: str) -> dict[str, str] | None: """ Get a random generated definition from DB. """ @@ -175,9 +172,7 @@ async def get_random_definition_from_db( @app.get("/{lang}/alter", tags=["alter"]) @limiter.limit("6/minute") -async def alter_text( - request: Request, lang: str, text: str, percentage: float | None = 0.4 -) -> str: +async def alter_text(request: Request, lang: str, text: str, percentage: float | None = 0.4) -> str: """ Alter a text with random non existing words. """ diff --git a/commands/build_proba_file.py b/commands/build_proba_file.py index 7f3de42..d4bb2d4 100644 --- a/commands/build_proba_file.py +++ b/commands/build_proba_file.py @@ -49,9 +49,7 @@ def build_2char_probabilities( temp2: dict = alphabet_dict | temp temp3: dict = alphabet_dict | {"last_letter": 0} - probabilities: dict = {"first_letter": alphabet_dict} | { - chars: temp3.copy() for chars in temp2 - } + probabilities: dict = {"first_letter": alphabet_dict} | {chars: temp3.copy() for chars in temp2} # Populate the dictionary with probabilities with open(dictionary_filepath, "r", encoding="utf-8") as dictionary: @@ -97,17 +95,12 @@ def build_chars_probability_file(lang: str, chars_nb: int = 2) -> None: async def _main(): current_path = Path(__file__).parent.absolute() - with open( - current_path / f"../{lang}/data/alphabet_{lang.upper()}.json" - ) as infile: + with open(current_path / f"../{lang}/data/alphabet_{lang.upper()}.json") as infile: alphabet: list[str] = json.load(infile) - dictionary_filepath: Path = ( - current_path / f"../{lang}/data/dictionary_{lang.upper()}.txt" - ) + dictionary_filepath: Path = current_path / f"../{lang}/data/dictionary_{lang.upper()}.txt" json_filepath: Path = ( - current_path - / f"../{lang}/data/proba_table_{chars_nb}char_{lang.upper()}.json" + current_path / f"../{lang}/data/proba_table_{chars_nb}char_{lang.upper()}.json" ) if chars_nb == 1: diff --git a/commands/clean_db_generated.py b/commands/clean_db_generated.py index 15f72c7..cc9678e 100755 --- a/commands/clean_db_generated.py +++ b/commands/clean_db_generated.py @@ -21,9 +21,7 @@ async def _main(): existing = await RealWordEN.filter(string=entry.string) if existing: i += 1 - typer.secho( - f'"{entry.string}" exists as a real word. Deleting.', fg="cyan" - ) + typer.secho(f'"{entry.string}" exists as a real word. Deleting.', fg="cyan") await entry.delete() else: continue @@ -33,9 +31,7 @@ async def _main(): existing = await RealWordFR.filter(string=entry.string) if existing: i += 1 - typer.secho( - f'"{entry.string}" exists as a real word. Deleting.', fg="cyan" - ) + typer.secho(f'"{entry.string}" exists as a real word. Deleting.', fg="cyan") await entry.delete() else: continue diff --git a/commands/clean_db_real.py b/commands/clean_db_real.py index 7bbe78a..07f6519 100755 --- a/commands/clean_db_real.py +++ b/commands/clean_db_real.py @@ -28,9 +28,7 @@ async def _main(): for j, entry in enumerate(await real_word_class.objects.all()): if ".ADV" in entry.string: i += 1 - typer.secho( - f'"{entry.string}" have .ADV in string, fixing...', fg="cyan" - ) + typer.secho(f'"{entry.string}" have .ADV in string, fixing...', fg="cyan") replacement = entry.string.replace(".ADV", "") try: await entry.update( @@ -61,9 +59,7 @@ async def _main(): else: continue - typer.secho( - f'"{i}/{j}" real words had .ADV in string and were fixed.', fg="green" - ) + typer.secho(f'"{i}/{j}" real words had .ADV in string and were fixed.', fg="green") aiorun(_main()) diff --git a/commands/dictionary_to_db.py b/commands/dictionary_to_db.py index 55ec193..9fc4caf 100755 --- a/commands/dictionary_to_db.py +++ b/commands/dictionary_to_db.py @@ -57,9 +57,7 @@ async def _main(): proper=False, ) else: - await RealWordFR.objects.create( - string=word, proper=False - ) + await RealWordFR.objects.create(string=word, proper=False) except Exception as e: typer.secho(f"{word}", fg="red") typer.secho(e, fg="red") diff --git a/commands/tweet.py b/commands/tweet.py index dcdb6da..61a1e76 100755 --- a/commands/tweet.py +++ b/commands/tweet.py @@ -5,7 +5,7 @@ import typer from common import prepare_db -from config import SENTRY_DSN, SENTRY_CRON_MONITOR_ID, TWITTER +from config import SENTRY_CRON_MONITOR_ID, SENTRY_DSN, TWITTER from en import generate_tweet_en from fr import generate_tweet_fr diff --git a/common/generate_word.py b/common/generate_word.py index 1432026..1d9b7f5 100644 --- a/common/generate_word.py +++ b/common/generate_word.py @@ -11,7 +11,9 @@ async def generate_word_and_save(lang: str, ip: str) -> dict | None: - already_generated = True # assume it has already been generated so we enter the while loop at least once + already_generated = ( + True # assume it has already been generated so we enter the while loop at least once + ) retries = 0 while already_generated and retries < 10: try: @@ -93,9 +95,7 @@ def _generate_word_core(json_proba_file: str) -> str: word = char1 # for _ in probas[char1]: # choose second char - char2 = random.choices( - list(probas[char1].keys()), weights=list(probas[char1].values()), k=1 - )[0] + char2 = random.choices(list(probas[char1].keys()), weights=list(probas[char1].values()), k=1)[0] if char2 == "last_letter": # it's a 1-letter word return word diff --git a/en/alter_text.py b/en/alter_text.py index 0c0bcc7..f1f0d88 100644 --- a/en/alter_text.py +++ b/en/alter_text.py @@ -23,9 +23,7 @@ } -async def alter_text_en( - text: str, percentage: float, forced_replacements: dict | None = {} -) -> str: +async def alter_text_en(text: str, percentage: float, forced_replacements: dict | None = {}) -> str: """ Alter a text randomly using NLTK POS tagging. See https://www.guru99.com/pos-tagging-chunking-nltk.html diff --git a/fr/alter_text.py b/fr/alter_text.py index eaa327a..db1961d 100644 --- a/fr/alter_text.py +++ b/fr/alter_text.py @@ -59,9 +59,7 @@ def create_melt_tagger(nlp, name): } -async def alter_text_fr( - text: str, percentage: float, forced_replacements: dict | None = {} -) -> str: +async def alter_text_fr(text: str, percentage: float, forced_replacements: dict | None = {}) -> str: """ Alter a text randomly using Spacy and Lefff """ diff --git a/fr/classify.py b/fr/classify.py index 80bb50a..5560102 100644 --- a/fr/classify.py +++ b/fr/classify.py @@ -20,8 +20,7 @@ def classify_fr(word: str) -> dict: gender = "f" number = "p" elif (word[-4:] == "ment") and ( - get_char_from_position(word, -5) - in ["a", "e", "é", "i", "î", "ï", "o", "ô", "u", "û"] + get_char_from_position(word, -5) in ["a", "e", "é", "i", "î", "ï", "o", "ô", "u", "û"] ): type = "adverb" elif word[-4:] == "ique": diff --git a/pyproject.toml b/pyproject.toml index 4ba641d..fdc2e57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "word-generator-api" -version = "1.5.6" +version = "1.5.7" description = "Generates words that don't exist but sound English, French, Spanish or Italian, along with their altered dictionary definitions." authors = [{ name = "Adrien Carpentier", email = "me@adriencarpentier.com" }] dependencies = [ @@ -10,7 +10,7 @@ dependencies = [ "nltk<4.0,>=3.7", "requests<3.0.0,>=2.27.1", "slowapi<1.0.0,>=0.1.5", - "sentry-sdk>=1.15.0", + "sentry-sdk>=2.10.0", "spacy<4.0.0,>=3.2.4", "tqdm<5.0.0,>=4.63.0", "tortoise-orm>=0.19.3", @@ -29,19 +29,14 @@ requires = ["pdm-pep517>=1.0.0"] build-backend = "pdm.pep517.api" [tool] -rye = { dev-dependencies = [ - "pre-commit>=3.6.0", - "ruff>=0.2.0", -] } +rye = { dev-dependencies = ["pre-commit>=3.7.1", "ruff>=0.5.4"] } [tool.pdm] includes = [] [tool.pdm.dev-dependencies] -dev = [ - "ruff>=0.1.0", - "pre-commit>=3.5.0", -] +dev = ["pre-commit>=3.7.1", "ruff>=0.5.4"] [tool.ruff] -ignore = ["E501"] +lint = { select = ["I"] } +line-length = 100 diff --git a/requirements-dev.lock b/requirements-dev.lock index c005445..975dfe0 100644 --- a/requirements-dev.lock +++ b/requirements-dev.lock @@ -6,81 +6,253 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false +# universal: false -e file:. aiosqlite==0.17.0 -annotated-types==0.6.0 -anyio==4.2.0 + # via tortoise-orm +annotated-types==0.7.0 + # via pydantic +anyio==4.4.0 + # via httpx + # via starlette + # via watchfiles async-timeout==4.0.3 + # via asyncpg asyncpg==0.29.0 + # via word-generator-api black==22.12.0 + # via spacy-lefff blis==0.7.11 + # via thinc catalogue==2.0.10 -certifi==2024.2.2 + # via spacy + # via srsly + # via thinc +certifi==2024.7.4 + # via httpcore + # via httpx + # via requests + # via sentry-sdk cfgv==3.4.0 + # via pre-commit charset-normalizer==3.3.2 + # via requests click==8.1.7 -cloudpathlib==0.16.0 -confection==0.1.4 + # via black + # via nltk + # via typer + # via uvicorn +cloudpathlib==0.18.1 + # via weasel +confection==0.1.5 + # via thinc + # via weasel cymem==2.0.8 + # via preshed + # via spacy + # via thinc deprecated==1.2.14 + # via limits distlib==0.3.8 -fastapi==0.109.2 -filelock==3.13.1 + # via virtualenv +dnspython==2.6.1 + # via email-validator +email-validator==2.2.0 + # via fastapi +fastapi==0.111.1 + # via word-generator-api +fastapi-cli==0.0.4 + # via fastapi +filelock==3.15.4 + # via virtualenv gunicorn==20.1.0 + # via word-generator-api h11==0.14.0 -identify==2.5.34 -idna==3.6 -importlib-resources==6.1.1 + # via httpcore + # via uvicorn +httpcore==1.0.5 + # via httpx +httptools==0.6.1 + # via uvicorn +httpx==0.27.0 + # via fastapi +identify==2.6.0 + # via pre-commit +idna==3.7 + # via anyio + # via email-validator + # via httpx + # via requests +importlib-resources==6.4.0 + # via limits iso8601==1.1.0 -jinja2==3.1.3 -joblib==1.3.2 -langcodes==3.3.0 -limits==3.8.0 + # via tortoise-orm +jinja2==3.1.4 + # via fastapi + # via spacy +joblib==1.4.2 + # via nltk +langcodes==3.4.0 + # via spacy +language-data==1.2.0 + # via langcodes +limits==3.13.0 + # via slowapi +marisa-trie==1.2.0 + # via language-data +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py msgpack==0.5.6 + # via spacy-lefff murmurhash==1.0.10 + # via preshed + # via spacy + # via thinc mypy-extensions==1.0.0 + # via black nltk==3.8.1 -nodeenv==1.8.0 + # via word-generator-api +nodeenv==1.9.1 + # via pre-commit numpy==1.26.4 + # via blis + # via spacy + # via thinc oauthlib==3.2.2 -packaging==23.2 + # via requests-oauthlib + # via tweepy +packaging==24.1 + # via limits + # via spacy + # via thinc + # via weasel pathspec==0.12.1 -platformdirs==4.2.0 -pluggy==1.4.0 -pre-commit==3.6.1 + # via black +platformdirs==4.2.2 + # via black + # via virtualenv +pluggy==1.5.0 + # via spacy-lefff +pre-commit==3.7.1 preshed==3.0.9 -pydantic==2.6.1 -pydantic-core==2.16.2 + # via spacy + # via thinc +pydantic==2.8.2 + # via confection + # via fastapi + # via spacy + # via thinc + # via tortoise-orm + # via weasel +pydantic-core==2.20.1 + # via pydantic +pygments==2.18.0 + # via rich pypika-tortoise==0.1.6 + # via tortoise-orm +python-dotenv==1.0.1 + # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 + # via tortoise-orm pyyaml==6.0.1 -regex==2023.12.25 -requests==2.31.0 + # via pre-commit + # via uvicorn +regex==2024.5.15 + # via nltk +requests==2.32.3 + # via requests-oauthlib + # via spacy + # via tweepy + # via weasel + # via word-generator-api requests-oauthlib==1.3.1 -ruff==0.2.1 -sentry-sdk==1.40.4 + # via tweepy +rich==13.7.1 + # via typer +ruff==0.5.4 +sentry-sdk==2.10.0 + # via word-generator-api +setuptools==71.1.0 + # via gunicorn + # via marisa-trie + # via spacy + # via thinc +shellingham==1.5.4 + # via typer slowapi==0.1.9 -smart-open==6.4.0 -sniffio==1.3.0 -spacy==3.7.2 + # via word-generator-api +smart-open==7.0.4 + # via weasel +sniffio==1.3.1 + # via anyio + # via httpx +spacy==3.7.5 + # via spacy-lefff + # via word-generator-api spacy-lefff==0.5.1 + # via word-generator-api spacy-legacy==3.0.12 + # via spacy spacy-loggers==1.0.5 + # via spacy srsly==2.4.8 -starlette==0.36.3 -thinc==8.2.3 -tortoise-orm==0.20.0 -tqdm==4.66.2 + # via confection + # via spacy + # via thinc + # via weasel +starlette==0.37.2 + # via fastapi +thinc==8.2.5 + # via spacy +tortoise-orm==0.21.5 + # via word-generator-api +tqdm==4.66.4 + # via nltk + # via spacy + # via spacy-lefff + # via word-generator-api tweepy==4.14.0 -typer==0.9.0 -typing-extensions==4.9.0 -urllib3==2.2.0 -uvicorn==0.27.1 -virtualenv==20.25.0 -wasabi==1.1.2 -weasel==0.3.4 + # via word-generator-api +typer==0.12.3 + # via fastapi-cli + # via spacy + # via weasel + # via word-generator-api +typing-extensions==4.12.2 + # via aiosqlite + # via fastapi + # via limits + # via pydantic + # via pydantic-core + # via typer +urllib3==2.2.2 + # via requests + # via sentry-sdk +uvicorn==0.30.3 + # via fastapi + # via word-generator-api +uvloop==0.19.0 + # via uvicorn +virtualenv==20.26.3 + # via pre-commit +wasabi==1.1.3 + # via spacy + # via thinc + # via weasel +watchfiles==0.22.0 + # via uvicorn +weasel==0.4.1 + # via spacy +websockets==12.0 + # via uvicorn wrapt==1.16.0 -# The following packages are considered to be unsafe in a requirements file: -setuptools==69.1.0 + # via deprecated + # via smart-open diff --git a/requirements.lock b/requirements.lock index 58c8580..064d776 100644 --- a/requirements.lock +++ b/requirements.lock @@ -6,72 +6,237 @@ # features: [] # all-features: false # with-sources: false +# generate-hashes: false +# universal: false -e file:. aiosqlite==0.17.0 -annotated-types==0.6.0 -anyio==4.2.0 + # via tortoise-orm +annotated-types==0.7.0 + # via pydantic +anyio==4.4.0 + # via httpx + # via starlette + # via watchfiles async-timeout==4.0.3 + # via asyncpg asyncpg==0.29.0 + # via word-generator-api black==22.12.0 + # via spacy-lefff blis==0.7.11 + # via thinc catalogue==2.0.10 -certifi==2024.2.2 + # via spacy + # via srsly + # via thinc +certifi==2024.7.4 + # via httpcore + # via httpx + # via requests + # via sentry-sdk charset-normalizer==3.3.2 + # via requests click==8.1.7 -cloudpathlib==0.16.0 -confection==0.1.4 + # via black + # via nltk + # via typer + # via uvicorn +cloudpathlib==0.18.1 + # via weasel +confection==0.1.5 + # via thinc + # via weasel cymem==2.0.8 + # via preshed + # via spacy + # via thinc deprecated==1.2.14 -fastapi==0.109.2 + # via limits +dnspython==2.6.1 + # via email-validator +email-validator==2.2.0 + # via fastapi +fastapi==0.111.1 + # via word-generator-api +fastapi-cli==0.0.4 + # via fastapi gunicorn==20.1.0 + # via word-generator-api h11==0.14.0 -idna==3.6 -importlib-resources==6.1.1 + # via httpcore + # via uvicorn +httpcore==1.0.5 + # via httpx +httptools==0.6.1 + # via uvicorn +httpx==0.27.0 + # via fastapi +idna==3.7 + # via anyio + # via email-validator + # via httpx + # via requests +importlib-resources==6.4.0 + # via limits iso8601==1.1.0 -jinja2==3.1.3 -joblib==1.3.2 -langcodes==3.3.0 -limits==3.8.0 + # via tortoise-orm +jinja2==3.1.4 + # via fastapi + # via spacy +joblib==1.4.2 + # via nltk +langcodes==3.4.0 + # via spacy +language-data==1.2.0 + # via langcodes +limits==3.13.0 + # via slowapi +marisa-trie==1.2.0 + # via language-data +markdown-it-py==3.0.0 + # via rich markupsafe==2.1.5 + # via jinja2 +mdurl==0.1.2 + # via markdown-it-py msgpack==0.5.6 + # via spacy-lefff murmurhash==1.0.10 + # via preshed + # via spacy + # via thinc mypy-extensions==1.0.0 + # via black nltk==3.8.1 + # via word-generator-api numpy==1.26.4 + # via blis + # via spacy + # via thinc oauthlib==3.2.2 -packaging==23.2 + # via requests-oauthlib + # via tweepy +packaging==24.1 + # via limits + # via spacy + # via thinc + # via weasel pathspec==0.12.1 -platformdirs==4.2.0 -pluggy==1.4.0 + # via black +platformdirs==4.2.2 + # via black +pluggy==1.5.0 + # via spacy-lefff preshed==3.0.9 -pydantic==2.6.1 -pydantic-core==2.16.2 + # via spacy + # via thinc +pydantic==2.8.2 + # via confection + # via fastapi + # via spacy + # via thinc + # via tortoise-orm + # via weasel +pydantic-core==2.20.1 + # via pydantic +pygments==2.18.0 + # via rich pypika-tortoise==0.1.6 + # via tortoise-orm +python-dotenv==1.0.1 + # via uvicorn +python-multipart==0.0.9 + # via fastapi pytz==2024.1 -regex==2023.12.25 -requests==2.31.0 + # via tortoise-orm +pyyaml==6.0.1 + # via uvicorn +regex==2024.5.15 + # via nltk +requests==2.32.3 + # via requests-oauthlib + # via spacy + # via tweepy + # via weasel + # via word-generator-api requests-oauthlib==1.3.1 -sentry-sdk==1.40.4 + # via tweepy +rich==13.7.1 + # via typer +sentry-sdk==2.10.0 + # via word-generator-api +setuptools==71.1.0 + # via gunicorn + # via marisa-trie + # via spacy + # via thinc +shellingham==1.5.4 + # via typer slowapi==0.1.9 -smart-open==6.4.0 -sniffio==1.3.0 -spacy==3.7.2 + # via word-generator-api +smart-open==7.0.4 + # via weasel +sniffio==1.3.1 + # via anyio + # via httpx +spacy==3.7.5 + # via spacy-lefff + # via word-generator-api spacy-lefff==0.5.1 + # via word-generator-api spacy-legacy==3.0.12 + # via spacy spacy-loggers==1.0.5 + # via spacy srsly==2.4.8 -starlette==0.36.3 -thinc==8.2.3 -tortoise-orm==0.20.0 -tqdm==4.66.2 + # via confection + # via spacy + # via thinc + # via weasel +starlette==0.37.2 + # via fastapi +thinc==8.2.5 + # via spacy +tortoise-orm==0.21.5 + # via word-generator-api +tqdm==4.66.4 + # via nltk + # via spacy + # via spacy-lefff + # via word-generator-api tweepy==4.14.0 -typer==0.9.0 -typing-extensions==4.9.0 -urllib3==2.2.0 -uvicorn==0.27.1 -wasabi==1.1.2 -weasel==0.3.4 + # via word-generator-api +typer==0.12.3 + # via fastapi-cli + # via spacy + # via weasel + # via word-generator-api +typing-extensions==4.12.2 + # via aiosqlite + # via fastapi + # via limits + # via pydantic + # via pydantic-core + # via typer +urllib3==2.2.2 + # via requests + # via sentry-sdk +uvicorn==0.30.3 + # via fastapi + # via word-generator-api +uvloop==0.19.0 + # via uvicorn +wasabi==1.1.3 + # via spacy + # via thinc + # via weasel +watchfiles==0.22.0 + # via uvicorn +weasel==0.4.1 + # via spacy +websockets==12.0 + # via uvicorn wrapt==1.16.0 -# The following packages are considered to be unsafe in a requirements file: -setuptools==69.1.0 + # via deprecated + # via smart-open