chore: update some deps, add docs, lint and format code

bolinocroustibat · Jul 22, 2024 · e4540b3
1 parent 9f5a5a8
commit e4540b3
Show file tree

Hide file tree

Showing 15 changed files with 452 additions and 143 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,20 +1,18 @@
 repos:
   # https://github.com/pre-commit/pre-commit-hooks#pre-commit-hooks
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.5.0
+    rev: v4.6.0
     hooks:
       - id: check-yaml
       - id: end-of-file-fixer
       - id: trailing-whitespace
       - id: check-added-large-files
 
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.0 # Ruff version
+    rev: v0.5.4 # Ruff version
     hooks:
+      # Run the linter
       - id: ruff
-        args: [--fix, --exit-non-zero-on-fix]
-
-  - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.0 # Ruff version
-    hooks:
+        args: [--fix, --exit-non-zero-on-fix, --select, I]
+      # Run the formatter
       - id: ruff-format
diff --git a/README.md b/README.md
@@ -115,6 +115,12 @@ uvicorn api:app --reload
 
 ## Lint and format the code
 
+Before contributing to the repository, install the pre-commit hooks:
+```bash
+pre-commit install
+```
+Once the hooks are installed, code formatting, linting, and import sorting are checked automatically before each commit.
+
 Lint with:
 ```bash
 rye lint --fix

diff --git a/api.py b/api.py
@@ -34,7 +34,7 @@
 DESCRIPTION: str = config["project"]["description"]
 VERSION: str = config["project"]["version"]
 
-if ENVIRONMENT != "local":
+if ENVIRONMENT not in ["local", "test"]:
     sentry_sdk.init(
         dsn=SENTRY_DSN,
         release=f"{APP_NAME}@{VERSION}",
@@ -43,6 +43,7 @@
         # of transactions for performance monitoring.
         # Sentry recommend adjusting this value in production,
         traces_sample_rate=1.0,
+        profiles_sample_rate=1.0,
         # Experimental profiling
         _experiments={
             "profiles_sample_rate": 1.0,
@@ -96,9 +97,7 @@ async def generate_word(request: Request, lang: str) -> dict[str, str | None] |
 
 @app.get("/{lang}/word/get", tags=["word"])
 @limiter.limit("20/minute")
-async def get_random_word_from_db(
-    request: Request, lang: str
-) -> dict[str, str | None] | None:
+async def get_random_word_from_db(request: Request, lang: str) -> dict[str, str | None] | None:
     """
     Get a random generated word from DB.
     """
@@ -141,9 +140,7 @@ async def generate_definition(request: Request, lang: str) -> dict[str, str] | N
 
 @app.get("/{lang}/definition/get", tags=["definition"])
 @limiter.limit("20/minute")
-async def get_random_definition_from_db(
-    request: Request, lang: str
-) -> dict[str, str] | None:
+async def get_random_definition_from_db(request: Request, lang: str) -> dict[str, str] | None:
     """
     Get a random generated definition from DB.
     """
@@ -175,9 +172,7 @@ async def get_random_definition_from_db(
 
 @app.get("/{lang}/alter", tags=["alter"])
 @limiter.limit("6/minute")
-async def alter_text(
-    request: Request, lang: str, text: str, percentage: float | None = 0.4
-) -> str:
+async def alter_text(request: Request, lang: str, text: str, percentage: float | None = 0.4) -> str:
     """
     Alter a text with random non existing words.
     """

diff --git a/commands/build_proba_file.py b/commands/build_proba_file.py
@@ -49,9 +49,7 @@ def build_2char_probabilities(
     temp2: dict = alphabet_dict | temp
     temp3: dict = alphabet_dict | {"last_letter": 0}
 
-    probabilities: dict = {"first_letter": alphabet_dict} | {
-        chars: temp3.copy() for chars in temp2
-    }
+    probabilities: dict = {"first_letter": alphabet_dict} | {chars: temp3.copy() for chars in temp2}
 
     # Populate the dictionary with probabilities
     with open(dictionary_filepath, "r", encoding="utf-8") as dictionary:
@@ -97,17 +95,12 @@ def build_chars_probability_file(lang: str, chars_nb: int = 2) -> None:
     async def _main():
         current_path = Path(__file__).parent.absolute()
 
-        with open(
-            current_path / f"../{lang}/data/alphabet_{lang.upper()}.json"
-        ) as infile:
+        with open(current_path / f"../{lang}/data/alphabet_{lang.upper()}.json") as infile:
             alphabet: list[str] = json.load(infile)
 
-        dictionary_filepath: Path = (
-            current_path / f"../{lang}/data/dictionary_{lang.upper()}.txt"
-        )
+        dictionary_filepath: Path = current_path / f"../{lang}/data/dictionary_{lang.upper()}.txt"
         json_filepath: Path = (
-            current_path
-            / f"../{lang}/data/proba_table_{chars_nb}char_{lang.upper()}.json"
+            current_path / f"../{lang}/data/proba_table_{chars_nb}char_{lang.upper()}.json"
         )
 
         if chars_nb == 1:

diff --git a/commands/clean_db_generated.py b/commands/clean_db_generated.py
@@ -21,9 +21,7 @@ async def _main():
                 existing = await RealWordEN.filter(string=entry.string)
                 if existing:
                     i += 1
-                    typer.secho(
-                        f'"{entry.string}" exists as a real word. Deleting.', fg="cyan"
-                    )
+                    typer.secho(f'"{entry.string}" exists as a real word. Deleting.', fg="cyan")
                     await entry.delete()
                 else:
                     continue
@@ -33,9 +31,7 @@ async def _main():
                 existing = await RealWordFR.filter(string=entry.string)
                 if existing:
                     i += 1
-                    typer.secho(
-                        f'"{entry.string}" exists as a real word. Deleting.', fg="cyan"
-                    )
+                    typer.secho(f'"{entry.string}" exists as a real word. Deleting.', fg="cyan")
                     await entry.delete()
                 else:
                     continue

diff --git a/commands/clean_db_real.py b/commands/clean_db_real.py
@@ -28,9 +28,7 @@ async def _main():
         for j, entry in enumerate(await real_word_class.objects.all()):
             if ".ADV" in entry.string:
                 i += 1
-                typer.secho(
-                    f'"{entry.string}" have .ADV in string, fixing...', fg="cyan"
-                )
+                typer.secho(f'"{entry.string}" have .ADV in string, fixing...', fg="cyan")
                 replacement = entry.string.replace(".ADV", "")
                 try:
                     await entry.update(
@@ -61,9 +59,7 @@ async def _main():
             else:
                 continue
 
-        typer.secho(
-            f'"{i}/{j}" real words had .ADV in string and were fixed.', fg="green"
-        )
+        typer.secho(f'"{i}/{j}" real words had .ADV in string and were fixed.', fg="green")
 
     aiorun(_main())
 

diff --git a/commands/dictionary_to_db.py b/commands/dictionary_to_db.py
@@ -57,9 +57,7 @@ async def _main():
                                         proper=False,
                                     )
                                 else:
-                                    await RealWordFR.objects.create(
-                                        string=word, proper=False
-                                    )
+                                    await RealWordFR.objects.create(string=word, proper=False)
                             except Exception as e:
                                 typer.secho(f"{word}", fg="red")
                                 typer.secho(e, fg="red")

diff --git a/commands/tweet.py b/commands/tweet.py
@@ -5,7 +5,7 @@
 import typer
 
 from common import prepare_db
-from config import SENTRY_DSN, SENTRY_CRON_MONITOR_ID, TWITTER
+from config import SENTRY_CRON_MONITOR_ID, SENTRY_DSN, TWITTER
 from en import generate_tweet_en
 from fr import generate_tweet_fr
 

diff --git a/common/generate_word.py b/common/generate_word.py
@@ -11,7 +11,9 @@
 
 
 async def generate_word_and_save(lang: str, ip: str) -> dict | None:
-    already_generated = True  # assume it has already been generated so we enter the while loop at least once
+    already_generated = (
+        True  # assume it has already been generated so we enter the while loop at least once
+    )
     retries = 0
     while already_generated and retries < 10:
         try:
@@ -93,9 +95,7 @@ def _generate_word_core(json_proba_file: str) -> str:
     word = char1
     # for _ in probas[char1]:
     # choose second char
-    char2 = random.choices(
-        list(probas[char1].keys()), weights=list(probas[char1].values()), k=1
-    )[0]
+    char2 = random.choices(list(probas[char1].keys()), weights=list(probas[char1].values()), k=1)[0]
     if char2 == "last_letter":
         # it's a 1-letter word
         return word

diff --git a/en/alter_text.py b/en/alter_text.py
@@ -23,9 +23,7 @@
 }
 
 
-async def alter_text_en(
-    text: str, percentage: float, forced_replacements: dict | None = {}
-) -> str:
+async def alter_text_en(text: str, percentage: float, forced_replacements: dict | None = {}) -> str:
     """
     Alter a text randomly using NLTK POS tagging.
     See https://www.guru99.com/pos-tagging-chunking-nltk.html

diff --git a/fr/alter_text.py b/fr/alter_text.py
@@ -59,9 +59,7 @@ def create_melt_tagger(nlp, name):
 }
 
 
-async def alter_text_fr(
-    text: str, percentage: float, forced_replacements: dict | None = {}
-) -> str:
+async def alter_text_fr(text: str, percentage: float, forced_replacements: dict | None = {}) -> str:
     """
     Alter a text randomly using Spacy and Lefff
     """

diff --git a/fr/classify.py b/fr/classify.py
@@ -20,8 +20,7 @@ def classify_fr(word: str) -> dict:
         gender = "f"
         number = "p"
     elif (word[-4:] == "ment") and (
-        get_char_from_position(word, -5)
-        in ["a", "e", "é", "i", "î", "ï", "o", "ô", "u", "û"]
+        get_char_from_position(word, -5) in ["a", "e", "é", "i", "î", "ï", "o", "ô", "u", "û"]
     ):
         type = "adverb"
     elif word[-4:] == "ique":

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "word-generator-api"
-version = "1.5.6"
+version = "1.5.7"
 description = "Generates words that don't exist but sound English, French, Spanish or Italian, along with their altered dictionary definitions."
 authors = [{ name = "Adrien Carpentier", email = "[email protected]" }]
 dependencies = [
@@ -10,7 +10,7 @@ dependencies = [
     "nltk<4.0,>=3.7",
     "requests<3.0.0,>=2.27.1",
     "slowapi<1.0.0,>=0.1.5",
-    "sentry-sdk>=1.15.0",
+    "sentry-sdk>=2.10.0",
     "spacy<4.0.0,>=3.2.4",
     "tqdm<5.0.0,>=4.63.0",
     "tortoise-orm>=0.19.3",
@@ -29,19 +29,14 @@ requires = ["pdm-pep517>=1.0.0"]
 build-backend = "pdm.pep517.api"
 
 [tool]
-rye = { dev-dependencies = [
-    "pre-commit>=3.6.0",
-    "ruff>=0.2.0",
-] }
+rye = { dev-dependencies = ["pre-commit>=3.7.1", "ruff>=0.5.4"] }
 
 [tool.pdm]
 includes = []
 
 [tool.pdm.dev-dependencies]
-dev = [
-    "ruff>=0.1.0",
-    "pre-commit>=3.5.0",
-]
+dev = ["pre-commit>=3.7.1", "ruff>=0.5.4"]
 
 [tool.ruff]
-ignore = ["E501"]
+lint = { select = ["I"] }
+line-length = 100