diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 930186d..a831ac3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,7 @@ repos: - id: end-of-file-fixer exclude: "coverage_report/.*" - id: check-yaml + args: [ --unsafe ] - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.4 diff --git a/docs/css/mkdocstrings.css b/docs/css/mkdocstrings.css index f53a26a..72d284c 100644 --- a/docs/css/mkdocstrings.css +++ b/docs/css/mkdocstrings.css @@ -3,3 +3,11 @@ div.doc-contents:not(.first) { border-left: 4px solid rgba(230, 230, 230); margin-bottom: 80px; } + +.x_icon { + color: #ef5552; +} + +.v_icon { + color: #4cae50; +} diff --git a/docs/hooks.py b/docs/hooks.py index f36a49f..2416f11 100644 --- a/docs/hooks.py +++ b/docs/hooks.py @@ -2,11 +2,24 @@ import json import os +from dataclasses import dataclass +from typing import Dict, List from mkdocs.config.defaults import MkDocsConfig from mkdocs.structure.nav import Page +@dataclass +class DynamicEntry: + """Represents a dynamic entry for a data table : the data will be pulled + from the results files. + """ + + name: str + results: Dict + additional_fields: List[str] + + def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs) -> str: """Function that runs before rendering the markdown. @@ -21,58 +34,125 @@ def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs) Returns: str: The updated markdown content. """ - if page.file.src_uri == "leaderboard.md": + if "leaderboards" in page.file.src_uri: lines = markdown.split("\n") entries = [] for line in lines: if line.startswith(">>>"): # This is a line with a path to a result file # -> parse it and extract the results - kb_name, result_file_path = line[3:].split("|") + name, result_file_path, *args = line[3:].split("|") with open(os.path.join(config.docs_dir, result_file_path), "r") as f: res = json.load(f) - entries.append( - { - "kb_name": kb_name, - "score": res["overall_score"], - "nwp": res["next_word_prediction"]["score"]["top3_accuracy"], - "acp": res["auto_completion"]["score"]["top3_accuracy"], - "acr": res["auto_correction"]["score"]["fscore"], - } - ) - - # Sort according to the overall score - entries.sort(reverse=True, key=lambda x: x["score"]) - - # Find the best scores to highlight - best_score = max(entries, key=lambda x: x["score"])["score"] - best_nwp = max(entries, key=lambda x: x["nwp"])["nwp"] - best_acp = max(entries, key=lambda x: x["acp"])["acp"] - best_acr = max(entries, key=lambda x: x["acr"])["acr"] + entries.append(DynamicEntry(name, res, args)) + + # Each leaderboard implements its own render logic + rendered_entries = [None for _ in entries] + if page.file.src_uri.endswith("main.md"): + rendered_entries = render_main(entries) + elif page.file.src_uri.endswith("compare.md"): + rendered_entries = render_compare(entries) # Replace the lines accordingly for i, line in enumerate(lines): if line.startswith(">>>"): - e = entries.pop(0) - - score = f"{round(e['score'], 2):g}" - nwp = f"{round(e['nwp'], 2):g}" - acp = f"{round(e['acp'], 2):g}" - acr = f"{round(e['acr'], 2):g}" - - # Highlight the best scores - if e["score"] == best_score: - score = f"**{score}**" - if e["nwp"] == best_nwp: - nwp = f"**{nwp}**" - if e["acp"] == best_acp: - acp = f"**{acp}**" - if e["acr"] == best_acr: - acr = f"**{acr}**" - - # Overwrite the line - lines[i] = f"| {e['kb_name']} | {score} | {nwp} | {acp} | {acr} |" + lines[i] = rendered_entries.pop(0) return "\n".join(lines) + + +def render_main(entries: List[DynamicEntry]) -> List[str]: + """Code for rendering the leaderboard : `leaderboards/main.md`.""" + # Extract the scores we are going to use + for e in entries: + e.score = e.results["overall_score"] + e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"] + e.acp = e.results["auto_completion"]["score"]["top3_accuracy"] + e.acr = e.results["auto_correction"]["score"]["fscore"] + + # Sort entries according to the overall score + entries.sort(reverse=True, key=lambda e: e.score) + + # Find the best scores to highlight for each column + best_score = max(e.score for e in entries) + best_nwp = max(e.nwp for e in entries) + best_acp = max(e.acp for e in entries) + best_acr = max(e.acr for e in entries) + + # Render the entries + rendered_entries = [] + for e in entries: + score = f"{round(e.score, 2):g}" + nwp = f"{round(e.nwp, 2):g}" + acp = f"{round(e.acp, 2):g}" + acr = f"{round(e.acr, 2):g}" + + # Highlight the best scores + if e.score == best_score: + score = f"**{score}**" + if e.nwp == best_nwp: + nwp = f"**{nwp}**" + if e.acp == best_acp: + acp = f"**{acp}**" + if e.acr == best_acr: + acr = f"**{acr}**" + + # Render + rendered_entries.append(f"| {e.name} | {score} | {acr} | {acp} | {nwp} |") + + return rendered_entries + + +def render_compare(entries: List[DynamicEntry]) -> List[str]: + """Code for rendering the leaderboard : `leaderboards/compare.md`.""" + # Extract the scores we are going to use + for e in entries: + e.score = e.results["overall_score"] + e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"] + e.acp = e.results["auto_completion"]["score"]["top3_accuracy"] + e.acr_detection = e.results["auto_correction"]["score"]["recall"] + e.acr_frustration = 1 - e.results["auto_correction"]["score"]["precision"] + + # Sort entries according to the overall score + entries.sort(reverse=True, key=lambda e: e.score) + + # Find the best scores to highlight for each column + best_score = max(e.score for e in entries) + best_nwp = max(e.nwp for e in entries) + best_acp = max(e.acp for e in entries) + best_acr_detection = max(e.acr_detection for e in entries) + best_acr_frustration = min(e.acr_frustration for e in entries) + + # Render the entries + rendered_entries = [] + for e in entries: + score = f"{round(e.score * 1000)}" + nwp = f"{round(e.nwp * 100)}%" + acp = f"{round(e.acp * 100)}%" + acr_detection = f"{round(e.acr_detection * 100)}%" + acr_frustration = f"{round(e.acr_frustration * 100)}%" + + # Highlight the best scores + if e.score == best_score: + score = f"**{score}**" + if e.nwp == best_nwp: + nwp = f"**{nwp}**" + if e.acp == best_acp: + acp = f"**{acp}**" + if e.acr_detection == best_acr_detection: + acr_detection = f"**{acr_detection}**" + if e.acr_frustration == best_acr_frustration: + acr_frustration = f"**{acr_frustration}**" + + # Render + additional_fields = " | ".join(e.additional_fields) + if additional_fields != "": + rendered_entries.append( + f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} | {additional_fields} |" + ) + else: + rendered_entries.append(f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} |") + + return rendered_entries diff --git a/docs/javascripts/tablesort.js b/docs/javascripts/tablesort.js deleted file mode 100644 index 3319325..0000000 --- a/docs/javascripts/tablesort.js +++ /dev/null @@ -1,6 +0,0 @@ -document$.subscribe(function() { - var tables = document.querySelectorAll("article table:not([class])") - tables.forEach(function(table) { - new Tablesort(table) - }) -}) diff --git a/docs/leaderboard.md b/docs/leaderboard.md deleted file mode 100644 index fbc3fe0..0000000 --- a/docs/leaderboard.md +++ /dev/null @@ -1,26 +0,0 @@ -# Leaderboard - -[//]: # (A bit of explanation is required for this page) -[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page, extract the path of result files listed here, read their content, and organize their score into a table) - -| Keyboard | Score | Next-word prediction | Auto-completion | Auto-correction | -|---------:|:-----:|:--------------------:|:---------------:|:---------------:| ->>>Fleksy|results/fleksy.json ->>>iOS keyboard|results/ios.json ->>>KeyboardKit Open-source|results/keyboardkit_oss.json ->>>KeyboardKit Pro|results/keyboardkit_pro.json ->>>Gboard|results/gboard.json ->>>Swiftkey|results/swiftkey.json ->>>Tappa keyboard|results/tappa.json ->>>Yandex keyboard|results/yandex.json - -!!! info - The metrics used in this leaderboard are : - - * For next-word prediction : top-3 accuracy - * For auto-completion : top-3 accuracy - * For auto-correction : F-score - - See [Understanding the metrics](how_testing_is_done.md#understanding-the-metrics) for more details. - - The overall score is a _weighted sum_ of each task's score. diff --git a/docs/leaderboards/compare.md b/docs/leaderboards/compare.md new file mode 100644 index 0000000..764706c --- /dev/null +++ b/docs/leaderboards/compare.md @@ -0,0 +1,53 @@ +--- +hide: + - toc +--- + +# Leaderboard + +[//]: # (A bit of explanation is required for this page) +[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) +[//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) + +| Keyboard | Overall score | Typo detection rate | Auto-correction frustration rate | Auto-completion success rate | Next-word prediction success rate | SDK available | +|---------:|:-------------:|:-------------------:|:--------------------------------:|:---------------:|:-------------------:|:-------------:| +>>>Fleksy|results/fleksy.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>iOS keyboard|results/ios.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>KeyboardKit Open-source|results/keyboardkit_oss.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>KeyboardKit Pro|results/keyboardkit_pro.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>Gboard|results/gboard.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Swiftkey|results/swiftkey.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Tappa|results/tappa.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>Yandex|results/yandex.json|:fontawesome-regular-circle-xmark:{ .x_icon } + +### Metrics + +=== "Overall score" + + A single, general score representing the performances of the keyboard across all tasks. + + :material-trending-up: _Higher is better._ + +=== "Typo detection rate" + + Percentage of typos detected and corrected by the keyboard. + + :material-trending-up: _Higher is better._ + +=== "Auto-correction frustration rate" + + Percentage of words correctly typed, but corrected to something else by the keyboard. + + :material-trending-down: _Lower is better._ + +=== "Auto-completion success rate" + + Percentage of words correctly auto-completed. + + :material-trending-up: _Higher is better._ + +=== "Next-word prediction success rate" + + Percentage of words correctly predicted from the context. + + :material-trending-up: _Higher is better._ diff --git a/docs/leaderboards/main.md b/docs/leaderboards/main.md new file mode 100644 index 0000000..df36ab8 --- /dev/null +++ b/docs/leaderboards/main.md @@ -0,0 +1,29 @@ +# Leaderboard + +[//]: # (A bit of explanation is required for this page) +[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) +[//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) + +| Keyboard | Overall
score | Auto-correction | Auto-completion | Next-word prediction | +|---------:|:----------------:|:---------------:|:---------------:|:--------------------:| +>>>Fleksy|results/fleksy.json +>>>iOS keyboard|results/ios.json +>>>KeyboardKit Open-source|results/keyboardkit_oss.json +>>>KeyboardKit Pro|results/keyboardkit_pro.json +>>>Gboard|results/gboard.json +>>>Swiftkey|results/swiftkey.json +>>>Tappa|results/tappa.json +>>>Yandex|results/yandex.json + +--- + +The metrics used in this leaderboard are : + +* Auto-correction : _**F-score**_ +* Auto-completion : _**top-3 accuracy**_ +* Next-word prediction : _**top-3 accuracy**_ + +!!! tip + See [Understanding the metrics](../how_testing_is_done.md#understanding-the-metrics) for more details. + +The overall score is a _**weighted sum**_ of all tasks. diff --git a/mkdocs.yml b/mkdocs.yml index 6dfe5cb..2554c28 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,6 +34,12 @@ markdown_extensions: - attr_list - pymdownx.highlight - pymdownx.superfences + - pymdownx.emoji: + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true nav: - Welcome: "index.md" @@ -42,7 +48,7 @@ nav: - "emu_setup.md" - "how_testing_is_done.md" - "architecture.md" - - "leaderboard.md" + - "leaderboards/main.md" - Code reference: - "public_api.md" - "internals.md" @@ -68,7 +74,3 @@ extra: extra_css: - css/mkdocstrings.css - -extra_javascript: - - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js - - javascripts/tablesort.js