From ae5155233394fa0dc814d84942b9edf567d8ed77 Mon Sep 17 00:00:00 2001 From: Astariul Date: Tue, 28 May 2024 17:29:26 +0900 Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20Add=20another=20leaderboard?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/css/mkdocstrings.css | 8 + docs/hooks.py | 158 +++++++++++++----- docs/javascripts/tablesort.js | 6 - docs/leaderboards/compare.md | 24 +++ docs/{leaderboard.md => leaderboards/main.md} | 13 +- mkdocs.yml | 9 +- 6 files changed, 162 insertions(+), 56 deletions(-) delete mode 100644 docs/javascripts/tablesort.js create mode 100644 docs/leaderboards/compare.md rename docs/{leaderboard.md => leaderboards/main.md} (53%) diff --git a/docs/css/mkdocstrings.css b/docs/css/mkdocstrings.css index f53a26a..72d284c 100644 --- a/docs/css/mkdocstrings.css +++ b/docs/css/mkdocstrings.css @@ -3,3 +3,11 @@ div.doc-contents:not(.first) { border-left: 4px solid rgba(230, 230, 230); margin-bottom: 80px; } + +.x_icon { + color: #ef5552; +} + +.v_icon { + color: #4cae50; +} diff --git a/docs/hooks.py b/docs/hooks.py index f36a49f..2416f11 100644 --- a/docs/hooks.py +++ b/docs/hooks.py @@ -2,11 +2,24 @@ import json import os +from dataclasses import dataclass +from typing import Dict, List from mkdocs.config.defaults import MkDocsConfig from mkdocs.structure.nav import Page +@dataclass +class DynamicEntry: + """Represents a dynamic entry for a data table : the data will be pulled + from the results files. + """ + + name: str + results: Dict + additional_fields: List[str] + + def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs) -> str: """Function that runs before rendering the markdown. @@ -21,58 +34,125 @@ def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs) Returns: str: The updated markdown content. 
""" - if page.file.src_uri == "leaderboard.md": + if "leaderboards" in page.file.src_uri: lines = markdown.split("\n") entries = [] for line in lines: if line.startswith(">>>"): # This is a line with a path to a result file # -> parse it and extract the results - kb_name, result_file_path = line[3:].split("|") + name, result_file_path, *args = line[3:].split("|") with open(os.path.join(config.docs_dir, result_file_path), "r") as f: res = json.load(f) - entries.append( - { - "kb_name": kb_name, - "score": res["overall_score"], - "nwp": res["next_word_prediction"]["score"]["top3_accuracy"], - "acp": res["auto_completion"]["score"]["top3_accuracy"], - "acr": res["auto_correction"]["score"]["fscore"], - } - ) - - # Sort according to the overall score - entries.sort(reverse=True, key=lambda x: x["score"]) - - # Find the best scores to highlight - best_score = max(entries, key=lambda x: x["score"])["score"] - best_nwp = max(entries, key=lambda x: x["nwp"])["nwp"] - best_acp = max(entries, key=lambda x: x["acp"])["acp"] - best_acr = max(entries, key=lambda x: x["acr"])["acr"] + entries.append(DynamicEntry(name, res, args)) + + # Each leaderboard implements its own render logic + rendered_entries = [None for _ in entries] + if page.file.src_uri.endswith("main.md"): + rendered_entries = render_main(entries) + elif page.file.src_uri.endswith("compare.md"): + rendered_entries = render_compare(entries) # Replace the lines accordingly for i, line in enumerate(lines): if line.startswith(">>>"): - e = entries.pop(0) - - score = f"{round(e['score'], 2):g}" - nwp = f"{round(e['nwp'], 2):g}" - acp = f"{round(e['acp'], 2):g}" - acr = f"{round(e['acr'], 2):g}" - - # Highlight the best scores - if e["score"] == best_score: - score = f"**{score}**" - if e["nwp"] == best_nwp: - nwp = f"**{nwp}**" - if e["acp"] == best_acp: - acp = f"**{acp}**" - if e["acr"] == best_acr: - acr = f"**{acr}**" - - # Overwrite the line - lines[i] = f"| {e['kb_name']} | {score} | {nwp} | {acp} | {acr} |" + 
lines[i] = rendered_entries.pop(0) return "\n".join(lines) + + +def render_main(entries: List[DynamicEntry]) -> List[str]: + """Code for rendering the leaderboard : `leaderboards/main.md`.""" + # Extract the scores we are going to use + for e in entries: + e.score = e.results["overall_score"] + e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"] + e.acp = e.results["auto_completion"]["score"]["top3_accuracy"] + e.acr = e.results["auto_correction"]["score"]["fscore"] + + # Sort entries according to the overall score + entries.sort(reverse=True, key=lambda e: e.score) + + # Find the best scores to highlight for each column + best_score = max(e.score for e in entries) + best_nwp = max(e.nwp for e in entries) + best_acp = max(e.acp for e in entries) + best_acr = max(e.acr for e in entries) + + # Render the entries + rendered_entries = [] + for e in entries: + score = f"{round(e.score, 2):g}" + nwp = f"{round(e.nwp, 2):g}" + acp = f"{round(e.acp, 2):g}" + acr = f"{round(e.acr, 2):g}" + + # Highlight the best scores + if e.score == best_score: + score = f"**{score}**" + if e.nwp == best_nwp: + nwp = f"**{nwp}**" + if e.acp == best_acp: + acp = f"**{acp}**" + if e.acr == best_acr: + acr = f"**{acr}**" + + # Render + rendered_entries.append(f"| {e.name} | {score} | {acr} | {acp} | {nwp} |") + + return rendered_entries + + +def render_compare(entries: List[DynamicEntry]) -> List[str]: + """Code for rendering the leaderboard : `leaderboards/compare.md`.""" + # Extract the scores we are going to use + for e in entries: + e.score = e.results["overall_score"] + e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"] + e.acp = e.results["auto_completion"]["score"]["top3_accuracy"] + e.acr_detection = e.results["auto_correction"]["score"]["recall"] + e.acr_frustration = 1 - e.results["auto_correction"]["score"]["precision"] + + # Sort entries according to the overall score + entries.sort(reverse=True, key=lambda e: e.score) + + # Find the best scores 
to highlight for each column + best_score = max(e.score for e in entries) + best_nwp = max(e.nwp for e in entries) + best_acp = max(e.acp for e in entries) + best_acr_detection = max(e.acr_detection for e in entries) + best_acr_frustration = min(e.acr_frustration for e in entries) + + # Render the entries + rendered_entries = [] + for e in entries: + score = f"{round(e.score * 1000)}" + nwp = f"{round(e.nwp * 100)}%" + acp = f"{round(e.acp * 100)}%" + acr_detection = f"{round(e.acr_detection * 100)}%" + acr_frustration = f"{round(e.acr_frustration * 100)}%" + + # Highlight the best scores + if e.score == best_score: + score = f"**{score}**" + if e.nwp == best_nwp: + nwp = f"**{nwp}**" + if e.acp == best_acp: + acp = f"**{acp}**" + if e.acr_detection == best_acr_detection: + acr_detection = f"**{acr_detection}**" + if e.acr_frustration == best_acr_frustration: + acr_frustration = f"**{acr_frustration}**" + + # Render + additional_fields = " | ".join(e.additional_fields) + if additional_fields != "": + rendered_entries.append( + f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} | {additional_fields} |" + ) + else: + rendered_entries.append(f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} |") + + return rendered_entries diff --git a/docs/javascripts/tablesort.js b/docs/javascripts/tablesort.js deleted file mode 100644 index 3319325..0000000 --- a/docs/javascripts/tablesort.js +++ /dev/null @@ -1,6 +0,0 @@ -document$.subscribe(function() { - var tables = document.querySelectorAll("article table:not([class])") - tables.forEach(function(table) { - new Tablesort(table) - }) -}) diff --git a/docs/leaderboards/compare.md b/docs/leaderboards/compare.md new file mode 100644 index 0000000..e6c3cfd --- /dev/null +++ b/docs/leaderboards/compare.md @@ -0,0 +1,24 @@ +--- +hide: + - toc +--- + +# Leaderboard + +[//]: # (A bit of explanation is required for this page) +[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) 
that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) +[//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) + +| Keyboard | Score | Typo detection rate | Auto-correction frustration rate | Auto-completion | Next-word prediction | SDK available | +|---------:|:-----:|:-------------------:|:--------------------------------:|:---------------:|:-------------------:|:-------------:| +>>>Fleksy|results/fleksy.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>iOS keyboard|results/ios.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>KeyboardKit Open-source|results/keyboardkit_oss.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>KeyboardKit Pro|results/keyboardkit_pro.json|:fontawesome-solid-circle-check:{ .v_icon } +>>>Gboard|results/gboard.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Swiftkey|results/swiftkey.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Tappa|results/tappa.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Yandex|results/yandex.json|:fontawesome-regular-circle-xmark:{ .x_icon } + +!!! info + This leaderboard uses the data from the [raw leaderboard](main.md). diff --git a/docs/leaderboard.md b/docs/leaderboards/main.md similarity index 53% rename from docs/leaderboard.md rename to docs/leaderboards/main.md index fbc3fe0..425dfa7 100644 --- a/docs/leaderboard.md +++ b/docs/leaderboards/main.md @@ -1,18 +1,19 @@ # Leaderboard [//]: # (A bit of explanation is required for this page) -[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page, extract the path of result files listed here, read their content, and organize their score into a table) +[//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page. 
Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) +[//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) -| Keyboard | Score | Next-word prediction | Auto-completion | Auto-correction | -|---------:|:-----:|:--------------------:|:---------------:|:---------------:| +| Keyboard | Score | Auto-correction | Auto-completion | Next-word prediction | +|---------:|:-----:|:---------------:|:---------------:|:--------------------:| >>>Fleksy|results/fleksy.json >>>iOS keyboard|results/ios.json >>>KeyboardKit Open-source|results/keyboardkit_oss.json >>>KeyboardKit Pro|results/keyboardkit_pro.json >>>Gboard|results/gboard.json >>>Swiftkey|results/swiftkey.json ->>>Tappa keyboard|results/tappa.json ->>>Yandex keyboard|results/yandex.json +>>>Tappa|results/tappa.json +>>>Yandex|results/yandex.json !!! info The metrics used in this leaderboard are : @@ -21,6 +22,6 @@ * For auto-completion : top-3 accuracy * For auto-correction : F-score - See [Understanding the metrics](how_testing_is_done.md#understanding-the-metrics) for more details. + See [Understanding the metrics](../how_testing_is_done.md#understanding-the-metrics) for more details. The overall score is a _weighted sum_ of each task's score. 
diff --git a/mkdocs.yml b/mkdocs.yml index 6dfe5cb..9684d97 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -34,6 +34,9 @@ markdown_extensions: - attr_list - pymdownx.highlight - pymdownx.superfences + - pymdownx.emoji: + emoji_index: "!!python/name:material.extensions.emoji.twemoji" + emoji_generator: "!!python/name:material.extensions.emoji.to_svg" nav: - Welcome: "index.md" @@ -42,7 +45,7 @@ nav: - "emu_setup.md" - "how_testing_is_done.md" - "architecture.md" - - "leaderboard.md" + - "leaderboards/main.md" - Code reference: - "public_api.md" - "internals.md" @@ -68,7 +71,3 @@ extra: extra_css: - css/mkdocstrings.css - -extra_javascript: - - https://unpkg.com/tablesort@5.3.0/dist/tablesort.min.js - - javascripts/tablesort.js From a44c2d4c5a9aa4f819a3a563326f34b5d3329174 Mon Sep 17 00:00:00 2001 From: Astariul Date: Tue, 28 May 2024 17:38:33 +0900 Subject: [PATCH 2/3] =?UTF-8?q?=F0=9F=90=9B=20Fix=20issue=20in=20mkdocs.ym?= =?UTF-8?q?l?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .pre-commit-config.yaml | 1 + mkdocs.yml | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 930186d..a831ac3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,7 @@ repos: - id: end-of-file-fixer exclude: "coverage_report/.*" - id: check-yaml + args: [ --unsafe ] - id: check-added-large-files - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.4 diff --git a/mkdocs.yml b/mkdocs.yml index 9684d97..6f9afe0 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -35,8 +35,8 @@ markdown_extensions: - pymdownx.highlight - pymdownx.superfences - pymdownx.emoji: - emoji_index: "!!python/name:material.extensions.emoji.twemoji" - emoji_generator: "!!python/name:material.extensions.emoji.to_svg" + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg nav: - Welcome: "index.md" From 
94e732622fdf8dbae8356a565014f440fc6a3b54 Mon Sep 17 00:00:00 2001 From: Astariul Date: Tue, 28 May 2024 18:12:05 +0900 Subject: [PATCH 3/3] =?UTF-8?q?=F0=9F=93=9D=20Better=20leaderboards?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/leaderboards/compare.md | 39 +++++++++++++++++++++++++++++++----- docs/leaderboards/main.md | 18 +++++++++-------- mkdocs.yml | 3 +++ 3 files changed, 47 insertions(+), 13 deletions(-) diff --git a/docs/leaderboards/compare.md b/docs/leaderboards/compare.md index e6c3cfd..764706c 100644 --- a/docs/leaderboards/compare.md +++ b/docs/leaderboards/compare.md @@ -9,16 +9,45 @@ hide: [//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) [//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) -| Keyboard | Score | Typo detection rate | Auto-correction frustration rate | Auto-completion | Next-word prediction | SDK available | -|---------:|:-----:|:-------------------:|:--------------------------------:|:---------------:|:-------------------:|:-------------:| +| Keyboard | Overall score | Typo detection rate | Auto-correction frustration rate | Auto-completion success rate | Next-word prediction success rate | SDK available | +|---------:|:-------------:|:-------------------:|:--------------------------------:|:---------------:|:-------------------:|:-------------:| >>>Fleksy|results/fleksy.json|:fontawesome-solid-circle-check:{ .v_icon } >>>iOS keyboard|results/ios.json|:fontawesome-regular-circle-xmark:{ .x_icon } >>>KeyboardKit Open-source|results/keyboardkit_oss.json|:fontawesome-solid-circle-check:{ .v_icon } >>>KeyboardKit Pro|results/keyboardkit_pro.json|:fontawesome-solid-circle-check:{ .v_icon } >>>Gboard|results/gboard.json|:fontawesome-regular-circle-xmark:{ .x_icon } 
>>>Swiftkey|results/swiftkey.json|:fontawesome-regular-circle-xmark:{ .x_icon } ->>>Tappa|results/tappa.json|:fontawesome-regular-circle-xmark:{ .x_icon } +>>>Tappa|results/tappa.json|:fontawesome-solid-circle-check:{ .v_icon } >>>Yandex|results/yandex.json|:fontawesome-regular-circle-xmark:{ .x_icon } -!!! info - This leaderboard uses the data from the [raw leaderboard](main.md). +### Metrics + +=== "Overall score" + + A single, general score representing the performance of the keyboard across all tasks. + + :material-trending-up: _Higher is better._ + +=== "Typo detection rate" + + Percentage of typos detected and corrected by the keyboard. + + :material-trending-up: _Higher is better._ + +=== "Auto-correction frustration rate" + + Percentage of words correctly typed, but corrected to something else by the keyboard. + + :material-trending-down: _Lower is better._ + +=== "Auto-completion success rate" + + Percentage of words correctly auto-completed. + + :material-trending-up: _Higher is better._ + +=== "Next-word prediction success rate" + + Percentage of words correctly predicted from the context. + + :material-trending-up: _Higher is better._ diff --git a/docs/leaderboards/main.md b/docs/leaderboards/main.md index 425dfa7..df36ab8 100644 --- a/docs/leaderboards/main.md +++ b/docs/leaderboards/main.md @@ -4,8 +4,8 @@ [//]: # (There is a Mkdocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.) [//]: # (The format to follow is : `>>>{name}|{result_file_name}|{optional_additional_fields}`) -| Keyboard | Score | Auto-correction | Auto-completion | Next-word prediction | -|---------:|:-----:|:---------------:|:---------------:|:--------------------:| +| Keyboard | Overall
score | Auto-correction | Auto-completion | Next-word prediction | +|---------:|:----------------:|:---------------:|:---------------:|:--------------------:| >>>Fleksy|results/fleksy.json >>>iOS keyboard|results/ios.json >>>KeyboardKit Open-source|results/keyboardkit_oss.json @@ -15,13 +15,15 @@ >>>Tappa|results/tappa.json >>>Yandex|results/yandex.json -!!! info - The metrics used in this leaderboard are : +--- - * For next-word prediction : top-3 accuracy - * For auto-completion : top-3 accuracy - * For auto-correction : F-score +The metrics used in this leaderboard are : +* Auto-correction : _**F-score**_ +* Auto-completion : _**top-3 accuracy**_ +* Next-word prediction : _**top-3 accuracy**_ + +!!! tip See [Understanding the metrics](../how_testing_is_done.md#understanding-the-metrics) for more details. - The overall score is a _weighted sum_ of each task's score. +The overall score is a _**weighted sum**_ of all tasks. diff --git a/mkdocs.yml b/mkdocs.yml index 6f9afe0..2554c28 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,9 @@ markdown_extensions: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg + - pymdownx.superfences + - pymdownx.tabbed: + alternate_style: true nav: - Welcome: "index.md"