🔀 Merge pull request #37 from FleksySDK/leaderboard_improvement
Leaderboard improvements
astariul authored May 28, 2024
2 parents b6dfbc3 + 94e7326 commit 5a5deb8
Showing 8 changed files with 217 additions and 76 deletions.
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -8,6 +8,7 @@ repos:
- id: end-of-file-fixer
exclude: "coverage_report/.*"
- id: check-yaml
args: [ --unsafe ]
- id: check-added-large-files
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.3.4
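A note on the `--unsafe` flag: `check-yaml` safe-loads YAML files by default, and safe loading has no constructor for the `!!python/name:` tags that the `mkdocs.yml` change later in this diff introduces for the emoji extension. With `--unsafe`, the hook only checks syntax instead of loading the file. A minimal sketch of the failure the flag avoids, reproduced with PyYAML directly:

```python
import yaml

# The tag added to mkdocs.yml in this commit; safe_load has no
# constructor registered for python/name tags, so it raises.
doc = "emoji_index: !!python/name:material.extensions.emoji.twemoji"
try:
    yaml.safe_load(doc)
except yaml.YAMLError as exc:
    print("safe_load rejects the tag:", exc)
```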
8 changes: 8 additions & 0 deletions docs/css/mkdocstrings.css
@@ -3,3 +3,11 @@ div.doc-contents:not(.first) {
border-left: 4px solid rgba(230, 230, 230);
margin-bottom: 80px;
}

.x_icon {
color: #ef5552;
}

.v_icon {
color: #4cae50;
}
158 changes: 119 additions & 39 deletions docs/hooks.py
@@ -2,11 +2,24 @@

import json
import os
from dataclasses import dataclass
from typing import Dict, List

from mkdocs.config.defaults import MkDocsConfig
from mkdocs.structure.nav import Page


@dataclass
class DynamicEntry:
"""Represents a dynamic entry for a data table : the data will be pulled
from the results files.
"""

name: str
results: Dict
additional_fields: List[str]


def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs) -> str:
"""Function that runs before rendering the markdown.
@@ -21,58 +34,125 @@ def on_page_markdown(markdown: str, page: Page, config: MkDocsConfig, **kwargs)
Returns:
str: The updated markdown content.
"""
- if page.file.src_uri == "leaderboard.md":
+ if "leaderboards" in page.file.src_uri:
lines = markdown.split("\n")
entries = []
for line in lines:
if line.startswith(">>>"):
# This is a line with a path to a result file
# -> parse it and extract the results
- kb_name, result_file_path = line[3:].split("|")
+ name, result_file_path, *args = line[3:].split("|")

with open(os.path.join(config.docs_dir, result_file_path), "r") as f:
res = json.load(f)

- entries.append(
-     {
-         "kb_name": kb_name,
-         "score": res["overall_score"],
-         "nwp": res["next_word_prediction"]["score"]["top3_accuracy"],
-         "acp": res["auto_completion"]["score"]["top3_accuracy"],
-         "acr": res["auto_correction"]["score"]["fscore"],
-     }
- )
-
- # Sort according to the overall score
- entries.sort(reverse=True, key=lambda x: x["score"])
-
- # Find the best scores to highlight
- best_score = max(entries, key=lambda x: x["score"])["score"]
- best_nwp = max(entries, key=lambda x: x["nwp"])["nwp"]
- best_acp = max(entries, key=lambda x: x["acp"])["acp"]
- best_acr = max(entries, key=lambda x: x["acr"])["acr"]
+ entries.append(DynamicEntry(name, res, args))
+
+ # Each leaderboard implements its own render logic
+ rendered_entries = [None for _ in entries]
+ if page.file.src_uri.endswith("main.md"):
+     rendered_entries = render_main(entries)
+ elif page.file.src_uri.endswith("compare.md"):
+     rendered_entries = render_compare(entries)

# Replace the lines accordingly
for i, line in enumerate(lines):
if line.startswith(">>>"):
- e = entries.pop(0)
-
- score = f"{round(e['score'], 2):g}"
- nwp = f"{round(e['nwp'], 2):g}"
- acp = f"{round(e['acp'], 2):g}"
- acr = f"{round(e['acr'], 2):g}"
-
- # Highlight the best scores
- if e["score"] == best_score:
-     score = f"**{score}**"
- if e["nwp"] == best_nwp:
-     nwp = f"**{nwp}**"
- if e["acp"] == best_acp:
-     acp = f"**{acp}**"
- if e["acr"] == best_acr:
-     acr = f"**{acr}**"
-
- # Overwrite the line
- lines[i] = f"| {e['kb_name']} | {score} | {nwp} | {acp} | {acr} |"
+ lines[i] = rendered_entries.pop(0)

return "\n".join(lines)


def render_main(entries: List[DynamicEntry]) -> List[str]:
"""Code for rendering the leaderboard : `leaderboards/main.md`."""
# Extract the scores we are going to use
for e in entries:
e.score = e.results["overall_score"]
e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"]
e.acp = e.results["auto_completion"]["score"]["top3_accuracy"]
e.acr = e.results["auto_correction"]["score"]["fscore"]

# Sort entries according to the overall score
entries.sort(reverse=True, key=lambda e: e.score)

# Find the best scores to highlight for each column
best_score = max(e.score for e in entries)
best_nwp = max(e.nwp for e in entries)
best_acp = max(e.acp for e in entries)
best_acr = max(e.acr for e in entries)

# Render the entries
rendered_entries = []
for e in entries:
score = f"{round(e.score, 2):g}"
nwp = f"{round(e.nwp, 2):g}"
acp = f"{round(e.acp, 2):g}"
acr = f"{round(e.acr, 2):g}"

# Highlight the best scores
if e.score == best_score:
score = f"**{score}**"
if e.nwp == best_nwp:
nwp = f"**{nwp}**"
if e.acp == best_acp:
acp = f"**{acp}**"
if e.acr == best_acr:
acr = f"**{acr}**"

# Render
rendered_entries.append(f"| {e.name} | {score} | {acr} | {acp} | {nwp} |")

return rendered_entries


def render_compare(entries: List[DynamicEntry]) -> List[str]:
"""Code for rendering the leaderboard : `leaderboards/compare.md`."""
# Extract the scores we are going to use
for e in entries:
e.score = e.results["overall_score"]
e.nwp = e.results["next_word_prediction"]["score"]["top3_accuracy"]
e.acp = e.results["auto_completion"]["score"]["top3_accuracy"]
e.acr_detection = e.results["auto_correction"]["score"]["recall"]
e.acr_frustration = 1 - e.results["auto_correction"]["score"]["precision"]

# Sort entries according to the overall score
entries.sort(reverse=True, key=lambda e: e.score)

# Find the best scores to highlight for each column
best_score = max(e.score for e in entries)
best_nwp = max(e.nwp for e in entries)
best_acp = max(e.acp for e in entries)
best_acr_detection = max(e.acr_detection for e in entries)
best_acr_frustration = min(e.acr_frustration for e in entries)

# Render the entries
rendered_entries = []
for e in entries:
score = f"{round(e.score * 1000)}"
nwp = f"{round(e.nwp * 100)}%"
acp = f"{round(e.acp * 100)}%"
acr_detection = f"{round(e.acr_detection * 100)}%"
acr_frustration = f"{round(e.acr_frustration * 100)}%"

# Highlight the best scores
if e.score == best_score:
score = f"**{score}**"
if e.nwp == best_nwp:
nwp = f"**{nwp}**"
if e.acp == best_acp:
acp = f"**{acp}**"
if e.acr_detection == best_acr_detection:
acr_detection = f"**{acr_detection}**"
if e.acr_frustration == best_acr_frustration:
acr_frustration = f"**{acr_frustration}**"

# Render
additional_fields = " | ".join(e.additional_fields)
if additional_fields != "":
rendered_entries.append(
f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} | {additional_fields} |"
)
else:
rendered_entries.append(f"| {e.name} | {score} | {acr_detection} | {acr_frustration} | {acp} | {nwp} |")

return rendered_entries
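The net effect of the refactor: parsing and rendering are now decoupled, and each leaderboard page brings its own `render_*` function. A minimal sketch of the parsing step, using one of the directive lines from `docs/leaderboards/compare.md` below (any fields after the result-file path become `DynamicEntry.additional_fields`):

```python
# Sketch of the hook's directive parsing (mirrors hooks.py above).
line = ">>>Fleksy|results/fleksy.json|:fontawesome-solid-circle-check:{ .v_icon }"
name, result_file_path, *args = line[3:].split("|")

assert name == "Fleksy"
assert result_file_path == "results/fleksy.json"
assert args == [":fontawesome-solid-circle-check:{ .v_icon }"]
```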
6 changes: 0 additions & 6 deletions docs/javascripts/tablesort.js

This file was deleted.

26 changes: 0 additions & 26 deletions docs/leaderboard.md

This file was deleted.

53 changes: 53 additions & 0 deletions docs/leaderboards/compare.md
@@ -0,0 +1,53 @@
---
hide:
- toc
---

# Leaderboard

[//]: # (A bit of explanation is required for this page)
[//]: # (There is an MkDocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.)
[//]: # (The format to follow is: `>>>{name}|{result_file_name}|{optional_additional_fields}`)

| Keyboard | Overall score | Typo detection rate | Auto-correction frustration rate | Auto-completion success rate | Next-word prediction success rate | SDK available |
|---------:|:-------------:|:-------------------:|:--------------------------------:|:---------------:|:-------------------:|:-------------:|
>>>Fleksy|results/fleksy.json|:fontawesome-solid-circle-check:{ .v_icon }
>>>iOS keyboard|results/ios.json|:fontawesome-regular-circle-xmark:{ .x_icon }
>>>KeyboardKit Open-source|results/keyboardkit_oss.json|:fontawesome-solid-circle-check:{ .v_icon }
>>>KeyboardKit Pro|results/keyboardkit_pro.json|:fontawesome-solid-circle-check:{ .v_icon }
>>>Gboard|results/gboard.json|:fontawesome-regular-circle-xmark:{ .x_icon }
>>>Swiftkey|results/swiftkey.json|:fontawesome-regular-circle-xmark:{ .x_icon }
>>>Tappa|results/tappa.json|:fontawesome-solid-circle-check:{ .v_icon }
>>>Yandex|results/yandex.json|:fontawesome-regular-circle-xmark:{ .x_icon }
### Metrics

=== "Overall score"

A single, general score representing the performance of the keyboard across all tasks.

:material-trending-up: _Higher is better._

=== "Typo detection rate"

Percentage of typos detected and corrected by the keyboard.

:material-trending-up: _Higher is better._

=== "Auto-correction frustration rate"

Percentage of words correctly typed, but corrected to something else by the keyboard.

:material-trending-down: _Lower is better._

=== "Auto-completion success rate"

Percentage of words correctly auto-completed.

:material-trending-up: _Higher is better._

=== "Next-word prediction success rate"

Percentage of words correctly predicted from the context.

:material-trending-up: _Higher is better._
29 changes: 29 additions & 0 deletions docs/leaderboards/main.md
@@ -0,0 +1,29 @@
# Leaderboard

[//]: # (A bit of explanation is required for this page)
[//]: # (There is an MkDocs hook (defined in `docs/hooks.py`) that will read the content of this page. Any line starting with `>>>` will be extracted and replaced with the scores found in the corresponding result file.)
[//]: # (The format to follow is: `>>>{name}|{result_file_name}|{optional_additional_fields}`)

| Keyboard | Overall<br>score | Auto-correction | Auto-completion | Next-word prediction |
|---------:|:----------------:|:---------------:|:---------------:|:--------------------:|
>>>Fleksy|results/fleksy.json
>>>iOS keyboard|results/ios.json
>>>KeyboardKit Open-source|results/keyboardkit_oss.json
>>>KeyboardKit Pro|results/keyboardkit_pro.json
>>>Gboard|results/gboard.json
>>>Swiftkey|results/swiftkey.json
>>>Tappa|results/tappa.json
>>>Yandex|results/yandex.json
---

The metrics used in this leaderboard are:

* Auto-correction: _**F-score**_
* Auto-completion: _**top-3 accuracy**_
* Next-word prediction: _**top-3 accuracy**_

!!! tip
See [Understanding the metrics](../how_testing_is_done.md#understanding-the-metrics) for more details.

The overall score is a _**weighted sum**_ of all tasks.
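As a sketch, the computation could look like the following; the weights here are hypothetical placeholders (the real values live in the benchmark code, not in this diff):

```python
# Hypothetical weights for illustration only; the benchmark defines
# the actual weighting of the three task scores.
def overall_score(acr_fscore: float, acp_top3: float, nwp_top3: float) -> float:
    w_acr, w_acp, w_nwp = 0.4, 0.3, 0.3
    return w_acr * acr_fscore + w_acp * acp_top3 + w_nwp * nwp_top3
```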
12 changes: 7 additions & 5 deletions mkdocs.yml
@@ -34,6 +34,12 @@ markdown_extensions:
- attr_list
- pymdownx.highlight
- pymdownx.superfences
- pymdownx.emoji:
emoji_index: !!python/name:material.extensions.emoji.twemoji
emoji_generator: !!python/name:material.extensions.emoji.to_svg
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true

nav:
- Welcome: "index.md"
@@ -42,7 +48,7 @@ nav:
- "emu_setup.md"
- "how_testing_is_done.md"
- "architecture.md"
- "leaderboard.md"
- "leaderboards/main.md"
- Code reference:
- "public_api.md"
- "internals.md"
@@ -68,7 +74,3 @@

extra_css:
- css/mkdocstrings.css

- extra_javascript:
-     - https://unpkg.com/[email protected]/dist/tablesort.min.js
-     - javascripts/tablesort.js
