diff --git a/README.md b/README.md index 8451184..c25979a 100644 --- a/README.md +++ b/README.md @@ -83,24 +83,25 @@ visualizers = ["ner", "textcat"] spacy_streamlit.visualize(models, default_text, visualizers) ``` -| Argument | Type | Description | -| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `models` | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. | -| `default_text` | str | Default text to analyze on load. Defaults to `""`. | -| `default_model` | Optional[str] | Optional name of default model. If not set, the first model in the list of `models` is used. | -| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. | -| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. | -| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | -| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | -| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. | -| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. | -| `show_meta` | bool | Show button to toggle `meta.json` of the current pipeline. Defaults to `True`. | -| `show_config` | bool | Show button to toggle `config.cfg` of the current pipeline. Defaults to `True`. 
| -| `show_visualizer_select` | bool | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`. | -| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. | -| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. | -| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. | -| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. | +| Argument | Type | Description | +| ------------------------ | -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `models` | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. | +| `default_text` | str | Default text to analyze on load. Defaults to `""`. | +| `default_model` | Optional[str] | Optional name of default model. If not set, the first model in the list of `models` is used. | +| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. | +| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. | +| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | +| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. 
| +| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. | +| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. | +| `show_meta` | bool | Show button to toggle `meta.json` of the current pipeline. Defaults to `True`. | +| `show_config` | bool | Show button to toggle `config.cfg` of the current pipeline. Defaults to `True`. | +| `show_visualizer_select` | bool | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`. | +| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. | +| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. | +| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. | +| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. | +| `get_default_text` | Optional[Callable[[Language], str]] | Optional callable that takes the currently loaded `nlp` object and returns the default text. Can be used to provide language-specific default texts. If a callable is provided, its return value is used in place of `default_text`; otherwise the value of `default_text` is used. Defaults to `None`. 
| #### function `visualize_parser` diff --git a/spacy_streamlit/visualizer.py b/spacy_streamlit/visualizer.py index 8e35cbf..0b5853a 100644 --- a/spacy_streamlit/visualizer.py +++ b/spacy_streamlit/visualizer.py @@ -1,6 +1,7 @@ -from typing import List, Sequence, Tuple, Optional, Dict, Union +from typing import List, Sequence, Tuple, Optional, Dict, Union, Callable import streamlit as st import spacy +from spacy.language import Language from spacy import displacy import pandas as pd @@ -35,6 +36,7 @@ def visualize( show_logo: bool = True, color: Optional[str] = "#09A3D5", key: Optional[str] = None, + get_default_text: Callable[[Language], str] = None, ) -> None: """Embed the full visualizer with selected components.""" if color: @@ -84,6 +86,9 @@ def visualize( else: active_visualizers = visualizers + default_text = ( + get_default_text(nlp) if get_default_text is not None else default_text + ) text = st.text_area("Text to analyze", default_text, key=f"{key}_visualize_text") doc = process_text(spacy_model, text) @@ -114,7 +119,8 @@ def visualize( config_exp.code(nlp.config.to_str()) st.sidebar.markdown( - FOOTER, unsafe_allow_html=True, + FOOTER, + unsafe_allow_html=True, )