diff --git a/README.md b/README.md
index 8b8a6bc..db33878 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,8 @@ default_text = "Sundar Pichai is the CEO of Google."
spacy_streamlit.visualize(models, default_text)
```
-You can then run your app with `streamlit run streamlit_app.py`. The app should pop up in your web browser. 😀
+You can then run your app with `streamlit run streamlit_app.py`. The app should
+pop up in your web browser. 😀
#### 📦 Example: [`01_out-of-the-box.py`](examples/01_out-of-the-box.py)
@@ -82,21 +83,22 @@ visualizers = ["ner", "textcat"]
spacy_streamlit.visualize(models, default_text, visualizers)
```
-| Argument | Type | Description |
-| --------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------- |
-| `models` | List[str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. |
-| `default_text` | str | Default text to analyze on load. Defaults to `""`. |
-| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. |
-| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. |
-| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
-| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
-| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. |
-| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. |
-| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. |
-| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. |
-| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. |
-| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. |
-| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. |
+| Argument | Type | Description |
+| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `models` | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. |
+| `default_text` | str | Default text to analyze on load. Defaults to `""`. |
+| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. |
+| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. |
+| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
+| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
+| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. |
+| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. |
+| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. |
+| `show_visualizer_select` | bool | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`. |
+| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. |
+| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. |
+| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. |
+| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. |
#### function `visualize_parser`
@@ -144,7 +146,6 @@ visualize_ner(doc, labels=nlp.get_pipe("ner").labels)
| `sidebar_title` | Optional[str] | Title of the config settings in the sidebar. |
| `colors` | Dict[str,str] | A dictionary mapping labels to display colors ({"LABEL": "COLOR"}) |
-
#### function `visualize_textcat`
Visualize text categories predicted by a trained text classifier.
diff --git a/setup.cfg b/setup.cfg
index 61adb8b..8db8492 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
[metadata]
-version = 0.0.3
+version = 0.1.0
description = Visualize spaCy with streamlit
url = https://github.com/explosion/spacy-streamlit
author = Explosion
diff --git a/spacy_streamlit/visualizer.py b/spacy_streamlit/visualizer.py
index 199f46c..0fea012 100644
--- a/spacy_streamlit/visualizer.py
+++ b/spacy_streamlit/visualizer.py
@@ -1,4 +1,4 @@
-from typing import List, Sequence, Tuple, Optional, Dict
+from typing import List, Sequence, Tuple, Optional, Dict, Union
import streamlit as st
import spacy
from spacy import displacy
@@ -17,7 +17,7 @@
def visualize(
- models: List[str],
+ models: Union[List[str], Dict[str, str]],
default_text: str = "",
visualizers: List[str] = ["parser", "ner", "textcat", "similarity", "tokens"],
ner_labels: Optional[List[str]] = None,
@@ -26,6 +26,7 @@ def visualize(
token_attrs: List[str] = TOKEN_ATTRS,
show_json_doc: bool = True,
show_model_meta: bool = True,
+ show_visualizer_select: bool = False,
sidebar_title: Optional[str] = None,
sidebar_description: Optional[str] = None,
show_logo: bool = True,
@@ -42,38 +43,61 @@ def visualize(
if sidebar_description:
st.sidebar.markdown(sidebar_description)
- spacy_model = st.sidebar.selectbox("Model name", models, key=f"{key}_visualize_models")
- model_load_state = st.info(f"Loading model '{spacy_model}'...")
+ # Allow both dict of model name / description as well as list of names
+ model_names = models
+ format_func = str
+ if isinstance(models, dict):
+ format_func = lambda name: models.get(name, name)
+ model_names = list(models.keys())
+
+ spacy_model = st.sidebar.selectbox(
+ "Pipeline",
+ model_names,
+ key=f"{key}_visualize_models",
+ format_func=format_func,
+ )
+ model_load_state = st.info(f"Loading pipeline '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()
+ if show_visualizer_select:
+ active_visualizers = st.sidebar.multiselect(
+ "Visualizers",
+ options=visualizers,
+ default=list(visualizers),
+ key=f"{key}_viz_select",
+ )
+ else:
+ active_visualizers = visualizers
+
text = st.text_area("Text to analyze", default_text, key=f"{key}_visualize_text")
doc = process_text(spacy_model, text)
- if "parser" in visualizers:
+ if "parser" in visualizers and "parser" in active_visualizers:
visualize_parser(doc, key=key)
- if "ner" in visualizers:
+ if "ner" in visualizers and "ner" in active_visualizers:
ner_labels = ner_labels or nlp.get_pipe("ner").labels
visualize_ner(doc, labels=ner_labels, attrs=ner_attrs, key=key)
- if "textcat" in visualizers:
+ if "textcat" in visualizers and "textcat" in active_visualizers:
visualize_textcat(doc)
- if "similarity" in visualizers:
+ if "similarity" in visualizers and "similarity" in active_visualizers:
visualize_similarity(nlp, key=key)
- if "tokens" in visualizers:
+ if "tokens" in visualizers and "tokens" in active_visualizers:
visualize_tokens(doc, attrs=token_attrs)
if show_json_doc:
st.header("JSON Doc")
- if st.button("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
+ if st.checkbox("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
st.json(doc.to_json())
if show_model_meta:
st.header("JSON model meta")
- if st.button("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
+ if st.checkbox("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
st.json(nlp.meta)
st.sidebar.markdown(
- FOOTER, unsafe_allow_html=True,
+ FOOTER,
+ unsafe_allow_html=True,
)
@@ -89,10 +113,16 @@ def visualize_parser(
st.header(title)
if sidebar_title:
st.sidebar.header(sidebar_title)
- split_sents = st.sidebar.checkbox("Split sentences", value=True, key=f"{key}_parser_split_sents")
+ split_sents = st.sidebar.checkbox(
+ "Split sentences", value=True, key=f"{key}_parser_split_sents"
+ )
options = {
- "collapse_punct": st.sidebar.checkbox("Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"),
- "collapse_phrases": st.sidebar.checkbox("Collapse phrases", key=f"{key}_parser_collapse_phrases"),
+ "collapse_punct": st.sidebar.checkbox(
+ "Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"
+ ),
+ "collapse_phrases": st.sidebar.checkbox(
+ "Collapse phrases", key=f"{key}_parser_collapse_phrases"
+ ),
"compact": st.sidebar.checkbox("Compact mode", key=f"{key}_parser_compact"),
}
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
@@ -122,7 +152,10 @@ def visualize_ner(
if sidebar_title:
st.sidebar.header(sidebar_title)
label_select = st.sidebar.multiselect(
- "Entity labels", options=labels, default=list(labels), key=f"{key}_ner_label_select"
+ "Entity labels",
+ options=labels,
+ default=list(labels),
+ key=f"{key}_ner_label_select",
)
html = displacy.render(
doc, style="ent", options={"ents": label_select, "colors": colors}
@@ -165,8 +198,12 @@ def visualize_similarity(
if not meta.get("width", 0):
st.warning("No vectors available in the model.")
st.code(meta)
- text1 = st.text_input("Text or word 1", default_texts[0], key=f"{key}_similarity_text1")
- text2 = st.text_input("Text or word 2", default_texts[1], key=f"{key}_similarity_text2")
+ text1 = st.text_input(
+ "Text or word 1", default_texts[0], key=f"{key}_similarity_text1"
+ )
+ text2 = st.text_input(
+ "Text or word 2", default_texts[1], key=f"{key}_similarity_text2"
+ )
doc1 = nlp.make_doc(text1)
doc2 = nlp.make_doc(text2)
similarity = doc1.similarity(doc2)