Support more flexible model and visualizer select

explosion · Sep 19, 2020 · 425039f · 425039f
1 parent 7bf768c
commit 425039f
Show file tree

Hide file tree

Showing 3 changed files with 74 additions and 36 deletions.
diff --git a/README.md b/README.md
@@ -44,7 +44,8 @@ default_text = "Sundar Pichai is the CEO of Google."
 spacy_streamlit.visualize(models, default_text)
 ```
 
-You can then run your app with `streamlit run streamlit_app.py`. The app should pop up in your web browser. 😀
+You can then run your app with `streamlit run streamlit_app.py`. The app should
+pop up in your web browser. 😀
 
 #### 📦 Example: [`01_out-of-the-box.py`](examples/01_out-of-the-box.py)
 
@@ -82,21 +83,22 @@ visualizers = ["ner", "textcat"]
 spacy_streamlit.visualize(models, default_text, visualizers)
 ```
 
-| Argument              | Type                | Description                                                                                                            |
-| --------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------- |
-| `models`              | List[str]           | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown.                  |
-| `default_text`        | str                 | Default text to analyze on load. Defaults to `""`.                                                                     |
-| `visualizers`         | List[str]           | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`.                      |
-| `ner_labels`          | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used.                  |
-| `ner_attrs`           | List[str]           | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults.   |
-| `token_attrs`         | List[str]           | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults.       |
-| `similarity_texts`    | Tuple[str, str]     | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`.                          |
-| `show_json_doc`       | bool                | Show button to toggle JSON representation of the `Doc`. Defaults to `True`.                                            |
-| `show_model_meta`     | bool                | Show button to toggle model `meta.json`. Defaults to `True`.                                                           |
-| `sidebar_title`       | Optional[str]       | Title shown in the sidebar. Defaults to `None`.                                                                        |
-| `sidebar_description` | Optional[str]       | Description shown in the sidebar. Accepts Markdown-formatted text.                                                     |
-| `show_logo`           | bool                | Show the spaCy logo in the sidebar. Defaults to `True`.                                                                |
-| `color`               | Optional[str]       | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. |
+| Argument                 | Type                       | Description                                                                                                                                                                                         |
+| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `models`                 | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. |
+| `default_text`           | str                        | Default text to analyze on load. Defaults to `""`.                                                                                                                                                  |
+| `visualizers`            | List[str]                  | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`.                                                                                                   |
+| `ner_labels`             | Optional[List[str]]        | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used.                                                                                               |
+| `ner_attrs`              | List[str]                  | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults.                                                                                |
+| `token_attrs`            | List[str]                  | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults.                                                                                    |
+| `similarity_texts`       | Tuple[str, str]            | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`.                                                                                                       |
+| `show_json_doc`          | bool                       | Show button to toggle JSON representation of the `Doc`. Defaults to `True`.                                                                                                                         |
+| `show_model_meta`        | bool                       | Show button to toggle model `meta.json`. Defaults to `True`.                                                                                                                                        |
+| `show_visualizer_select` | bool                       | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`.                                                                                         |
+| `sidebar_title`          | Optional[str]              | Title shown in the sidebar. Defaults to `None`.                                                                                                                                                     |
+| `sidebar_description`    | Optional[str]              | Description shown in the sidebar. Accepts Markdown-formatted text.                                                                                                                                  |
+| `show_logo`              | bool                       | Show the spaCy logo in the sidebar. Defaults to `True`.                                                                                                                                             |
+| `color`                  | Optional[str]              | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`.                                                                              |
 
 #### <kbd>function</kbd> `visualize_parser`
 
@@ -144,7 +146,6 @@ visualize_ner(doc, labels=nlp.get_pipe("ner").labels)
 | `sidebar_title` | Optional[str] | Title of the config settings in the sidebar.                                  |
 | `colors`        | Dict[str,str] | A dictionary mapping labels to display colors ({"LABEL": "COLOR"})            |
 
-
 #### <kbd>function</kbd> `visualize_textcat`
 
 Visualize text categories predicted by a trained text classifier.

diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [metadata]
-version = 0.0.3
+version = 0.1.0
 description = Visualize spaCy with streamlit
 url = https://github.com/explosion/spacy-streamlit
 author = Explosion

diff --git a/spacy_streamlit/visualizer.py b/spacy_streamlit/visualizer.py
@@ -1,4 +1,4 @@
-from typing import List, Sequence, Tuple, Optional, Dict
+from typing import List, Sequence, Tuple, Optional, Dict, Union
 import streamlit as st
 import spacy
 from spacy import displacy
@@ -17,7 +17,7 @@
 
 
 def visualize(
-    models: List[str],
+    models: Union[List[str], Dict[str, str]],
     default_text: str = "",
     visualizers: List[str] = ["parser", "ner", "textcat", "similarity", "tokens"],
     ner_labels: Optional[List[str]] = None,
@@ -26,6 +26,7 @@ def visualize(
     token_attrs: List[str] = TOKEN_ATTRS,
     show_json_doc: bool = True,
     show_model_meta: bool = True,
+    show_visualizer_select: bool = False,
     sidebar_title: Optional[str] = None,
     sidebar_description: Optional[str] = None,
     show_logo: bool = True,
@@ -42,38 +43,61 @@ def visualize(
     if sidebar_description:
         st.sidebar.markdown(sidebar_description)
 
-    spacy_model = st.sidebar.selectbox("Model name", models, key=f"{key}_visualize_models")
-    model_load_state = st.info(f"Loading model '{spacy_model}'...")
+    # Allow both dict of model name / description as well as list of names
+    model_names = models
+    format_func = str
+    if isinstance(models, dict):
+        format_func = lambda name: models.get(name, name)
+        model_names = list(models.keys())
+
+    spacy_model = st.sidebar.selectbox(
+        "Pipeline",
+        model_names,
+        key=f"{key}_visualize_models",
+        format_func=format_func,
+    )
+    model_load_state = st.info(f"Loading pipeline '{spacy_model}'...")
     nlp = load_model(spacy_model)
     model_load_state.empty()
 
+    if show_visualizer_select:
+        active_visualizers = st.sidebar.multiselect(
+            "Visualizers",
+            options=visualizers,
+            default=list(visualizers),
+            key=f"{key}_viz_select",
+        )
+    else:
+        active_visualizers = visualizers
+
     text = st.text_area("Text to analyze", default_text, key=f"{key}_visualize_text")
     doc = process_text(spacy_model, text)
 
-    if "parser" in visualizers:
+    if "parser" in visualizers and "parser" in active_visualizers:
         visualize_parser(doc, key=key)
-    if "ner" in visualizers:
+    if "ner" in visualizers and "ner" in active_visualizers:
         ner_labels = ner_labels or nlp.get_pipe("ner").labels
         visualize_ner(doc, labels=ner_labels, attrs=ner_attrs, key=key)
-    if "textcat" in visualizers:
+    if "textcat" in visualizers and "textcat" in active_visualizers:
         visualize_textcat(doc)
-    if "similarity" in visualizers:
+    if "similarity" in visualizers and "similarity" in active_visualizers:
         visualize_similarity(nlp, key=key)
-    if "tokens" in visualizers:
+    if "tokens" in visualizers and "tokens" in active_visualizers:
         visualize_tokens(doc, attrs=token_attrs)
 
     if show_json_doc:
         st.header("JSON Doc")
-        if st.button("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
+        if st.checkbox("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
             st.json(doc.to_json())
 
     if show_model_meta:
         st.header("JSON model meta")
-        if st.button("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
+        if st.checkbox("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
             st.json(nlp.meta)
 
     st.sidebar.markdown(
-        FOOTER, unsafe_allow_html=True,
+        FOOTER,
+        unsafe_allow_html=True,
     )
 
 
@@ -89,10 +113,16 @@ def visualize_parser(
         st.header(title)
     if sidebar_title:
         st.sidebar.header(sidebar_title)
-    split_sents = st.sidebar.checkbox("Split sentences", value=True, key=f"{key}_parser_split_sents")
+    split_sents = st.sidebar.checkbox(
+        "Split sentences", value=True, key=f"{key}_parser_split_sents"
+    )
     options = {
-        "collapse_punct": st.sidebar.checkbox("Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"),
-        "collapse_phrases": st.sidebar.checkbox("Collapse phrases", key=f"{key}_parser_collapse_phrases"),
+        "collapse_punct": st.sidebar.checkbox(
+            "Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"
+        ),
+        "collapse_phrases": st.sidebar.checkbox(
+            "Collapse phrases", key=f"{key}_parser_collapse_phrases"
+        ),
         "compact": st.sidebar.checkbox("Compact mode", key=f"{key}_parser_compact"),
     }
     docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
@@ -122,7 +152,10 @@ def visualize_ner(
     if sidebar_title:
         st.sidebar.header(sidebar_title)
     label_select = st.sidebar.multiselect(
-        "Entity labels", options=labels, default=list(labels), key=f"{key}_ner_label_select"
+        "Entity labels",
+        options=labels,
+        default=list(labels),
+        key=f"{key}_ner_label_select",
     )
     html = displacy.render(
         doc, style="ent", options={"ents": label_select, "colors": colors}
@@ -165,8 +198,12 @@ def visualize_similarity(
     if not meta.get("width", 0):
         st.warning("No vectors available in the model.")
     st.code(meta)
-    text1 = st.text_input("Text or word 1", default_texts[0], key=f"{key}_similarity_text1")
-    text2 = st.text_input("Text or word 2", default_texts[1], key=f"{key}_similarity_text2")
+    text1 = st.text_input(
+        "Text or word 1", default_texts[0], key=f"{key}_similarity_text1"
+    )
+    text2 = st.text_input(
+        "Text or word 2", default_texts[1], key=f"{key}_similarity_text2"
+    )
     doc1 = nlp.make_doc(text1)
     doc2 = nlp.make_doc(text2)
     similarity = doc1.similarity(doc2)