Skip to content

Commit

Permalink
Support more flexible model and visualizer select
Browse files Browse the repository at this point in the history
  • Loading branch information
ines committed Sep 19, 2020
1 parent 7bf768c commit 425039f
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 36 deletions.
35 changes: 18 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ default_text = "Sundar Pichai is the CEO of Google."
spacy_streamlit.visualize(models, default_text)
```

You can then run your app with `streamlit run streamlit_app.py`. The app should pop up in your web browser. 😀
You can then run your app with `streamlit run streamlit_app.py`. The app should
pop up in your web browser. 😀

#### 📦 Example: [`01_out-of-the-box.py`](examples/01_out-of-the-box.py)

Expand Down Expand Up @@ -82,21 +83,22 @@ visualizers = ["ner", "textcat"]
spacy_streamlit.visualize(models, default_text, visualizers)
```

| Argument | Type | Description |
| --------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------- |
| `models` | List[str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. |
| `default_text` | str | Default text to analyze on load. Defaults to `""`. |
| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. |
| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. |
| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. |
| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. |
| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. |
| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. |
| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. |
| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. |
| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. |
| Argument | Type | Description |
| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `models` | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. |
| `default_text` | str | Default text to analyze on load. Defaults to `""`. |
| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. |
| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. |
| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. |
| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. |
| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. |
| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. |
| `show_visualizer_select` | bool | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`. |
| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. |
| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. |
| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. |
| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. |

#### <kbd>function</kbd> `visualize_parser`

Expand Down Expand Up @@ -144,7 +146,6 @@ visualize_ner(doc, labels=nlp.get_pipe("ner").labels)
| `sidebar_title` | Optional[str] | Title of the config settings in the sidebar. |
| `colors` | Dict[str,str] | A dictionary mapping labels to display colors (`{"LABEL": "COLOR"}`). |


#### <kbd>function</kbd> `visualize_textcat`

Visualize text categories predicted by a trained text classifier.
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[metadata]
version = 0.0.3
version = 0.1.0
description = Visualize spaCy with streamlit
url = https://github.com/explosion/spacy-streamlit
author = Explosion
Expand Down
73 changes: 55 additions & 18 deletions spacy_streamlit/visualizer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List, Sequence, Tuple, Optional, Dict
from typing import List, Sequence, Tuple, Optional, Dict, Union
import streamlit as st
import spacy
from spacy import displacy
Expand All @@ -17,7 +17,7 @@


def visualize(
models: List[str],
models: Union[List[str], Dict[str, str]],
default_text: str = "",
visualizers: List[str] = ["parser", "ner", "textcat", "similarity", "tokens"],
ner_labels: Optional[List[str]] = None,
Expand All @@ -26,6 +26,7 @@ def visualize(
token_attrs: List[str] = TOKEN_ATTRS,
show_json_doc: bool = True,
show_model_meta: bool = True,
show_visualizer_select: bool = False,
sidebar_title: Optional[str] = None,
sidebar_description: Optional[str] = None,
show_logo: bool = True,
Expand All @@ -42,38 +43,61 @@ def visualize(
if sidebar_description:
st.sidebar.markdown(sidebar_description)

spacy_model = st.sidebar.selectbox("Model name", models, key=f"{key}_visualize_models")
model_load_state = st.info(f"Loading model '{spacy_model}'...")
# Allow both dict of model name / description as well as list of names
model_names = models
format_func = str
if isinstance(models, dict):
format_func = lambda name: models.get(name, name)
model_names = list(models.keys())

spacy_model = st.sidebar.selectbox(
"Pipeline",
model_names,
key=f"{key}_visualize_models",
format_func=format_func,
)
model_load_state = st.info(f"Loading pipeline '{spacy_model}'...")
nlp = load_model(spacy_model)
model_load_state.empty()

if show_visualizer_select:
active_visualizers = st.sidebar.multiselect(
"Visualizers",
options=visualizers,
default=list(visualizers),
key=f"{key}_viz_select",
)
else:
active_visualizers = visualizers

text = st.text_area("Text to analyze", default_text, key=f"{key}_visualize_text")
doc = process_text(spacy_model, text)

if "parser" in visualizers:
if "parser" in visualizers and "parser" in active_visualizers:
visualize_parser(doc, key=key)
if "ner" in visualizers:
if "ner" in visualizers and "ner" in active_visualizers:
ner_labels = ner_labels or nlp.get_pipe("ner").labels
visualize_ner(doc, labels=ner_labels, attrs=ner_attrs, key=key)
if "textcat" in visualizers:
if "textcat" in visualizers and "textcat" in active_visualizers:
visualize_textcat(doc)
if "similarity" in visualizers:
if "similarity" in visualizers and "similarity" in active_visualizers:
visualize_similarity(nlp, key=key)
if "tokens" in visualizers:
if "tokens" in visualizers and "tokens" in active_visualizers:
visualize_tokens(doc, attrs=token_attrs)

if show_json_doc:
st.header("JSON Doc")
if st.button("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
if st.checkbox("Show JSON Doc", key=f"{key}_visualize_show_json_doc"):
st.json(doc.to_json())

if show_model_meta:
st.header("JSON model meta")
if st.button("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
if st.checkbox("Show JSON model meta", key=f"{key}_visualize_show_model_meta"):
st.json(nlp.meta)

st.sidebar.markdown(
FOOTER, unsafe_allow_html=True,
FOOTER,
unsafe_allow_html=True,
)


Expand All @@ -89,10 +113,16 @@ def visualize_parser(
st.header(title)
if sidebar_title:
st.sidebar.header(sidebar_title)
split_sents = st.sidebar.checkbox("Split sentences", value=True, key=f"{key}_parser_split_sents")
split_sents = st.sidebar.checkbox(
"Split sentences", value=True, key=f"{key}_parser_split_sents"
)
options = {
"collapse_punct": st.sidebar.checkbox("Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"),
"collapse_phrases": st.sidebar.checkbox("Collapse phrases", key=f"{key}_parser_collapse_phrases"),
"collapse_punct": st.sidebar.checkbox(
"Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"
),
"collapse_phrases": st.sidebar.checkbox(
"Collapse phrases", key=f"{key}_parser_collapse_phrases"
),
"compact": st.sidebar.checkbox("Compact mode", key=f"{key}_parser_compact"),
}
docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
Expand Down Expand Up @@ -122,7 +152,10 @@ def visualize_ner(
if sidebar_title:
st.sidebar.header(sidebar_title)
label_select = st.sidebar.multiselect(
"Entity labels", options=labels, default=list(labels), key=f"{key}_ner_label_select"
"Entity labels",
options=labels,
default=list(labels),
key=f"{key}_ner_label_select",
)
html = displacy.render(
doc, style="ent", options={"ents": label_select, "colors": colors}
Expand Down Expand Up @@ -165,8 +198,12 @@ def visualize_similarity(
if not meta.get("width", 0):
st.warning("No vectors available in the model.")
st.code(meta)
text1 = st.text_input("Text or word 1", default_texts[0], key=f"{key}_similarity_text1")
text2 = st.text_input("Text or word 2", default_texts[1], key=f"{key}_similarity_text2")
text1 = st.text_input(
"Text or word 1", default_texts[0], key=f"{key}_similarity_text1"
)
text2 = st.text_input(
"Text or word 2", default_texts[1], key=f"{key}_similarity_text2"
)
doc1 = nlp.make_doc(text1)
doc2 = nlp.make_doc(text2)
similarity = doc1.similarity(doc2)
Expand Down

0 comments on commit 425039f

Please sign in to comment.