diff --git a/README.md b/README.md index 8b8a6bc..db33878 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,8 @@ default_text = "Sundar Pichai is the CEO of Google." spacy_streamlit.visualize(models, default_text) ``` -You can then run your app with `streamlit run streamlit_app.py`. The app should pop up in your web browser. 😀 +You can then run your app with `streamlit run streamlit_app.py`. The app should +pop up in your web browser. 😀 #### 📦 Example: [`01_out-of-the-box.py`](examples/01_out-of-the-box.py) @@ -82,21 +83,22 @@ visualizers = ["ner", "textcat"] spacy_streamlit.visualize(models, default_text, visualizers) ``` -| Argument | Type | Description | -| --------------------- | ------------------- | ---------------------------------------------------------------------------------------------------------------------- | -| `models` | List[str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. | -| `default_text` | str | Default text to analyze on load. Defaults to `""`. | -| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. | -| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. | -| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | -| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | -| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. | -| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. | -| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. 
| -| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. | -| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. | -| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. | -| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. | +| Argument | Type | Description | +| ------------------------ | -------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `models` | List[str] / Dict[str, str] | Names of loadable spaCy models (paths or package names). The models become selectable via a dropdown. Can either be a list of names or the names mapped to descriptions to display in the dropdown. | +| `default_text` | str | Default text to analyze on load. Defaults to `""`. | +| `visualizers` | List[str] | Names of visualizers to show. Defaults to `["parser", "ner", "textcat", "similarity", "tokens"]`. | +| `ner_labels` | Optional[List[str]] | NER labels to include. If not set, all labels present in the `"ner"` pipeline component will be used. | +| `ner_attrs` | List[str] | Span attributes shown in table of named entities. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | +| `token_attrs` | List[str] | Token attributes to show in token visualizer. See [`visualizer.py`](spacy_streamlit/visualizer.py) for defaults. | +| `similarity_texts` | Tuple[str, str] | The default texts to compare in the similarity visualizer. Defaults to `("apple", "orange")`. | +| `show_json_doc` | bool | Show button to toggle JSON representation of the `Doc`. Defaults to `True`. | +| `show_model_meta` | bool | Show button to toggle model `meta.json`. Defaults to `True`. 
| +| `show_visualizer_select` | bool | Show sidebar dropdown to select visualizers to display (based on enabled visualizers). Defaults to `False`. | +| `sidebar_title` | Optional[str] | Title shown in the sidebar. Defaults to `None`. | +| `sidebar_description` | Optional[str] | Description shown in the sidebar. Accepts Markdown-formatted text. | +| `show_logo` | bool | Show the spaCy logo in the sidebar. Defaults to `True`. | +| `color` | Optional[str] | Experimental: Primary color to use for some of the main UI elements (`None` to disable hack). Defaults to `"#09A3D5"`. | #### function `visualize_parser` @@ -144,7 +146,6 @@ visualize_ner(doc, labels=nlp.get_pipe("ner").labels) | `sidebar_title` | Optional[str] | Title of the config settings in the sidebar. | | `colors` | Dict[str,str] | A dictionary mapping labels to display colors ({"LABEL": "COLOR"}) | - #### function `visualize_textcat` Visualize text categories predicted by a trained text classifier. diff --git a/setup.cfg b/setup.cfg index 61adb8b..8db8492 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [metadata] -version = 0.0.3 +version = 0.1.0 description = Visualize spaCy with streamlit url = https://github.com/explosion/spacy-streamlit author = Explosion diff --git a/spacy_streamlit/visualizer.py b/spacy_streamlit/visualizer.py index 199f46c..0fea012 100644 --- a/spacy_streamlit/visualizer.py +++ b/spacy_streamlit/visualizer.py @@ -1,4 +1,4 @@ -from typing import List, Sequence, Tuple, Optional, Dict +from typing import List, Sequence, Tuple, Optional, Dict, Union import streamlit as st import spacy from spacy import displacy @@ -17,7 +17,7 @@ def visualize( - models: List[str], + models: Union[List[str], Dict[str, str]], default_text: str = "", visualizers: List[str] = ["parser", "ner", "textcat", "similarity", "tokens"], ner_labels: Optional[List[str]] = None, @@ -26,6 +26,7 @@ def visualize( token_attrs: List[str] = TOKEN_ATTRS, show_json_doc: bool = True, show_model_meta: bool = True, + 
show_visualizer_select: bool = False, sidebar_title: Optional[str] = None, sidebar_description: Optional[str] = None, show_logo: bool = True, @@ -42,38 +43,61 @@ def visualize( if sidebar_description: st.sidebar.markdown(sidebar_description) - spacy_model = st.sidebar.selectbox("Model name", models, key=f"{key}_visualize_models") - model_load_state = st.info(f"Loading model '{spacy_model}'...") + # Allow both dict of model name / description as well as list of names + model_names = models + format_func = str + if isinstance(models, dict): + format_func = lambda name: models.get(name, name) + model_names = list(models.keys()) + + spacy_model = st.sidebar.selectbox( + "Pipeline", + model_names, + key=f"{key}_visualize_models", + format_func=format_func, + ) + model_load_state = st.info(f"Loading pipeline '{spacy_model}'...") nlp = load_model(spacy_model) model_load_state.empty() + if show_visualizer_select: + active_visualizers = st.sidebar.multiselect( + "Visualizers", + options=visualizers, + default=list(visualizers), + key=f"{key}_viz_select", + ) + else: + active_visualizers = visualizers + text = st.text_area("Text to analyze", default_text, key=f"{key}_visualize_text") doc = process_text(spacy_model, text) - if "parser" in visualizers: + if "parser" in visualizers and "parser" in active_visualizers: visualize_parser(doc, key=key) - if "ner" in visualizers: + if "ner" in visualizers and "ner" in active_visualizers: ner_labels = ner_labels or nlp.get_pipe("ner").labels visualize_ner(doc, labels=ner_labels, attrs=ner_attrs, key=key) - if "textcat" in visualizers: + if "textcat" in visualizers and "textcat" in active_visualizers: visualize_textcat(doc) - if "similarity" in visualizers: + if "similarity" in visualizers and "similarity" in active_visualizers: visualize_similarity(nlp, key=key) - if "tokens" in visualizers: + if "tokens" in visualizers and "tokens" in active_visualizers: visualize_tokens(doc, attrs=token_attrs) if show_json_doc: st.header("JSON Doc") 
- if st.button("Show JSON Doc", key=f"{key}_visualize_show_json_doc"): + if st.checkbox("Show JSON Doc", key=f"{key}_visualize_show_json_doc"): st.json(doc.to_json()) if show_model_meta: st.header("JSON model meta") - if st.button("Show JSON model meta", key=f"{key}_visualize_show_model_meta"): + if st.checkbox("Show JSON model meta", key=f"{key}_visualize_show_model_meta"): st.json(nlp.meta) st.sidebar.markdown( - FOOTER, unsafe_allow_html=True, + FOOTER, + unsafe_allow_html=True, ) @@ -89,10 +113,16 @@ def visualize_parser( st.header(title) if sidebar_title: st.sidebar.header(sidebar_title) - split_sents = st.sidebar.checkbox("Split sentences", value=True, key=f"{key}_parser_split_sents") + split_sents = st.sidebar.checkbox( + "Split sentences", value=True, key=f"{key}_parser_split_sents" + ) options = { - "collapse_punct": st.sidebar.checkbox("Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct"), - "collapse_phrases": st.sidebar.checkbox("Collapse phrases", key=f"{key}_parser_collapse_phrases"), + "collapse_punct": st.sidebar.checkbox( + "Collapse punctuation", value=True, key=f"{key}_parser_collapse_punct" + ), + "collapse_phrases": st.sidebar.checkbox( + "Collapse phrases", key=f"{key}_parser_collapse_phrases" + ), "compact": st.sidebar.checkbox("Compact mode", key=f"{key}_parser_compact"), } docs = [span.as_doc() for span in doc.sents] if split_sents else [doc] @@ -122,7 +152,10 @@ def visualize_ner( if sidebar_title: st.sidebar.header(sidebar_title) label_select = st.sidebar.multiselect( - "Entity labels", options=labels, default=list(labels), key=f"{key}_ner_label_select" + "Entity labels", + options=labels, + default=list(labels), + key=f"{key}_ner_label_select", ) html = displacy.render( doc, style="ent", options={"ents": label_select, "colors": colors} @@ -165,8 +198,12 @@ def visualize_similarity( if not meta.get("width", 0): st.warning("No vectors available in the model.") st.code(meta) - text1 = st.text_input("Text or word 1", 
default_texts[0], key=f"{key}_similarity_text1") - text2 = st.text_input("Text or word 2", default_texts[1], key=f"{key}_similarity_text2") + text1 = st.text_input( + "Text or word 1", default_texts[0], key=f"{key}_similarity_text1" + ) + text2 = st.text_input( + "Text or word 2", default_texts[1], key=f"{key}_similarity_text2" + ) doc1 = nlp.make_doc(text1) doc2 = nlp.make_doc(text2) similarity = doc1.similarity(doc2)