# Patch for src/ia_collection_analyzer/streamlit.py.
#
# What the three hunks change:
#   1. Directly after the existing column-wise dropna, add a row-wise dropna
#      with thresh=0.7 * len(items_pd.columns). In pandas, `thresh` is the
#      minimum number of non-NA values a row must have to be kept.
#   2. Wrap the 10-row preview of items_pd in try/except. On any Exception a
#      markdown notice is shown, then only the first row, then the exception.
#   3. Drop every row of filtered_pd that contains any NA, and preview the
#      first 30 rows of filtered_pd instead of the first 10 rows of items_pd.
#
# NOTE(review): line breaks were restored from a whitespace-collapsed copy of
# this patch, and the try/except bodies were re-indented with 4 spaces
# (assumed). The @@ line counts of the second and third hunks exceed the lines
# shown below, so whitespace-only context lines appear to be missing; their
# positions cannot be recovered from this text. Other collapsed spacing (for
# example inside the commented-out loop) is left as found. Regenerate the
# patch from the repository before applying it.
#
# NOTE(review): the context comment "drop columns with 80%+ nan" sits on a call
# whose thresh keeps only columns with at least 80% non-NA values -- confirm
# which of the two was intended.
diff --git a/src/ia_collection_analyzer/streamlit.py b/src/ia_collection_analyzer/streamlit.py
index db87a2e5..8b2c05b8 100644
--- a/src/ia_collection_analyzer/streamlit.py
+++ b/src/ia_collection_analyzer/streamlit.py
@@ -57,6 +57,7 @@ data_transform_text = st.text("cleaning data...")
 # drop columns with 80%+ nan
 items_pd = items_pd.dropna(axis=1, thresh=0.8 * len(items_pd))
+items_pd = items_pd.dropna(axis=0, thresh=0.7 * len(items_pd.columns))
 # drop columns with different types inner.
 # for col in items_pd.columns:
 # items_pd[col] = items_pd[col].apply(lambda x: x if isinstance(x, type(items_pd[col][0])) else np.nan)
@@ -68,7 +69,12 @@ data_transform_text.text("Data transformation and cleaning complete!")
 st.write("The collection contains the following items:")
-st.write(items_pd.head(10)) # display the first 10 rows of the dataframe
+try:
+    st.write(items_pd.head(10))
+except Exception as e:
+    st.markdown("Failed to display top 10 lines. Only first will be shown.")
+    st.write(items_pd.head(1))
+    st.write(e)
 st.header("Selecting columns to analyze")
 st.write("Select additional columns you want to analyze:")
@@ -78,6 +84,7 @@ selected_columns = st.multiselect("Select columns:", seleactable_columns, default=[])
 filtered_pd = items_pd[REQUIRED_METADATA + selected_columns]
+filtered_pd = filtered_pd.dropna(axis=0, how="any")
 st.write("Preview of the selected columns:")
-st.write(items_pd.head(10))
+st.write(filtered_pd.head(30))