Skip to content

Commit

Permalink
fix: improve data cleaning and error handling in Streamlit display
Browse files Browse the repository at this point in the history
  • Loading branch information
Ovler-Young committed Nov 20, 2024
1 parent 9786ffd commit 9046343
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/ia_collection_analyzer/streamlit.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
data_transform_text = st.text("cleaning data...")
# `thresh` is the minimum number of non-null values needed to survive the drop.
# Keep only the columns that are populated in at least 80% of the rows ...
items_pd = items_pd.dropna(axis="columns", thresh=0.8 * items_pd.shape[0])
# ... then keep only the rows populated in at least 70% of the remaining columns.
items_pd = items_pd.dropna(axis="index", thresh=0.7 * items_pd.shape[1])
# Disabled: replace values whose type differs from the column's first value with NaN.
# for col in items_pd.columns:
# items_pd[col] = items_pd[col].apply(lambda x: x if isinstance(x, type(items_pd[col][0])) else np.nan)
Expand All @@ -68,7 +69,12 @@
data_transform_text.text("Data transformation and cleaning complete!")

st.write("The collection contains the following items:")
# Rendering the 10-row preview may raise. The preview must only be attempted
# inside the try block: an unguarded call ahead of it would raise first and the
# fallback below would never run.
try:
    st.write(items_pd.head(10))
except Exception as e:
    # Fall back to a single row and surface the error in the page instead of
    # letting the exception propagate out of the script.
    st.markdown("Failed to display top 10 lines. Only first will be shown.")
    st.write(items_pd.head(1))
    st.write(e)

st.header("Selecting columns to analyze")
st.write("Select additional columns you want to analyze:")
Expand All @@ -78,6 +84,7 @@
selected_columns = st.multiselect("Select columns:", seleactable_columns, default=[])

# Restrict the frame to the required metadata plus the user's chosen columns,
# then drop every row that is missing a value in any of those columns.
filtered_pd = items_pd[REQUIRED_METADATA + selected_columns]
filtered_pd = filtered_pd.dropna(axis=0, how="any")

st.write("Preview of the selected columns:")
# Show the filtered frame only; previewing `items_pd` here would display the
# unfiltered data under the "selected columns" heading.
st.write(filtered_pd.head(30))

0 comments on commit 9046343

Please sign in to comment.