Skip to content

Commit

Permalink
fmt
Browse files: browse the repository at this point in the history
  • Loading branch information
sfc-gh-tzayats committed Dec 9, 2024
1 parent 417a816 commit 98dc6b5
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 27 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ run_mypy: ## Run mypy
mypy --config-file=mypy.ini .

run_flake8: ## Run flake8
flake8 --ignore=E203,E501,W503 --exclude=venv,pyvenv,tmp,*_pb2.py,*_pb2.pyi,images/*/src .
flake8 --ignore=E203,E501,W503 --exclude=venv,.venv,pyvenv,tmp,*_pb2.py,*_pb2.pyi,images/*/src .

check_black: ## Check to see if files would be updated with black.
# Exclude pyvenv and all generated protobuf code.
Expand All @@ -49,10 +49,10 @@ run_black: ## Run black to format files.
black --exclude="venv|pyvenv|tmp|.*_pb2.py|.*_pb2.pyi" .

check_isort: ## Check if files would be updated with isort.
isort --profile black --check --skip=venv --skip=pyvenv --skip-glob='*_pb2.py*' .
isort --profile black --check --skip=venv --skip=pyvenv --skip=.venv --skip-glob='*_pb2.py*' .

run_isort: ## Run isort to update imports.
isort --profile black --skip=pyvenv --skip=venv --skip=tmp --skip-glob='*_pb2.py*' .
isort --profile black --skip=pyvenv --skip=venv --skip=tmp --skip=.venv --skip-glob='*_pb2.py*' .


fmt_lint: shell ## lint/fmt in current python environment
Expand Down
2 changes: 1 addition & 1 deletion app_utils/shared_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dataclasses import dataclass
from enum import Enum
from io import StringIO
from typing import Any, Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Union

import pandas as pd
import streamlit as st
Expand Down
52 changes: 29 additions & 23 deletions journeys/iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,6 @@ def evaluation_mode_show() -> None:
columns=["Summary Statistic", "Value"],
)
st.dataframe(summary_stats, hide_index=True)


send_analyst_requests()
run_sql_queries()
Expand Down Expand Up @@ -927,10 +926,10 @@ def _get_content(
start_time = time.time()
analyst_results = []

for i, (id, row) in enumerate(eval_table_frame.iterrows(), start=1):
for i, (row_id, row_id) in enumerate(eval_table_frame.iterrows(), start=1):
status_text.text(f"Sending request {i}/{total_requests} to Analyst...")
messages = [
{"role": "user", "content": [{"type": "text", "text": row["QUERY"]}]}
{"role": "user", "content": [{"type": "text", "text": row_id["QUERY"]}]}
]
semantic_model = proto_to_yaml(st.session_state.semantic_model)
try:
Expand All @@ -942,12 +941,12 @@ def _get_content(
response_text = _get_content(response, item_type="text", key="text")
response_sql = _get_content(response, item_type="sql", key="statement")
analyst_results.append(
dict(ID=id, ANALYST_TEXT=response_text, ANALYST_SQL=response_sql)
dict(ID=row_id, ANALYST_TEXT=response_text, ANALYST_SQL=response_sql)
)
except Exception as e:
import traceback

st.error(f"Problem with {id}: {e} \n{traceback.format_exc()}")
st.error(f"Problem with {row_id}: {e} \n{traceback.format_exc()}")

progress_bar.progress(i / total_requests)
time.sleep(0.1)
Expand Down Expand Up @@ -1098,7 +1097,7 @@ def _safe_re_search(x, filter): # type: ignore[no-untyped-def]
try:
return re.search(filter, x).group(1).strip() # type: ignore[union-attr]
except Exception as e:
return f"Could Not Parse LLM Judge Response: {x}"
return f"Could Not Parse LLM Judge Response: {x} with error: {e}"

llm_judge_frame["EXPLANATION"] = llm_judge_frame["LLM_JUDGE"].apply(
_safe_re_search, args=(reason_filter,)
Expand All @@ -1119,36 +1118,36 @@ def visualize_eval_results(frame: pd.DataFrame) -> None:
st.markdown(
f"###### Results: {n_correct} out of {n_questions} questions correct with accuracy {accuracy:.2f}%"
)
for id, row in frame.iterrows():
for id, frame_row in frame.iterrows():
match_emoji = "✅" if row["CORRECT"] else "❌"
with st.expander(f"Row ID: {id} {match_emoji}"):
st.write(f"Input Query: {row['QUERY']}")
st.write(row["ANALYST_TEXT"].replace("\n", " "))
st.write(f"Input Query: {frame_row['QUERY']}")
st.write(frame_row["ANALYST_TEXT"].replace("\n", " "))

col1, col2 = st.columns(2)

with col1:
st.write("Analyst SQL")
st.code(row["ANALYST_SQL"], language="sql")
st.code(frame_row["ANALYST_SQL"], language="sql")

with col2:
st.write("Golden SQL")
st.code(row["GOLD_SQL"], language="sql")
st.code(frame_row["GOLD_SQL"], language="sql")

col1, col2 = st.columns(2)
with col1:
if isinstance(row["ANALYST_RESULT"], str):
st.error(row["ANALYST_RESULT"])
if isinstance(frame_row["ANALYST_RESULT"], str):
st.error(frame_row["ANALYST_RESULT"])
else:
st.write(row["ANALYST_RESULT"])
st.write(frame_row["ANALYST_RESULT"])

with col2:
if isinstance(row["GOLD_RESULT"], str):
st.error(row["GOLD_RESULT"])
if isinstance(frame_row["GOLD_RESULT"], str):
st.error(frame_row["GOLD_RESULT"])
else:
st.write(row["GOLD_RESULT"])
st.write(frame_row["GOLD_RESULT"])

st.write(f"**Explanation**: {row['EXPLANATION']}")
st.write(f"**Explanation**: {frame_row['EXPLANATION']}")


def result_comparisons() -> None:
Expand Down Expand Up @@ -1194,10 +1193,13 @@ def result_comparisons() -> None:
)
else:
exact_match = _results_contain_gold_data(
analyst_frame=res_row["ANALYST_RESULT"], gold_frame=res_row["GOLD_RESULT"]
analyst_frame=res_row["ANALYST_RESULT"],
gold_frame=res_row["GOLD_RESULT"],
)
matches[row_id] = exact_match
explanations[row_id] = "Data matches exactly" if exact_match else use_llm_judge
explanations[row_id] = (
"Data matches exactly" if exact_match else use_llm_judge
)

frame["CORRECT"] = matches
frame["EXPLANATION"] = explanations
Expand All @@ -1206,7 +1208,7 @@ def result_comparisons() -> None:

status_text.text("Calling LLM Judge...")
llm_judge_frame = _llm_judge(frame=filtered_frame)

for col in ("CORRECT", "EXPLANATION"):
frame[col] = llm_judge_frame[col].combine_first(frame[col])

Expand All @@ -1223,8 +1225,12 @@ def result_comparisons() -> None:
frame["MODEL_HASH"] = hash(st.session_state["working_yml"])

# Save results to frame as string
frame["ANALYST_RESULT"] = frame["ANALYST_RESULT"].apply(lambda x: x.to_string(index=False) if isinstance(x, pd.DataFrame) else x)
frame["GOLD_RESULT"] = frame["GOLD_RESULT"].apply(lambda x: x.to_string(index=False) if isinstance(x, pd.DataFrame) else x)
frame["ANALYST_RESULT"] = frame["ANALYST_RESULT"].apply(
lambda x: x.to_string(index=False) if isinstance(x, pd.DataFrame) else x
)
frame["GOLD_RESULT"] = frame["GOLD_RESULT"].apply(
lambda x: x.to_string(index=False) if isinstance(x, pd.DataFrame) else x
)

frame = frame.reset_index()[list(RESULTS_TABLE_SCHEMA)]
write_pandas(
Expand Down

0 comments on commit 98dc6b5

Please sign in to comment.