diff --git a/journeys/evaluation.py b/journeys/evaluation.py
index 00eb7c7c..a51456a1 100644
--- a/journeys/evaluation.py
+++ b/journeys/evaluation.py
@@ -6,7 +6,9 @@
 import pandas as pd
 import snowflake.snowpark._internal.utils as snowpark_utils
+import sqlglot
 import streamlit as st
+import yaml
 from loguru import logger
 from snowflake.connector.pandas_tools import write_pandas
@@ -16,6 +18,7 @@
     schema_selector_container,
     set_sit_query_tag,
     table_selector_container,
+    update_last_validated_model,
     validate_table_exist,
     validate_table_schema,
 )
@@ -26,6 +29,7 @@
     fetch_table,
     get_table_hash,
 )
+from semantic_model_generator.validate_model import validate
 
 EVALUATION_TABLE_SCHEMA = {
     "ID": "VARCHAR",
@@ -46,6 +50,7 @@
     "MODEL_HASH": "VARCHAR",
     "SEMANTIC_MODEL_STRING": "VARCHAR",
     "EVAL_TABLE": "VARCHAR",
+    "EVAL_HASH": "VARCHAR",
 }
 
 LLM_JUDGE_PROMPT_TEMPLATE = """\
@@ -85,13 +90,27 @@ def visualize_eval_results(frame: pd.DataFrame) -> None:
             col1, col2 = st.columns(2)
 
+            try:
+                analyst_sql = sqlglot.parse_one(row["ANALYST_SQL"], dialect="snowflake")
+                analyst_sql = analyst_sql.sql(dialect="snowflake", pretty=True)
+            except Exception as e:
+                logger.warning(f"Error parsing analyst SQL: {e} for {row_id}")
+                analyst_sql = row["ANALYST_SQL"]
+
+            try:
+                gold_sql = sqlglot.parse_one(row["GOLD_SQL"], dialect="snowflake")
+                gold_sql = gold_sql.sql(dialect="snowflake", pretty=True)
+            except Exception as e:
+                logger.warning(f"Error parsing gold SQL: {e} for {row_id}")
+                gold_sql = row["GOLD_SQL"]
+
             with col1:
                 st.write("Analyst SQL")
-                st.code(row["ANALYST_SQL"], language="sql")
+                st.code(analyst_sql, language="sql")
             with col2:
                 st.write("Golden SQL")
-                st.code(row["GOLD_SQL"], language="sql")
+                st.code(gold_sql, language="sql")
 
             col1, col2 = st.columns(2)
             with col1:
@@ -269,6 +288,7 @@ def result_comparisons() -> None:
 def write_eval_results(frame: pd.DataFrame) -> None:
     frame_to_write = frame.copy()
     frame_to_write["TIMESTAMP"] = st.session_state["eval_timestamp"]
+    frame_to_write["EVAL_HASH"] = st.session_state["eval_hash"]
     frame_to_write["EVAL_TABLE"] = st.session_state["eval_table"]
     frame_to_write["EVAL_TABLE_HASH"] = st.session_state["eval_table_hash"]
     frame_to_write["MODEL_HASH"] = st.session_state["semantic_model_hash"]
@@ -550,14 +570,12 @@ def evaluation_data_dialog() -> None:
         st.session_state["results_eval_table"] = st.session_state[
             "selected_results_eval_table"
         ]
-        # clear the results table if it exists
-        if "total_eval_frame" in st.session_state:
-            del st.session_state["total_eval_frame"]
+        clear_evaluation_data()
 
         st.rerun()
 
 
-def clear_evaluation_data() -> None:
+def clear_evaluation_selection() -> None:
     session_states = (
         "selected_eval_database",
         "selected_eval_schema",
@@ -574,9 +592,21 @@
             del st.session_state[feature]
 
 
+def clear_evaluation_data() -> None:
+    session_states = (
+        "total_eval_frame",
+        "eval_accuracy",
+        "analyst_results_frame",
+        "query_results_frame",
+    )
+    for feature in session_states:
+        if feature in st.session_state:
+            del st.session_state[feature]
+
+
 def evaluation_mode_show() -> None:
-    if st.button("Select Evaluation Tables", on_click=clear_evaluation_data):
+    if st.button("Select Evaluation Tables", on_click=clear_evaluation_selection):
         evaluation_data_dialog()
 
     st.write(
@@ -609,13 +639,14 @@ def evaluation_mode_show() -> None:
             [
                 ["Evaluation Table Hash", st.session_state["eval_table_hash"]],
                 ["Semantic Model Hash", st.session_state["semantic_model_hash"]],
+                ["Evaluation Run Hash", st.session_state["eval_hash"]],
                 ["Timestamp", st.session_state["eval_timestamp"]],
                 ["Accuracy", f"{st.session_state['eval_accuracy']:.2f}%"],
             ],
             columns=["Summary Statistic", "Value"],
         )
         if model_changed_test:
-            st.write("Model has changed since last evaluation run.")
+            st.warning("Model has changed since last evaluation run.")
             st.markdown("#### Previous Evaluation Run Summary")
         else:
             st.markdown("#### Current Evaluation Run Summary")
@@ -630,19 +661,33 @@ def run_evaluation() -> None:
         action="evaluation_run",
     )
     current_hash = generate_hash(st.session_state["working_yml"])
-    model_changed_test = ("semantic_model_hash" in st.session_state) and (
+    model_changed_test = ("semantic_model_hash" not in st.session_state) or (
         current_hash != st.session_state["semantic_model_hash"]
     )
-    if (
-        "validated" in st.session_state and not st.session_state["validated"]
-    ) or model_changed_test:
-        st.error("Please validate your semantic model before evaluating.")
+    placeholder = st.empty()
+
+    if not model_changed_test and "total_eval_frame" in st.session_state:
+        placeholder.write("Model has not changed since last evaluation run.")
         return
-    if "total_eval_frame" in st.session_state:
-        del st.session_state["total_eval_frame"]
+
+    if not st.session_state.validated or model_changed_test:
+        placeholder.write("Validating model...")
+        try:
+            # try loading the yaml
+            _ = yaml.safe_load(st.session_state["working_yml"])
+            # try validating the yaml using analyst
+            validate(st.session_state["working_yml"], get_snowflake_connection())
+            st.session_state.validated = True
+            update_last_validated_model()
+        except Exception as e:
+            placeholder.error(f"Could not validate model ❌ with error: {e}")
+            return
+        placeholder.write("Model validated ✅")
+    clear_evaluation_data()
     st.session_state["semantic_model_hash"] = current_hash
     st.write("Running evaluation...")
     st.session_state["eval_timestamp"] = time.strftime("%Y-%m-%d %H:%M:%S")
+    st.session_state["eval_hash"] = generate_hash(st.session_state["eval_timestamp"])
     send_analyst_requests()
    run_sql_queries()
     result_comparisons()