From f84923bfb9633b470799fe49840c4d6696108253 Mon Sep 17 00:00:00 2001 From: Troy Date: Fri, 11 Oct 2024 16:51:08 -0600 Subject: [PATCH 1/4] query report --- api/internal_api.py | 24 ++++++------- scripts/query_report.py | 78 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 12 deletions(-) create mode 100644 scripts/query_report.py diff --git a/api/internal_api.py b/api/internal_api.py index d509530..e3fdcd7 100644 --- a/api/internal_api.py +++ b/api/internal_api.py @@ -33,7 +33,7 @@ def get_db_config(streamlit=True): } -def get_connection(db_config): +def _get_connection(db_config): connection_string = f"postgresql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['dbname']}" engine = sqlalchemy.create_engine(connection_string) conn = engine.connect() @@ -73,7 +73,7 @@ def __exit__(self, exc_type, exc_val, exc_tb): self.engine.dispose() @contextmanager - def get_connection( + def _get_connection( self, ) -> Generator[sqlalchemy.engine.base.Connection, None, None]: """Context manager for database connections.""" @@ -93,15 +93,15 @@ def _run_query(self, query: str) -> pd.DataFrame: Returns: pandas.DataFrame: The query results. """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) # queries def get_volume( self, - chain: str, start_date: datetime, end_date: datetime, + chain: str = "arbitrum_mainnet", resolution: str = "daily", ) -> pd.DataFrame: """ @@ -125,7 +125,7 @@ def get_volume( WHERE ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_core_stats( @@ -157,7 +157,7 @@ def get_core_stats( GROUP BY ts, chain ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_core_stats_by_collateral( @@ -202,7 +202,7 @@ def get_core_stats_by_collateral( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_perps_stats( @@ -236,7 +236,7 @@ def get_perps_stats( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_perps_markets_history( @@ -255,7 +255,7 @@ def get_perps_markets_history( Returns: pandas.DataFrame: Perps markets history with columns: - 'ts', 'chain', 'market_symbol', 'size_usd', 'long_oi_pct', 'short_oi_pct' + 'ts', 'chain', 'market_symbol', 'total_oi_usd', 'long_oi_pct', 'short_oi_pct' """ chain_label = self.SUPPORTED_CHAINS[chain] query = f""" @@ -263,7 +263,7 @@ def get_perps_markets_history( ts, '{chain_label}' AS chain, CONCAT(market_symbol, ' (', '{chain_label}', ')') as market_symbol, - size_usd, + total_oi_usd, long_oi_pct, short_oi_pct FROM {self.environment}_{chain}.fct_perp_market_history_{chain} @@ -271,7 +271,7 @@ def get_perps_markets_history( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_snx_token_buyback( @@ -302,5 +302,5 @@ def get_snx_token_buyback( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) diff --git 
a/scripts/query_report.py b/scripts/query_report.py new file mode 100644 index 0000000..14f23ba --- /dev/null +++ b/scripts/query_report.py @@ -0,0 +1,78 @@ +import time +from datetime import datetime, timedelta +from api.internal_api import SynthetixAPI, get_db_config + + +def time_query(api, query_name, *args, **kwargs): + start_time = time.time() + getattr(api, query_name)(*args, **kwargs) + end_time = time.time() + return end_time - start_time + + +def run_benchmarks(api, num_runs=3): + benchmarks = {} + + # Define test scenarios + end_date = datetime.now() + start_date_1d = end_date - timedelta(days=1) + start_date_7d = end_date - timedelta(days=7) + start_date_30d = end_date - timedelta(days=30) + + # Get the names of the queries from the SynthetixAPI class + queries = [ + method + for method in dir(api) + if method.startswith("get_") and callable(getattr(api, method)) + ] + v3_queries = [query for query in queries if "v2" not in query] + v2_queries = [query for query in queries if "v2" in query] + + scenarios = [ + ( + query_name, + { + "start_date": start_date, + "end_date": end_date, + "chain": chain, + }, + ) + for query_name in v3_queries + for start_date in [start_date_1d, start_date_7d, start_date_30d] + for chain in ["arbitrum_mainnet", "base_mainnet"] + ] + + for query_name, params in scenarios: + benchmarks[f"{query_name} - {params}"] = [] + for _ in range(num_runs): + try: + execution_time = time_query(api, query_name, **params) + benchmarks[f"{query_name} - {params}"].append(execution_time) + except: + pass + + return benchmarks + + +def print_report(benchmarks): + print("SynthetixAPI Benchmark Report") + print("=============================") + for query, times in benchmarks.items(): + if len(times) == 0: + print(f"\n{query}") + print(" No data available") + continue + + avg_time = sum(times) / len(times) + print(f"\n{query}") + print(f" Average execution time: {avg_time:.4f} seconds") + print(f" Min execution time: {min(times):.4f} seconds") + print(f" Max execution time: {max(times):.4f} seconds") + + +if __name__ == "__main__": + db_config = get_db_config(streamlit=False) + api = SynthetixAPI(db_config, environment="prod", streamlit=False) + + benchmarks = run_benchmarks(api) + print_report(benchmarks) From 42edc6d82dce815d37962830e588b2249a12fbaf Mon Sep 17 00:00:00 2001 From: Troy Date: Wed, 23 Oct 2024 13:03:04 -0600 Subject: [PATCH 2/4] fix api methods --- api/internal_api.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/api/internal_api.py b/api/internal_api.py index 11c57c0..d09ead7 100644 --- a/api/internal_api.py +++ b/api/internal_api.py @@ -16,6 +16,7 @@ def get_db_config(streamlit=True): DB_PASS = st.secrets.database.DB_PASS DB_HOST = st.secrets.database.DB_HOST DB_PORT = st.secrets.database.DB_PORT + DB_ENV = st.secrets.database.DB_ENV else: load_dotenv() DB_NAME = os.environ.get("DB_NAME") @@ -23,6 +24,7 @@ def get_db_config(streamlit=True): DB_PASS = os.environ.get("DB_PASS") DB_HOST = os.environ.get("DB_HOST") DB_PORT = os.environ.get("DB_PORT") + DB_ENV = os.environ.get("DB_ENV") return { "dbname": DB_NAME, @@ -30,6 +32,7 @@ def get_db_config(streamlit=True): "password": DB_PASS, "host": DB_HOST, "port": DB_PORT, + "env": DB_ENV, } @@ -57,8 +60,13 @@ def __init__( Args: environment (str): The environment to query data for ('prod' or 'dev') """ - self.environment = environment self.db_config = get_db_config(streamlit) + + if db_config["env"] is not None: + self.environment = self.db_config["env"] + else: + 
self.environment = environment + self.engine = self._create_engine() self.Session = sessionmaker(bind=self.engine) @@ -239,7 +247,7 @@ def get_core_account_activity( GROUP BY 1, 2, 3 ORDER BY 1 """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_core_nof_stakers( @@ -270,7 +278,7 @@ def get_core_nof_stakers( WHERE date >= '{start_date}' and date <= '{end_date}' ORDER BY date """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_perps_stats( @@ -339,7 +347,7 @@ def get_perps_open_interest( GROUP BY 1, 2 ORDER BY 2, 1 """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_perps_markets_history( @@ -409,7 +417,7 @@ def get_perps_account_activity( GROUP BY 1, 2 ORDER BY 1 """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_snx_token_buyback( @@ -477,7 +485,7 @@ def get_perps_v2_stats( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) def get_perps_v2_open_interest( @@ -510,5 +518,5 @@ def get_perps_v2_open_interest( ts >= '{start_date}' and ts <= '{end_date}' ORDER BY ts """ - with self.get_connection() as conn: + with self._get_connection() as conn: return pd.read_sql_query(query, conn) From 464135f07d409dc8e9ba2eab3dda682c8a19527c Mon Sep 17 00:00:00 2001 From: Troy Date: Wed, 23 Oct 2024 13:03:25 -0600 Subject: [PATCH 3/4] add query timing --- dashboards/system_monitor/app.py | 3 +- .../system_monitor/views/performance.py | 24 +++ dashboards/utils/performance.py | 204 ++++++++++++++++++ scripts/query_report.py | 78 ------- 4 files changed, 230 insertions(+), 79 deletions(-) create mode 100644 dashboards/system_monitor/views/performance.py create mode 100644 dashboards/utils/performance.py delete mode 100644 scripts/query_report.py diff --git a/dashboards/system_monitor/app.py b/dashboards/system_monitor/app.py index a4f147d..e16f9a1 100644 --- a/dashboards/system_monitor/app.py +++ b/dashboards/system_monitor/app.py @@ -25,10 +25,11 @@ # pages core = st.Page("views/core.py", title="Core System") perps = st.Page("views/perps.py", title="Perps Markets") +performance = st.Page("views/performance.py", title="Query Performance") # navigation pages = { - "": [core, perps], + "": [core, perps, performance], } nav = st.navigation(pages) nav.run() diff --git a/dashboards/system_monitor/views/performance.py b/dashboards/system_monitor/views/performance.py new file mode 100644 index 0000000..e334f0d --- /dev/null +++ b/dashboards/system_monitor/views/performance.py @@ -0,0 +1,24 @@ +import streamlit as st + +from api.internal_api import SynthetixAPI, get_db_config +from dashboards.utils import performance + +st.markdown("# Query Performance") + +if "result_df" not in st.session_state: + st.session_state.result_df = None + + +def time_queries(): + api = SynthetixAPI(db_config=get_db_config(streamlit=True)) + results = performance.run_benchmarks(api) + + # create dataframe + df = performance.create_benchmark_dataframe(results) + st.session_state.result_df = df + + +st.button("Run queries", on_click=time_queries) + +if st.session_state.result_df is not None: + st.dataframe(st.session_state.result_df) diff --git a/dashboards/utils/performance.py b/dashboards/utils/performance.py new file mode 
100644 index 0000000..4e0d6ad --- /dev/null +++ b/dashboards/utils/performance.py @@ -0,0 +1,204 @@ +import time +from datetime import datetime, timedelta +import pandas as pd +from api.internal_api import SynthetixAPI, get_db_config +import logging +from typing import Dict, List, Tuple, TypedDict + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", +) +logger = logging.getLogger(__name__) + + +class BenchmarkData(TypedDict): + query_name: str + params: dict + execution_times: List[float] + errors: List[str] + + +def create_benchmark_data(query_name: str, params: dict) -> BenchmarkData: + """Create a new benchmark data dictionary.""" + return { + "query_name": query_name, + "params": params, + "execution_times": [], + "errors": [], + } + + +def calculate_stats(benchmark_data: BenchmarkData) -> Dict[str, float]: + """Calculate statistics for a benchmark result.""" + times = benchmark_data["execution_times"] + if not times: + return {"avg_time": 0, "min_time": 0, "max_time": 0, "success_rate": 0} + + total_attempts = len(times) + len(benchmark_data["errors"]) + return { + "avg_time": sum(times) / len(times), + "min_time": min(times), + "max_time": max(times), + "success_rate": len(times) / total_attempts if total_attempts > 0 else 0, + } + + +def time_query(api, query_name: str, *args, **kwargs) -> float: + """Execute a query and measure its execution time.""" + start_time = time.time() + getattr(api, query_name)(*args, **kwargs) + end_time = time.time() + return end_time - start_time + + +def generate_scenarios(api) -> List[Tuple[str, dict]]: + """Generate test scenarios for benchmarking.""" + end_date = datetime.now() + date_ranges = { + "1d": end_date - timedelta(days=1), + "7d": end_date - timedelta(days=7), + "30d": end_date - timedelta(days=30), + } + + queries = [ + method + for method in dir(api) + if method.startswith("get_") and callable(getattr(api, method)) + ] + v3_queries = [query for query in queries if "v2" not in query] + + scenarios = [ + ( + query_name, + { + "start_date": start_date, + "end_date": end_date, + "chain": chain, + }, + ) + for query_name in v3_queries + for start_date_label, start_date in date_ranges.items() + for chain in ["arbitrum_mainnet", "base_mainnet"] + ] + + return scenarios + + +def run_benchmarks(api, num_runs: int = 3) -> Dict[str, BenchmarkData]: + """Run benchmarks for all scenarios.""" + logger.info("Starting benchmark run") + scenarios = generate_scenarios(api) + results: Dict[str, BenchmarkData] = {} + + total_scenarios = len(scenarios) + for idx, (query_name, params) in enumerate(scenarios, 1): + scenario_key = f"{query_name} - {params}" + logger.info(f"Running scenario {idx}/{total_scenarios}: {scenario_key}") + + benchmark_data = create_benchmark_data(query_name, params) + results[scenario_key] = benchmark_data + + for run in range(num_runs): + try: + logger.debug(f" Run {run + 1}/{num_runs}") + execution_time = time_query(api, query_name, **params) + benchmark_data["execution_times"].append(execution_time) + logger.debug(f" Completed in {execution_time:.4f} seconds") + except Exception as e: + error_msg = f"Error in run {run + 1}: {str(e)}" + benchmark_data["errors"].append(error_msg) + logger.error(error_msg) + + logger.info("Benchmark run completed") + return results + + +def create_benchmark_dataframe(results: Dict[str, BenchmarkData]) -> pd.DataFrame: + """Convert benchmark results to a pandas DataFrame.""" + data = [] + for scenario_key, benchmark_data in 
results.items(): + stats = calculate_stats(benchmark_data) + + row = { + "query_name": benchmark_data["query_name"], + "chain": benchmark_data["params"]["chain"], + "start_date": benchmark_data["params"]["start_date"].strftime("%Y-%m-%d"), + "end_date": benchmark_data["params"]["end_date"].strftime("%Y-%m-%d"), + "avg_time": stats["avg_time"], + "min_time": stats["min_time"], + "max_time": stats["max_time"], + "success_rate": stats["success_rate"], + "error_count": len(benchmark_data["errors"]), + } + data.append(row) + + return pd.DataFrame(data) + + +def print_report(results: Dict[str, BenchmarkData]): + """Print a formatted report of benchmark results.""" + print("\nSynthetixAPI Benchmark Report") + print("=============================") + + # Calculate overall statistics + total_queries = len(results) + total_successful = sum(1 for r in results.values() if r["execution_times"]) + total_errors = sum(len(r["errors"]) for r in results.values()) + + print(f"\nSummary:") + print(f"Total scenarios: {total_queries}") + print(f"Successful scenarios: {total_successful}") + print(f"Total errors: {total_errors}") + print("\nDetailed Results:") + print("----------------") + + for scenario_key, benchmark_data in results.items(): + print(f"\n{scenario_key}") + if not benchmark_data["execution_times"]: + print(" No successful executions") + if benchmark_data["errors"]: + print(" Errors encountered:") + for error in benchmark_data["errors"]: + print(f" - {error}") + continue + + stats = calculate_stats(benchmark_data) + print(f" Average execution time: {stats['avg_time']:.4f} seconds") + print(f" Min execution time: {stats['min_time']:.4f} seconds") + print(f" Max execution time: {stats['max_time']:.4f} seconds") + print(f" Success rate: {stats['success_rate'] * 100:.1f}%") + + if benchmark_data["errors"]: + print(" Errors encountered:") + for error in benchmark_data["errors"]: + print(f" - {error}") + + +def save_results(df: pd.DataFrame, prefix: str = "benchmark_results") -> str: + """Save benchmark results to a CSV file.""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + filename = f"{prefix}_{timestamp}.csv" + df.to_csv(filename, index=False) + return filename + + +if __name__ == "__main__": + logger.info("Initializing benchmark script") + + db_config = get_db_config(streamlit=False) + api = SynthetixAPI(db_config, environment="prod", streamlit=False) + + # Run benchmarks + results = run_benchmarks(api) + + # Create DataFrame + df = create_benchmark_dataframe(results) + + # Print report + print_report(results) + + # Save results + csv_filename = save_results(df) + logger.info(f"Results saved to {csv_filename}") diff --git a/scripts/query_report.py b/scripts/query_report.py deleted file mode 100644 index 14f23ba..0000000 --- a/scripts/query_report.py +++ /dev/null @@ -1,78 +0,0 @@ -import time -from datetime import datetime, timedelta -from api.internal_api import SynthetixAPI, get_db_config - - -def time_query(api, query_name, *args, **kwargs): - start_time = time.time() - getattr(api, query_name)(*args, **kwargs) - end_time = time.time() - return end_time - start_time - - -def run_benchmarks(api, num_runs=3): - benchmarks = {} - - # Define test scenarios - end_date = datetime.now() - start_date_1d = end_date - timedelta(days=1) - start_date_7d = end_date - timedelta(days=7) - start_date_30d = end_date - timedelta(days=30) - - # Get the names of the queries from the SynthetixAPI class - queries = [ - method - for method in dir(api) - if method.startswith("get_") and callable(getattr(api, 
method)) - ] - v3_queries = [query for query in queries if "v2" not in query] - v2_queries = [query for query in queries if "v2" in query] - - scenarios = [ - ( - query_name, - { - "start_date": start_date, - "end_date": end_date, - "chain": chain, - }, - ) - for query_name in v3_queries - for start_date in [start_date_1d, start_date_7d, start_date_30d] - for chain in ["arbitrum_mainnet", "base_mainnet"] - ] - - for query_name, params in scenarios: - benchmarks[f"{query_name} - {params}"] = [] - for _ in range(num_runs): - try: - execution_time = time_query(api, query_name, **params) - benchmarks[f"{query_name} - {params}"].append(execution_time) - except: - pass - - return benchmarks - - -def print_report(benchmarks): - print("SynthetixAPI Benchmark Report") - print("=============================") - for query, times in benchmarks.items(): - if len(times) == 0: - print(f"\n{query}") - print(" No data available") - continue - - avg_time = sum(times) / len(times) - print(f"\n{query}") - print(f" Average execution time: {avg_time:.4f} seconds") - print(f" Min execution time: {min(times):.4f} seconds") - print(f" Max execution time: {max(times):.4f} seconds") - - -if __name__ == "__main__": - db_config = get_db_config(streamlit=False) - api = SynthetixAPI(db_config, environment="prod", streamlit=False) - - benchmarks = run_benchmarks(api) - print_report(benchmarks) From a3b096c53959f06a1b406a84d6c108df89a02b45 Mon Sep 17 00:00:00 2001 From: Troy Date: Thu, 24 Oct 2024 15:28:30 -0600 Subject: [PATCH 4/4] update performance dashboard --- dashboards/system_monitor/app.py | 9 +++++++++ dashboards/system_monitor/views/performance.py | 13 ++++++------- dashboards/utils/performance.py | 11 +++++------ 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/dashboards/system_monitor/app.py b/dashboards/system_monitor/app.py index e16f9a1..657f416 100644 --- a/dashboards/system_monitor/app.py +++ b/dashboards/system_monitor/app.py @@ -1,6 +1,7 @@ import os from dotenv import load_dotenv import streamlit as st +from api.internal_api import SynthetixAPI, get_db_config load_dotenv() @@ -22,6 +23,14 @@ st.markdown(hide_footer, unsafe_allow_html=True) +# set the API +@st.cache_resource +def load_api(): + return SynthetixAPI(db_config=get_db_config(streamlit=True)) + + +st.session_state.api = load_api() + # pages core = st.Page("views/core.py", title="Core System") perps = st.Page("views/perps.py", title="Perps Markets") diff --git a/dashboards/system_monitor/views/performance.py b/dashboards/system_monitor/views/performance.py index e334f0d..40ab4f1 100644 --- a/dashboards/system_monitor/views/performance.py +++ b/dashboards/system_monitor/views/performance.py @@ -5,20 +5,19 @@ st.markdown("# Query Performance") -if "result_df" not in st.session_state: - st.session_state.result_df = None +if "df_query" not in st.session_state: + st.session_state.df_query = None def time_queries(): - api = SynthetixAPI(db_config=get_db_config(streamlit=True)) - results = performance.run_benchmarks(api) + results = performance.run_benchmarks(st.session_state.api) # create dataframe df = performance.create_benchmark_dataframe(results) - st.session_state.result_df = df + st.session_state.df_query = df st.button("Run queries", on_click=time_queries) -if st.session_state.result_df is not None: - st.dataframe(st.session_state.result_df) +if st.session_state.df_query is not None: + st.dataframe(st.session_state.df_query) diff --git a/dashboards/utils/performance.py b/dashboards/utils/performance.py index 4e0d6ad..8253227 
100644 --- a/dashboards/utils/performance.py +++ b/dashboards/utils/performance.py @@ -1,9 +1,9 @@ +import streamlit as st import time -from datetime import datetime, timedelta -import pandas as pd -from api.internal_api import SynthetixAPI, get_db_config import logging from typing import Dict, List, Tuple, TypedDict +from datetime import datetime, timedelta +import pandas as pd logging.basicConfig( level=logging.INFO, @@ -120,18 +120,17 @@ def create_benchmark_dataframe(results: Dict[str, BenchmarkData]) -> pd.DataFram data = [] for scenario_key, benchmark_data in results.items(): stats = calculate_stats(benchmark_data) + params = benchmark_data["params"] row = { "query_name": benchmark_data["query_name"], - "chain": benchmark_data["params"]["chain"], - "start_date": benchmark_data["params"]["start_date"].strftime("%Y-%m-%d"), - "end_date": benchmark_data["params"]["end_date"].strftime("%Y-%m-%d"), "avg_time": stats["avg_time"], "min_time": stats["min_time"], "max_time": stats["max_time"], "success_rate": stats["success_rate"], "error_count": len(benchmark_data["errors"]), } + row.update(params) data.append(row) return pd.DataFrame(data)
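Patch 1 renames get_connection to _get_connection in api/internal_api.py, signalling that connection handling is an internal concern of SynthetixAPI. Below is a minimal, self-contained sketch of that pattern — a @contextmanager that opens one SQLAlchemy connection per query and always closes it. The MiniAPI class and its config dict are placeholders for illustration, not the real API.

    from contextlib import contextmanager
    from typing import Generator

    import pandas as pd
    import sqlalchemy


    class MiniAPI:
        """Illustrative stand-in for SynthetixAPI's connection handling."""

        def __init__(self, db_config: dict):
            url = (
                f"postgresql://{db_config['user']}:{db_config['password']}"
                f"@{db_config['host']}:{db_config['port']}/{db_config['dbname']}"
            )
            self.engine = sqlalchemy.create_engine(url)

        @contextmanager
        def _get_connection(
            self,
        ) -> Generator[sqlalchemy.engine.base.Connection, None, None]:
            # One connection per query, closed even if the query raises.
            conn = self.engine.connect()
            try:
                yield conn
            finally:
                conn.close()

        def _run_query(self, query: str) -> pd.DataFrame:
            with self._get_connection() as conn:
                return pd.read_sql_query(query, conn)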
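The same patch moves get_volume's chain parameter after end_date and gives it a default of "arbitrum_mainnet", which silently breaks call sites that passed arguments positionally in the old (chain, start_date, end_date) order. Calling with keywords sidesteps the problem; the names api, start_date, and end_date below are assumed (a SynthetixAPI instance and two datetimes), not taken from the patch.

    # Keyword arguments keep the call correct regardless of parameter order.
    df_volume = api.get_volume(
        start_date=start_date,
        end_date=end_date,
        chain="base_mainnet",
        resolution="daily",
    )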
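Patch 2 threads a DB_ENV setting through get_db_config and lets it override the environment argument in the constructor. As written, the constructor indexes db_config["env"] on the dict passed by the caller, which raises KeyError for config dicts built before the new key existed. A more defensive resolution — my suggestion, not what the patch does — uses .get() with a fallback:

    def resolve_environment(db_config: dict, default: str = "prod") -> str:
        """Prefer DB_ENV from the config; otherwise fall back to the given default."""
        # .get() returns None instead of raising when "env" is missing.
        env = db_config.get("env")
        return env if env else default


    class SynthetixAPIShim:
        """Sketch of the constructor logic only; the real class does much more."""

        def __init__(self, db_config: dict, environment: str = "prod"):
            self.db_config = db_config
            self.environment = resolve_environment(db_config, default=environment)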
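Both scripts/query_report.py (Patch 1) and dashboards/utils/performance.py (Patch 3) time queries with time.time(). A variant worth considering — an assumption on my part, not part of either patch — uses time.perf_counter(), which is monotonic and higher-resolution, and returns the query result alongside the elapsed time so callers can confirm the query actually produced rows:

    import time
    from typing import Any, Tuple


    def time_query(api: Any, query_name: str, *args, **kwargs) -> Tuple[float, Any]:
        """Run api.<query_name>(*args, **kwargs) and return (seconds, result)."""
        start = time.perf_counter()
        result = getattr(api, query_name)(*args, **kwargs)
        elapsed = time.perf_counter() - start
        return elapsed, result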
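generate_scenarios in Patch 3 discovers queries by introspecting every get_* method and passes the same chain/start_date/end_date parameters to all of them; any method with a different signature fails on every run and only surfaces in the error counts. The sketch below filters by signature with inspect before building scenarios — an alternative approach, not what the patch implements:

    import inspect
    from datetime import datetime, timedelta
    from typing import Any, Dict, List, Tuple


    def generate_scenarios(api: Any) -> List[Tuple[str, Dict]]:
        """Build (query_name, params) pairs for methods that accept the benchmark params."""
        end_date = datetime.now()
        required = {"start_date", "end_date", "chain"}
        scenarios: List[Tuple[str, Dict]] = []

        for name in dir(api):
            if not name.startswith("get_") or "v2" in name:
                continue
            method = getattr(api, name)
            if not callable(method):
                continue
            # Skip queries whose signatures cannot take every parameter we pass.
            if not required.issubset(inspect.signature(method).parameters):
                continue
            for days_back in (1, 7, 30):
                start_date = end_date - timedelta(days=days_back)
                for chain in ("arbitrum_mainnet", "base_mainnet"):
                    scenarios.append(
                        (name, {"start_date": start_date, "end_date": end_date, "chain": chain})
                    )
        return scenarios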
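Patch 4 moves API construction into the app shell behind @st.cache_resource, so each page reads st.session_state.api instead of building its own SynthetixAPI. cache_resource keeps a single instance — and therefore a single SQLAlchemy engine and its connection pool — alive across reruns and sessions; the sketch below restates that wiring in isolation:

    import streamlit as st

    from api.internal_api import SynthetixAPI, get_db_config


    @st.cache_resource
    def load_api() -> SynthetixAPI:
        # Built once per server process; subsequent reruns reuse the same object.
        return SynthetixAPI(db_config=get_db_config(streamlit=True))


    # Pages such as views/performance.py read the shared instance from session state.
    st.session_state.api = load_api()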
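Also in Patch 4, create_benchmark_dataframe stops formatting the dates and simply merges the scenario params into each row with row.update(params), so the resulting frame keeps query_name, chain, start_date, end_date, and the timing columns. A small follow-on the patch does not include — purely a suggestion — is to pivot that frame for a per-chain comparison:

    import pandas as pd


    def summarize_by_chain(df: pd.DataFrame) -> pd.DataFrame:
        """One row per query, one column per chain, averaging avg_time across date ranges."""
        return df.pivot_table(
            index="query_name",
            columns="chain",
            values="avg_time",
            aggfunc="mean",
        ).round(4)

On the performance page, st.dataframe(summarize_by_chain(st.session_state.df_query)) could then sit alongside the raw benchmark table.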