From f45164507ba9269ec86170e4f1d5af1c5a75fc94 Mon Sep 17 00:00:00 2001 From: Marcelo Altmann Date: Mon, 9 Sep 2024 12:02:06 -0300 Subject: [PATCH] Addressed review comments. Fixed review comments in hosts and proxysql.cnf files. Added measure unit to README.md to clarify the values are measured by proxysql using microseconds. --- README.md | 18 +++++++++--------- build/proxysql.cnf | 10 +++++----- build/test.cnf | 2 +- src/config.rs | 2 +- src/hosts.rs | 18 +++++++++++------- src/proxysql.rs | 19 ++++++++++++++----- src/queries.rs | 2 +- 7 files changed, 42 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 572d977..ed9dbeb 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,7 @@ The Query Discovery Mode is a set of possible rules to discover queries to autom 2. `SumTime` - Total Time Spent Executing the Query * Formula: `total_execution_time = sum_time` - * Description: This metric represents the total cumulative time spent executing the query across all its executions. It provides a clear understanding of how much processing time the query is consuming over time. A high total execution time can indicate that the query is either frequently executed or is time-intensive to process. + * Description: This metric represents the total cumulative time spent (measured in microseconds) executing the query across all its executions. It provides a clear understanding of how much processing time the query is consuming over time. A high total execution time can indicate that the query is either frequently executed or is time-intensive to process. 3. `SumRowsSent` - Total Number of Rows Sent by the Query (sum_rows_sent) * Formula: `total_rows_sent = sum_rows_sent` @@ -75,27 +75,27 @@ The Query Discovery Mode is a set of possible rules to discover queries to autom 4. 
`MeanTime` - Average Query Execution Time (Mean) * Formula: `mean_time = sum_time / count_star` - * Description: The mean time gives you an idea of the typical performance of the query over all executions. It provides a central tendency of how long the query generally takes to execute. + * Description: The mean time gives you an idea of the typical performance (measured in microseconds) of the query over all executions. It provides a central tendency of how long the query generally takes to execute. 5. `ExecutionTimeDistance` - Time Distance Between Query Executions * Formula: `execution_time_distance = max_time - min_time` - * Description: This shows the spread between the fastest and slowest executions of the query. A large range might indicate variability in system load, input sizes, or external factors affecting performance. + * Description: This shows the spread between the fastest and slowest executions of the query (measured in microseconds). A large range might indicate variability in system load, input sizes, or external factors affecting performance. 6. `QueryThroughput` - Query Throughput * Formula: `query_throughput = count_star / sum_time` - * Description: This shows how many queries are processed per unit of time. It’s useful for understanding system capacity and how efficiently the database is handling the queries. + * Description: This shows how many queries are processed per unit of time (measured in microseconds). It’s useful for understanding system capacity and how efficiently the database is handling the queries. 7. `WorstBestCase` - Worst Best-Case Query Performance - * Formula: `worst_case = min_time` - * Description: The min_time metric gives the fastest time the query was ever executed. It reflects the best-case performance scenario, which could indicate the query’s performance under optimal conditions. 
+ * Formula: `worst_case = max(min_time)` + * Description: The min_time metric gives the fastest time the query was ever executed (measured in microseconds). It reflects the best-case performance scenario, which could indicate the query’s performance under optimal conditions. 8. `WorstWorstCase` - Worst Worst-Case Query Performance - * Formula: `worst_case = max_time` - * Description: The max_time shows the slowest time the query was executed. This can indicate potential bottlenecks or edge cases where the query underperforms, which could be due to larger data sets, locks, or high server load. + * Formula: `worst_case = max(max_time)` + * Description: The max_time shows the slowest time the query was executed (measured in microseconds). This can indicate potential bottlenecks or edge cases where the query underperforms, which could be due to larger data sets, locks, or high server load. 9. `DistanceMeanMax` - Distance Between Mean Time and Max Time (mean_time vs max_time) * Formula: `distance_mean_max = max_time - mean_time` - * Description: The distance between the mean execution time and the maximum execution time provides insight into how much slower the worst-case execution is compared to the average. A large gap indicates significant variability in query performance, which could be caused by certain executions encountering performance bottlenecks, such as large datasets, locking, or high system load. + * Description: The distance between the mean execution time and the maximum execution time provides insight into how much slower the worst-case execution is compared to the average (measured in microseconds). A large gap indicates significant variability in query performance, which could be caused by certain executions encountering performance bottlenecks, such as large datasets, locking, or high system load. # Operation Mode The Operation Mode is a set of possible rules to run the scheduler. 
The options are: diff --git a/build/proxysql.cnf b/build/proxysql.cnf index a45afd7..08a0b4f 100644 --- a/build/proxysql.cnf +++ b/build/proxysql.cnf @@ -17,14 +17,14 @@ mysql_variables= monitor_password="noria" } -mysql_users: +mysql_users= ( { - username = "root" - password = "noria" - default_hostgroup = 1 + username="root" + password="noria" + default_hostgroup=1 max_connections=1000 default_schema="noria" - active = 1 + active=1 } ) diff --git a/build/test.cnf b/build/test.cnf index 8942e3f..80625b0 100644 --- a/build/test.cnf +++ b/build/test.cnf @@ -6,7 +6,7 @@ readyset_user = 'root' readyset_password = 'noria' source_hostgroup = 1 readyset_hostgroup = 2 -warmup_time = 5 +warmup_time_s = 10 lock_file = '/tmp/readyset_scheduler.lock' operation_mode='All' number_of_queries=2 diff --git a/src/config.rs b/src/config.rs index da41f80..0fe6816 100644 --- a/src/config.rs +++ b/src/config.rs @@ -76,7 +76,7 @@ pub struct Config { pub readyset_password: String, pub source_hostgroup: u16, pub readyset_hostgroup: u16, - pub warmup_time: Option, + pub warmup_time_s: Option, pub lock_file: Option, pub operation_mode: Option, pub number_of_queries: u16, diff --git a/src/hosts.rs b/src/hosts.rs index 38786d4..1110fae 100644 --- a/src/hosts.rs +++ b/src/hosts.rs @@ -6,13 +6,13 @@ use mysql::{prelude::Queryable, Conn, OptsBuilder}; /// Defines the possible status of a host #[derive(PartialEq, Clone, Copy)] pub enum HostStatus { - ///backend server is fully operational + /// backend server is fully operational Online, - //backend sever is temporarily taken out of use because of either too many connection errors in a time that was too short, or the replication lag exceeded the allowed threshold + /// backend server is temporarily taken out of use because of either too many connection errors in a time that was too short, or the replication lag exceeded the allowed threshold Shunned, - //when a server is put into OFFLINE_SOFT mode, no new connections are created toward that 
server, while the existing connections are kept until they are returned to the connection pool or destructed. In other words, connections are kept in use until multiplexing is enabled again, for example when a transaction is completed. This makes it possible to gracefully detach a backend as long as multiplexing is efficient + /// when a server is put into OFFLINE_SOFT mode, no new connections are created toward that server, while the existing connections are kept until they are returned to the connection pool or destructed. In other words, connections are kept in use until multiplexing is enabled again, for example when a transaction is completed. This makes it possible to gracefully detach a backend as long as multiplexing is efficient OfflineSoft, - //when a server is put into OFFLINE_HARD mode, no new connections are created toward that server and the existing **free **connections are ** immediately dropped**, while backend connections currently associated with a client session are dropped as soon as the client tries to use them. This is equivalent to deleting the server from a hostgroup. Internally, setting a server in OFFLINE_HARD status is equivalent to deleting the server + /// when a server is put into OFFLINE_HARD mode, no new connections are created toward that server and the existing free connections are immediately dropped, while backend connections currently associated with a client session are dropped as soon as the client tries to use them. This is equivalent to deleting the server from a hostgroup. Internally, setting a server in OFFLINE_HARD status is equivalent to deleting the server OfflineHard, } @@ -203,14 +203,18 @@ impl Host { /// true if the query was cached successfully, false otherwise. 
pub fn cache_query(&mut self, query: &Query) -> Result { match &mut self.conn { - None => return Ok(false), + None => { + return Err(mysql::Error::IoError(std::io::Error::new( + std::io::ErrorKind::Other, + "Connection to Readyset host is not established", + ))) + } Some(conn) => { conn.query_drop(format!( "CREATE CACHE d_{} FROM {}", query.get_digest(), query.get_digest_text() - )) - .expect("Failed to create readyset cache"); + ))?; } } Ok(true) diff --git a/src/proxysql.rs b/src/proxysql.rs index 8dfe40b..0a830ee 100644 --- a/src/proxysql.rs +++ b/src/proxysql.rs @@ -12,10 +12,9 @@ const MIRROR_QUERY_TOKEN: &str = "Mirror by readyset scheduler at"; const DESTINATION_QUERY_TOKEN: &str = "Added by readyset scheduler at"; pub struct ProxySQL { readyset_hostgroup: u16, - warmup_time: u16, + warmup_time_s: u16, conn: mysql::Conn, hosts: Vec, - //queries: Vec, } impl ProxySQL { @@ -58,17 +57,24 @@ impl ProxySQL { ProxySQL { conn, readyset_hostgroup: config.readyset_hostgroup, - warmup_time: config.warmup_time.unwrap_or(0), + warmup_time_s: config.warmup_time_s.unwrap_or(0), hosts, } } /// This function is used to add a query rule to ProxySQL. /// + /// # Arguments + /// + /// * `query` - A reference to a Query containing the query to be added as a rule. + /// + /// # Returns + /// + /// A boolean indicating if the rule was added successfully. 
pub fn add_as_query_rule(&mut self, query: &Query) -> Result { let datetime_now: DateTime = Local::now(); let date_formatted = datetime_now.format("%Y-%m-%d %H:%M:%S"); - if self.warmup_time > 0 { + if self.warmup_time_s > 0 { self.conn.query_drop(format!("INSERT INTO mysql_query_rules (username, mirror_hostgroup, active, digest, apply, comment) VALUES ('{}', {}, 1, '{}', 1, '{}: {}')", query.get_user(), self.readyset_hostgroup, query.get_digest(), MIRROR_QUERY_TOKEN, date_formatted)).expect("Failed to insert into mysql_query_rules"); messages::print_info("Inserted warm-up rule"); } else { @@ -135,7 +141,7 @@ impl ProxySQL { let elapsed = datetime_now .signed_duration_since(datetime_mirror_rule) .num_seconds(); - if elapsed > self.warmup_time as i64 { + if elapsed > self.warmup_time_s as i64 { let comment = format!( "{}\n Added by readyset scheduler at: {}", comment, date_formatted @@ -150,6 +156,9 @@ impl ProxySQL { Ok(updated_rules) } + /// This function is used to check if a given host is healthy. + /// This is done by checking if the Readyset host has an active + /// connection and if the snapshot is completed. pub fn health_check(&mut self) { let mut status_changes = Vec::new(); diff --git a/src/queries.rs b/src/queries.rs index 6db44e2..a8b738e 100644 --- a/src/queries.rs +++ b/src/queries.rs @@ -148,7 +148,7 @@ impl QueryDiscovery { AND s.count_star > {} AND s.sum_rows_sent > {} AND q.rule_id IS NULL - {} DESC + ORDER BY {} DESC LIMIT {} OFFSET {}", self.source_hostgroup, self.readyset_user,