From 084bea0601f3f0ee3361775ef32be032dd77921c Mon Sep 17 00:00:00 2001 From: David Figatner Date: Wed, 25 Dec 2024 09:59:24 -0800 Subject: [PATCH 1/6] parser for table refs --- quadratic-core/src/a1/error.rs | 17 + quadratic-core/src/a1/mod.rs | 2 + .../src/a1/table_ref/column_range.rs | 86 ++++ quadratic-core/src/a1/table_ref/mod.rs | 82 ++++ quadratic-core/src/a1/table_ref/parse.rs | 202 +++++++++ quadratic-core/src/a1/table_ref/tokenize.rs | 413 ++++++++++++++++++ 6 files changed, 802 insertions(+) create mode 100644 quadratic-core/src/a1/table_ref/column_range.rs create mode 100644 quadratic-core/src/a1/table_ref/mod.rs create mode 100644 quadratic-core/src/a1/table_ref/parse.rs create mode 100644 quadratic-core/src/a1/table_ref/tokenize.rs diff --git a/quadratic-core/src/a1/error.rs b/quadratic-core/src/a1/error.rs index fa709112e4..252b190f61 100644 --- a/quadratic-core/src/a1/error.rs +++ b/quadratic-core/src/a1/error.rs @@ -18,6 +18,12 @@ pub enum A1Error { InvalidExclusion(String), TranslateInvalid(String), SheetNotFound, + + InvalidTableRef(String), + TableNotFound(String), + MultipleRowDefinitions, + UnexpectedRowNumber, + InvalidRowRange(String), } impl From for String { @@ -46,6 +52,17 @@ impl std::fmt::Display for A1Error { A1Error::InvalidExclusion(msg) => write!(f, "Invalid Exclusion: {msg}"), A1Error::TranslateInvalid(msg) => write!(f, "Translate Invalid: {msg}"), A1Error::SheetNotFound => write!(f, "Sheet Not Found"), + + A1Error::InvalidTableRef(msg) => write!(f, "Invalid Table Ref: {msg}"), + A1Error::TableNotFound(msg) => write!(f, "Table Not Found: {msg}"), + A1Error::MultipleRowDefinitions => { + write!(f, "Table reference may only have one row definition") + } + A1Error::UnexpectedRowNumber => write!( + f, + "Row numbers in tables must be defined with # (e.g., [#12,15-12])" + ), + A1Error::InvalidRowRange(msg) => write!(f, "Invalid row range: {msg}"), } } } diff --git a/quadratic-core/src/a1/mod.rs b/quadratic-core/src/a1/mod.rs index 
128d73f3c9..953a1ca274 100644 --- a/quadratic-core/src/a1/mod.rs +++ b/quadratic-core/src/a1/mod.rs @@ -8,6 +8,7 @@ mod error; mod js_selection; mod ref_range_bounds; mod sheet_cell_ref_range; +mod table_ref; pub use a1_selection::*; pub use a1_sheet_name::*; @@ -19,6 +20,7 @@ pub use error::*; pub use js_selection::*; pub use ref_range_bounds::*; pub use sheet_cell_ref_range::*; +pub use table_ref::*; /// Name to use when a sheet ID has no corresponding name. /// diff --git a/quadratic-core/src/a1/table_ref/column_range.rs b/quadratic-core/src/a1/table_ref/column_range.rs new file mode 100644 index 0000000000..3d0d8fe75c --- /dev/null +++ b/quadratic-core/src/a1/table_ref/column_range.rs @@ -0,0 +1,86 @@ +//! ColumnRange and RowRange +//! +//! These are used to define the range of columns and rows within a table. Any +//! table reference may only have one list of row ranges, and any number of +//! column ranges. +//! +//! We serialize/deserialize RowRangeEntry#End to -1 if equal to u64::MAX. This +//! is to ensure compatibility with JS. 
+
+use serde::{Deserialize, Serialize};
+use ts_rs::TS;
+
+use crate::CellRefCoord;
+
+/// One entry in a table reference's list of rows, running from `start` to
+/// `end`. A single row is stored with `start == end`.
+#[derive(Clone, Debug, Eq, Hash, PartialEq, TS, Serialize, Deserialize)]
+pub struct RowRangeEntry {
+    pub start: CellRefCoord,
+    pub end: CellRefCoord,
+}
+
+impl RowRangeEntry {
+    /// Builds an entry whose `start` and `end` are both created with
+    /// `CellRefCoord::new_rel`.
+    pub fn new_rel(start: i64, end: i64) -> Self {
+        Self {
+            start: CellRefCoord::new_rel(start),
+            end: CellRefCoord::new_rel(end),
+        }
+    }
+
+    /// Builds an entry whose `start` and `end` are both created with
+    /// `CellRefCoord::new_abs`.
+    pub fn new_abs(start: i64, end: i64) -> Self {
+        Self {
+            start: CellRefCoord::new_abs(start),
+            end: CellRefCoord::new_abs(end),
+        }
+    }
+}
+
+/// The rows selected by a table reference.
+#[derive(Default, Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TS)]
+pub enum RowRange {
+    /// No row restriction (the default).
+    #[default]
+    All,
+    /// The row written as `#THIS ROW` in a reference.
+    CurrentRow,
+    /// An explicit list of row entries, e.g. `#12,15` or `#12:15`.
+    Rows(Vec<RowRangeEntry>),
+}
+
+/// The columns selected by one entry of a table reference; columns are
+/// identified by name.
+#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TS)]
+pub enum ColRange {
+    /// A single named column: `[Column 1]`.
+    Col(String),
+    /// A range between two named columns: `[Column 1]:[Column 2]`.
+    ColRange(String, String),
+    /// From the named column onwards: `[Column 1]:`.
+    ColumnToEnd(String),
+}
+
+#[cfg(test)]
+#[serial_test::parallel]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_row_range_entry_new() {
+        let row_range = RowRangeEntry::new_rel(1, 15);
+        assert_eq!(row_range.start, CellRefCoord::new_rel(1));
+        assert_eq!(row_range.end, CellRefCoord::new_rel(15));
+    }
+
+    // NOTE(review): the expected JSON below depends on how `CellRefCoord`
+    // itself is serialized, which is not defined in this file -- confirm.
+    #[test]
+    fn test_row_range_serialization() {
+        let row_range = RowRangeEntry {
+            start: CellRefCoord::new_rel(1),
+            end: CellRefCoord::new_rel(i64::MAX),
+        };
+        let serialized = serde_json::to_string(&row_range).unwrap();
+        assert_eq!(serialized, r#"{"start":1,"end":-1}"#);
+
+        let deserialized: RowRangeEntry = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(deserialized, row_range);
+
+        let row_range = RowRangeEntry {
+            start: CellRefCoord::new_rel(10),
+            end: CellRefCoord::new_rel(15),
+        };
+        let serialized = serde_json::to_string(&row_range).unwrap();
+        assert_eq!(serialized, r#"{"start":10,"end":15}"#);
+
+        let deserialized: RowRangeEntry = serde_json::from_str(&serialized).unwrap();
+        assert_eq!(deserialized, row_range);
+    }
+}
diff --git a/quadratic-core/src/a1/table_ref/mod.rs 
b/quadratic-core/src/a1/table_ref/mod.rs new file mode 100644 index 0000000000..6ac1548a61 --- /dev/null +++ b/quadratic-core/src/a1/table_ref/mod.rs @@ -0,0 +1,82 @@ +//! A reference to data within a table. +//! +//! Table name rules: +//! - may not have spaces (TODO) +//! - maximum of 255 characters (TODO) +//! - must be unique across the .grid file +//! +//! Table references do not require '' +//! +//! Table references: +//! - Table1[Column Name] - reference only the data in that column +//! - Table1[[Column 1]:[Column 3]] - all data within the range of the columns +//! - Table1[[Column 2]:] - all data in column 2 to the last column +//! - Table1[[Column 1],[Column 3]] - all data within the list of columns +//! - (not yet supported) Table1[[Column 1] [Column 3]] - the intersection of +//! two or more columns -- I don't understand this one +//! - Table1[[#ALL], [Column Name]] - column header and data +//! - Table1[#HEADERS] - only the table headers +//! - (not yet supported) Table1[#TOTALS] - reference the total line at the end +//! of the table (also known as the footer) +//! - Table1[[#HEADERS], [#DATA]] - table headers and data across entire table +//! - Table1 or Table1[#DATA] - table data without headers or footers +//! - Table1[@Column Name] - data in column name at the same row as the code +//! cell +//! - Table1[[#This Row],[Colum Name]] - dat in column name at the same row as +//! cell +//! +//! For purposes of data frames, we'll probably ignore #DATA, since we want to +//! define the data frame with the headers. +//! +//! Quadratic extends the table reference to also allow specific rows within +//! columns. The row range may change based on the sort/filter of the column: +//! - Table1[[#10]] - all data in row 10 +//! - Table1[[#12],[Column 1]] +//! - Table1[[#12:15],[Column 1]] +//! - Table1[[#12:LAST],[Column 1]] - from 12 to last row in table +//! - Table1[[#12:],[Column 1]] - same as #12:LAST +//! - Table1[[#12,15],[Column 1]] +//! 
- Table1[[#12,14,20],[Column 1]:[Column 2]] +//! - (possibly support) Table1[#$12],[Column 1] - maintains reference to the +//! absolute row 12, regardless of sorting/filtering +//! - Table1[[#LAST],[Column 1]] - last row in the table +//! +//! When parsing, we first try to see if it references a table. If not, then we +//! try A1 parsing. This allows Table1 to be a proper reference, even though it +//! can also be parsed as A1 (with a large column offset). +//! +//! Double brackets allow escaping of special characters, eg, +//! DeptSalesFYSummary[[Total $ Amount]] +//! +//! Special characters that require [[ ]] are: comma, :, [, ], and @ (Excel +//! requires more characters to be escaped--Quadratic will still accept them +//! with or without the double bracket) +//! +//! Special characters can also be escaped within column names using a single +//! quote: [, ], @, #, and '. For example: DeptSales['[No Idea why a left +//! bracket is needed here] +//! +//! The space character can be used to improve readability in a structured +//! reference. It is ignored in parsing: =DeptSales[ [Sales Person]:[Region] ] +//! =DeptSales[[#Headers], [#Data], [% Commission]] + +mod column_range; +pub mod parse; +mod tokenize; + +pub use column_range::*; + +use serde::{Deserialize, Serialize}; +use ts_rs::TS; + +use super::A1Error; + +#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TS)] +pub struct TableRef { + pub table_name: String, + pub data: bool, + pub headers: bool, + pub footers: bool, + pub row_ranges: RowRange, + pub col_ranges: Vec, +} diff --git a/quadratic-core/src/a1/table_ref/parse.rs b/quadratic-core/src/a1/table_ref/parse.rs new file mode 100644 index 0000000000..183345d274 --- /dev/null +++ b/quadratic-core/src/a1/table_ref/parse.rs @@ -0,0 +1,202 @@ +use lazy_static::lazy_static; +use regex::Regex; + +use super::{tokenize::Token, ColRange, RowRange, RowRangeEntry, TableRef}; +use crate::A1Error; + +lazy_static! 
{ + static ref TABLE_NAME_PATTERN: Regex = + Regex::new(r"^([a-zA-Z0-9_]{1,255})(?:\[(.*)\])?$").unwrap(); +} + +impl TableRef { + /// Parses the table name using a regex from the start of the string. + /// Returns the table name and the remaining string. + fn parse_table_name(s: &str) -> Result<(String, &str), A1Error> { + if let Some(captures) = TABLE_NAME_PATTERN.captures(s) { + if let Some(name) = captures.get(1) { + let remaining = captures.get(2).map_or("", |m| m.as_str()).trim(); + return Ok((name.as_str().to_string(), remaining)); + } + } + Err(A1Error::InvalidTableRef("Invalid table name".into())) + } + + /// Parse a table reference given a list of table_names. + pub fn parse(s: &str, table_names: &[String]) -> Result { + let (table_name, remaining) = Self::parse_table_name(s)?; + let table_name = if let Some(name) = table_names + .iter() + .find(|t| t.eq_ignore_ascii_case(&table_name)) + { + name.clone() + } else { + return Err(A1Error::TableNotFound(table_name.clone())); + }; + + // if it's just the table name, return the entire table TableRef + if remaining.trim().is_empty() { + return Ok(Self { + table_name, + data: true, + headers: false, + footers: false, + row_ranges: RowRange::All, + col_ranges: vec![], + }); + } + + let mut row_ranges = None; + let mut column_ranges = vec![]; + let mut data = true; + let mut headers = false; + let mut footers = false; + let tokens = Self::tokenize(remaining)?; + dbg!(&tokens); + for token in tokens { + match token { + Token::RowRange(start, end) => match row_ranges { + Some(RowRange::Rows(mut rows)) => { + rows.push(RowRangeEntry::new_rel(start as i64, end as i64)); + row_ranges = Some(RowRange::Rows(rows)); + } + Some(_) => { + return Err(A1Error::MultipleRowDefinitions); + } + None => { + row_ranges = Some(RowRange::Rows(vec![RowRangeEntry::new_rel( + start as i64, + end as i64, + )])); + } + }, + Token::Column(name) => { + column_ranges.push(ColRange::Col(name)); + } + Token::ColumnRange(start, end) => { + 
column_ranges.push(ColRange::ColRange(start, end)); + } + Token::ColumnToEnd(name) => { + column_ranges.push(ColRange::ColumnToEnd(name)); + } + Token::All => { + if row_ranges.is_some() { + return Err(A1Error::MultipleRowDefinitions); + } + row_ranges = Some(RowRange::All); + } + Token::Headers => { + headers = true; + } + Token::Footers => { + footers = true; + } + Token::Data => { + data = true; + } + Token::ThisRow => { + if row_ranges.is_some() { + return Err(A1Error::MultipleRowDefinitions); + } + row_ranges = Some(RowRange::CurrentRow); + } + } + } + + Ok(Self { + table_name, + data, + headers, + footers, + row_ranges: row_ranges.unwrap_or(RowRange::All), + col_ranges: column_ranges, + }) + } +} + +#[cfg(test)] +#[serial_test::parallel] +mod tests { + use super::*; + + #[test] + fn test_parse_table_name() { + let (table_name, remaining) = TableRef::parse_table_name("Table1[Column 1]").unwrap(); + assert_eq!(table_name, "Table1"); + assert_eq!(remaining, "Column 1"); + } + + #[test] + fn test_simple_table_ref() { + let names = vec!["Table1".to_string()]; + let table_ref = TableRef::parse("Table1", &names).unwrap(); + assert_eq!(table_ref.table_name, "Table1"); + assert!(table_ref.data); + assert!(!table_ref.headers); + assert!(table_ref.col_ranges.is_empty()); + } + + #[test] + fn test_table_name_case_insensitive() { + let names = vec!["Table1".to_string()]; + let table_ref = TableRef::parse("table1", &names).unwrap(); + assert_eq!(table_ref.table_name, "Table1"); + } + + #[test] + fn test_table_name_not_found() { + let names = vec!["Table1".to_string()]; + let result = TableRef::parse("Table2", &names); + assert!(result.is_err()); + assert_eq!( + result.unwrap_err(), + A1Error::TableNotFound("Table2".to_string()) + ); + } + + #[test] + fn test_table_with_column() { + let names = vec!["Table1".to_string()]; + let table_ref = TableRef::parse("Table1[Column Name]", &names).unwrap(); + assert_eq!(table_ref.table_name, "Table1"); + 
assert_eq!(table_ref.col_ranges.len(), 1); + assert_eq!( + table_ref.col_ranges[0], + ColRange::Col("Column Name".to_string()) + ); + } + + #[test] + fn test_table_with_headers() { + let names = vec!["Table1".to_string()]; + let table_ref = TableRef::parse("Table1[#HEADERS]", &names).unwrap(); + assert_eq!(table_ref.table_name, "Table1"); + assert!(table_ref.headers); + } + + #[test] + fn test_table_with_row_range() { + let names = vec!["Table1".to_string()]; + + let variations = [ + ( + "Table1[[#12:15],[Column 1]]", + TableRef { + table_name: "Table1".to_string(), + data: true, + headers: false, + footers: false, + row_ranges: RowRange::Rows(vec![RowRangeEntry::new_rel(12, 15)]), + col_ranges: vec![ColRange::Col("Column 1".to_string())], + }, + ), + // "TABLE2[ [#12:15], [Column 2]]", + // "table3[[#12:15],[Column 3]]", + ]; + + for (s, expected) in variations.iter() { + let table_ref = TableRef::parse(s, &names).unwrap(); + assert_eq!(table_ref, *expected, "{}", s); + } + } +} diff --git a/quadratic-core/src/a1/table_ref/tokenize.rs b/quadratic-core/src/a1/table_ref/tokenize.rs new file mode 100644 index 0000000000..562e7c45be --- /dev/null +++ b/quadratic-core/src/a1/table_ref/tokenize.rs @@ -0,0 +1,413 @@ +//! Tokenizes a table reference after the table_name has been removed. + +use super::*; + +#[derive(Debug, PartialEq, Clone)] +pub(crate) enum Token { + All, + Headers, + Data, + Footers, + ThisRow, + Column(String), + ColumnRange(String, String), + ColumnToEnd(String), + RowRange(u64, u64), +} + +impl TableRef { + /// Tokenizes a string of row numbers separated by commas. + fn tokenize_rows(s: String) -> Result, A1Error> { + // remove the # + let s = s.chars().skip(1).collect::(); + + // Split by commas and process each part + let parts = s.split(','); + let mut tokens = Vec::new(); + + for part in parts { + tokens.push(Self::tokenize_single_row(part)?); + } + + Ok(tokens) + } + + /// Tokenizes a single row number. 
+ fn tokenize_single_row(s: &str) -> Result { + let s = s.trim().to_ascii_uppercase(); + + // Handle single number case + if let Ok(num) = s.parse::() { + return Ok(Token::RowRange(num, num)); + } + + if s == "LAST" { + return Ok(Token::RowRange(u64::MAX, u64::MAX)); + } + + if s == "THIS ROW" { + return Ok(Token::ThisRow); + } + + // Handle range cases (contains ':') + if let Some((start, end)) = s.split_once(':') { + let start = start.trim(); + let end = end.trim(); + + let start_num = start + .parse::() + .map_err(|_| A1Error::InvalidTableRef("Invalid row number".into()))?; + + // Handle cases like "5:" or "5:LAST" + if end.is_empty() || end.eq_ignore_ascii_case("LAST") { + return Ok(Token::RowRange(start_num, u64::MAX)); + } + + // Handle "5:10" + let end_num = end + .parse::() + .map_err(|_| A1Error::InvalidTableRef("Invalid row number".into()))?; + + return Ok(Token::RowRange(start_num, end_num)); + } + + Err(A1Error::InvalidTableRef("Invalid row specification".into())) + } + + /// Separates bracketed entries, allowing double brackets and ' to escape + /// special characters. Returns a list of Strings that can be tokenized. 
+    fn bracketed_entries(s: &str) -> Result<Vec<String>, A1Error> {
+        let mut entries = Vec::new();
+
+        // inside an escaped region
+        let mut in_double_brackets = false;
+
+        // special area is a bracketed region that starts with '#'
+        let mut in_special = false;
+
+        // track bracket count
+        let mut bracket_count = 0;
+
+        // track current entry
+        let mut entry = String::new();
+
+        // `while let` rather than `for`: the escape arm below pulls the next
+        // character from the same iterator.
+        let mut chars = s.chars();
+        while let Some(c) = chars.next() {
+            match c {
+                '#' => {
+                    // Inside [[ ]] a '#' is a literal character of the entry
+                    // and must be kept; elsewhere it opens a special area.
+                    if !in_double_brackets {
+                        in_special = true;
+                    }
+                    entry.push(c);
+                }
+                ' ' => {
+                    // ignore whitespace if between entries or in a special area
+                    if !entry.is_empty() && !in_special {
+                        entry.push(c);
+                    }
+                }
+                '[' => {
+                    if bracket_count == 1 {
+                        in_double_brackets = true;
+                    }
+                    bracket_count += 1;
+                    if bracket_count > 2 {
+                        return Err(A1Error::InvalidTableRef("Unexpected [".into()));
+                    }
+                }
+                ']' => {
+                    bracket_count -= 1;
+                    if bracket_count < 0 {
+                        return Err(A1Error::InvalidTableRef("Unexpected ]".into()));
+                    }
+                    if bracket_count == 1 {
+                        in_double_brackets = false;
+                    }
+                    // any closing bracket ends the current special area
+                    in_special = false;
+                }
+                '\'' => {
+                    // a single quote escapes the next character, whatever it is
+                    if let Some(c) = chars.next() {
+                        entry.push(c);
+                    } else {
+                        return Err(A1Error::InvalidTableRef(
+                            "Unexpected escape character '".into(),
+                        ));
+                    }
+                }
+                ',' => {
+                    // a comma is content inside a special or escaped region,
+                    // otherwise it separates entries
+                    if in_special || in_double_brackets {
+                        entry.push(c);
+                    } else {
+                        if entry.is_empty() {
+                            return Err(A1Error::InvalidTableRef("Empty entry found".into()));
+                        }
+                        entries.push(entry.trim().to_string());
+                        entry = String::new();
+                    }
+                }
+                ':' => {
+                    // a colon is content inside a special or escaped region,
+                    // otherwise it is emitted as its own ":" entry
+                    if in_special || in_double_brackets {
+                        entry.push(c);
+                    } else {
+                        if entry.is_empty() {
+                            return Err(A1Error::InvalidTableRef("Empty entry found".into()));
+                        }
+                        entries.push(entry.trim().to_string());
+                        entries.push(":".to_string());
+                        entry = String::new();
+                    }
+                }
+                c => {
+                    entry.push(c);
+                }
+            }
+        }
+
+        // A stray ']' is rejected above; an unclosed '[' is rejected here.
+        if bracket_count != 0 {
+            return Err(A1Error::InvalidTableRef("Missing ]".into()));
+        }
+
+        // Trim the final entry the same way the ',' and ':' arms do.
+        let entry = entry.trim();
+        if !entry.is_empty() {
+            entries.push(entry.to_string());
+        }
+
+        Ok(entries)
+    }
+
+    pub(crate) fn tokenize(s: &str) -> Result<Vec<Token>, A1Error> {
+        // if there are no brackets, then it's a column
name + if !s.contains('[') { + return Ok(vec![Token::Column(s.to_string())]); + } + + let bracketed_entries = Self::bracketed_entries(s)?; + let mut tokens = Vec::new(); + let mut iter = bracketed_entries.iter().peekable(); + while let Some(entry) = iter.next() { + match entry.as_str() { + "#HEADERS" => tokens.push(Token::Headers), + "#DATA" => tokens.push(Token::Data), + "#TOTALS" => tokens.push(Token::Footers), + "#ALL" => tokens.push(Token::All), + ":" => return Err(A1Error::InvalidTableRef("Unexpected colon".into())), + s => { + if s.is_empty() { + continue; + } + let s = s.to_string(); + if s.chars().next() == Some('#') { + tokens.extend(Self::tokenize_rows(s)?) + } else { + if iter.peek().is_some_and(|s| **s == ":") { + // skip the colon + iter.next(); + if let Some(column_name) = iter.next() { + tokens.push(Token::ColumnRange(s, column_name.to_string())); + } else { + tokens.push(Token::ColumnToEnd(s)); + } + } else { + tokens.push(Token::Column(s)); + } + } + } + } + } + + Ok(tokens) + } +} + +#[cfg(test)] +#[serial_test::parallel] +mod tests { + use super::*; + + #[test] + fn test_bracketed_entries() { + assert_eq!( + TableRef::bracketed_entries("[column 1]").unwrap(), + vec!["column 1"] + ); + assert_eq!( + TableRef::bracketed_entries("[#data]").unwrap(), + vec!["#data"] + ); + assert_eq!( + TableRef::bracketed_entries("[#12,15],[column 1],[column2]").unwrap(), + vec!["#12,15", "column 1", "column2"] + ); + assert_eq!( + TableRef::bracketed_entries("[#12, 15], [column 1] , [column2]").unwrap(), + vec!["#12,15", "column 1", "column2"] + ); + assert_eq!( + TableRef::bracketed_entries("[#ALL],[column 1]:[column2]").unwrap(), + vec!["#ALL", "column 1", ":", "column2"] + ); + } + + #[test] + fn test_bracketed_entries_escaped_tick() { + assert_eq!( + TableRef::bracketed_entries( + "[#ALL],[column 1', and column B]:[column2': the nice one]" + ) + .unwrap(), + vec![ + "#ALL", + "column 1, and column B", + ":", + "column2: the nice one" + ] + ); + } + + #[test] + 
fn test_bracketed_entries_escaped_brackets() { + assert_eq!( + TableRef::bracketed_entries( + "[#ALL],[[column 1, and column B]]:[[column2: the nice one]]" + ) + .unwrap(), + vec![ + "#ALL", + "column 1, and column B", + ":", + "column2: the nice one" + ] + ); + } + + #[test] + fn test_tokenize_column_name() { + assert_eq!( + TableRef::tokenize("[Column 1]").unwrap(), + vec![Token::Column("Column 1".to_string())] + ); + assert_eq!( + TableRef::tokenize("[[Column 1]]").unwrap(), + vec![Token::Column("Column 1".to_string())] + ); + } + + #[test] + fn test_tokenize_special() { + let special = [ + ("[#HEADERS]", Token::Headers), + ("[#DATA]", Token::Data), + ("[#TOTALS]", Token::Footers), + ("[#ALL]", Token::All), + ]; + for (s, expected) in special { + assert_eq!( + TableRef::tokenize(s).unwrap(), + vec![expected.clone()], + "Expected {:?} for {}", + expected, + s + ); + } + } + + #[test] + fn test_tokenize_rows() { + let rows = [ + ("#1", vec![Token::RowRange(1, 1)]), + ("#1:10", vec![Token::RowRange(1, 10)]), + ("#1:LAST", vec![Token::RowRange(1, u64::MAX)]), + ("#2:", vec![Token::RowRange(2, u64::MAX)]), + ( + "#1, 2, 3 : 5 ", + vec![ + Token::RowRange(1, 1), + Token::RowRange(2, 2), + Token::RowRange(3, 5), + ], + ), + ("#LAST", vec![Token::RowRange(u64::MAX, u64::MAX)]), + ("#This Row", vec![Token::ThisRow]), + ]; + for (s, expected) in rows { + assert_eq!( + TableRef::tokenize_rows(s.to_string()).unwrap(), + expected.clone(), + "Expected {:?} for {}", + expected, + s + ); + } + } + + #[test] + fn test_tokenize_columns() { + let columns = [ + ("[Column 1]", vec![Token::Column("Column 1".to_string())]), + ( + "[[Column,: 1]]", + vec![Token::Column("Column,: 1".to_string())], + ), + ( + "[Column 1]:[Column 2]", + vec![Token::ColumnRange( + "Column 1".to_string(), + "Column 2".to_string(), + )], + ), + ( + "[Column 1]:", + vec![Token::ColumnToEnd("Column 1".to_string())], + ), + ]; + for (s, expected) in columns { + assert_eq!( + TableRef::tokenize(s).unwrap(), + 
expected.clone(), + "Expected {:?} for {}", + expected, + s + ); + } + } + + #[test] + fn test_tokenize_rows_columns() { + let column_rows = [ + ( + "[#12,15],[Column 1]", + vec![ + Token::RowRange(12, 12), + Token::RowRange(15, 15), + Token::Column("Column 1".to_string()), + ], + ), + ( + "[#12:15],[Column 1]:[Column 2]", + vec![ + Token::RowRange(12, 15), + Token::ColumnRange("Column 1".to_string(), "Column 2".to_string()), + ], + ), + ( + "[#12:15],[Column 1]:", + vec![ + Token::RowRange(12, 15), + Token::ColumnToEnd("Column 1".to_string()), + ], + ), + ( + "[#12:15],[Column 1],[Column 2]", + vec![ + Token::RowRange(12, 15), + Token::Column("Column 1".to_string()), + Token::Column("Column 2".to_string()), + ], + ), + ]; + for (s, expected) in column_rows { + assert_eq!( + TableRef::tokenize(s).unwrap(), + expected, + "Expected {:?} for {}", + expected, + s + ); + } + } +} From b440d3a32363590087b9587fbb0e499f113c0257 Mon Sep 17 00:00:00 2001 From: David Figatner Date: Wed, 25 Dec 2024 10:22:47 -0800 Subject: [PATCH 2/6] fix tests --- quadratic-core/src/a1/table_ref/parse.rs | 7 +++---- quadratic-core/src/a1/table_ref/tokenize.rs | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/quadratic-core/src/a1/table_ref/parse.rs b/quadratic-core/src/a1/table_ref/parse.rs index 183345d274..f8c258b537 100644 --- a/quadratic-core/src/a1/table_ref/parse.rs +++ b/quadratic-core/src/a1/table_ref/parse.rs @@ -51,9 +51,8 @@ impl TableRef { let mut data = true; let mut headers = false; let mut footers = false; - let tokens = Self::tokenize(remaining)?; - dbg!(&tokens); - for token in tokens { + + for token in Self::tokenize(remaining)? 
{ match token { Token::RowRange(start, end) => match row_ranges { Some(RowRange::Rows(mut rows)) => { @@ -169,7 +168,7 @@ mod tests { #[test] fn test_table_with_headers() { let names = vec!["Table1".to_string()]; - let table_ref = TableRef::parse("Table1[#HEADERS]", &names).unwrap(); + let table_ref = TableRef::parse("Table1[[#HEADERS]]", &names).unwrap(); assert_eq!(table_ref.table_name, "Table1"); assert!(table_ref.headers); } diff --git a/quadratic-core/src/a1/table_ref/tokenize.rs b/quadratic-core/src/a1/table_ref/tokenize.rs index 562e7c45be..180141eca5 100644 --- a/quadratic-core/src/a1/table_ref/tokenize.rs +++ b/quadratic-core/src/a1/table_ref/tokenize.rs @@ -177,6 +177,7 @@ impl TableRef { } let bracketed_entries = Self::bracketed_entries(s)?; + let mut tokens = Vec::new(); let mut iter = bracketed_entries.iter().peekable(); while let Some(entry) = iter.next() { @@ -209,7 +210,6 @@ impl TableRef { } } } - Ok(tokens) } } From 807123a096b5270aa6ebc7d95098db5453bb90aa Mon Sep 17 00:00:00 2001 From: David Figatner Date: Wed, 25 Dec 2024 11:04:24 -0800 Subject: [PATCH 3/6] working through to_string test cases --- quadratic-core/src/a1/table_ref/display.rs | 131 ++++++++++++++++++++ quadratic-core/src/a1/table_ref/mod.rs | 9 +- quadratic-core/src/a1/table_ref/parse.rs | 51 ++++++-- quadratic-core/src/a1/table_ref/tokenize.rs | 32 ++--- 4 files changed, 191 insertions(+), 32 deletions(-) create mode 100644 quadratic-core/src/a1/table_ref/display.rs diff --git a/quadratic-core/src/a1/table_ref/display.rs b/quadratic-core/src/a1/table_ref/display.rs new file mode 100644 index 0000000000..5152fae29b --- /dev/null +++ b/quadratic-core/src/a1/table_ref/display.rs @@ -0,0 +1,131 @@ +use crate::UNBOUNDED; + +use super::*; + +impl TableRef { + /// Returns true if the table reference is the default table reference. 
+    pub fn is_default(&self) -> bool {
+        self.data
+            && !self.headers
+            && !self.totals
+            && self.row_ranges == RowRange::All
+            && self.col_ranges.is_empty()
+    }
+
+    /// Renders a single row entry without the leading `#`: `12`, `12:15`,
+    /// `12:` (open-ended) or `LAST`.
+    ///
+    /// An entry spanning row 1 to `UNBOUNDED` renders as an empty string.
+    // NOTE(review): that empty string makes `row_range_to_string` emit `[#]`,
+    // which the tokenizer does not accept -- confirm the intended output.
+    fn row_range_entry_to_string(entry: &RowRangeEntry) -> String {
+        let (start, end) = (entry.start.coord, entry.end.coord);
+
+        if start == 1 && end == UNBOUNDED {
+            return String::default();
+        }
+
+        // The tokenizer turns `LAST` into (UNBOUNDED, UNBOUNDED); write it
+        // back as the keyword rather than as the raw sentinel number.
+        if start == UNBOUNDED && end == UNBOUNDED {
+            return "LAST".to_string();
+        }
+
+        if start == end {
+            start.to_string()
+        } else if end == UNBOUNDED {
+            format!("{}:", start)
+        } else {
+            format!("{}:{}", start, end)
+        }
+    }
+
+    /// Returns the string representation of the row range.
+    fn row_range_to_string(&self) -> String {
+        match &self.row_ranges {
+            RowRange::All => String::default(),
+            RowRange::CurrentRow => "[#THIS ROW]".to_string(),
+            RowRange::Rows(rows) => {
+                format!(
+                    "[#{}]",
+                    rows.iter()
+                        .map(TableRef::row_range_entry_to_string)
+                        .collect::<Vec<_>>()
+                        .join(",")
+                )
+            }
+        }
+    }
+
+    /// Renders one column entry in the bracket form used by the tokenizer
+    /// tests: `[a]`, `[a]:[b]` or `[a]:`.
+    fn col_range_entry_to_string(entry: &ColRange) -> String {
+        match entry {
+            ColRange::Col(col) => format!("[{}]", col),
+            ColRange::ColRange(start, end) => format!("[{}]:[{}]", start, end),
+            ColRange::ColumnToEnd(col) => format!("[{}]:", col),
+        }
+    }
+
+    /// Renders every column entry, in order.
+    fn col_ranges_to_string(&self) -> Vec<String> {
+        self.col_ranges
+            .iter()
+            .map(TableRef::col_range_entry_to_string)
+            .collect()
+    }
+
+    /// Returns the string representation of the table reference.
+    pub fn to_string(&self) -> String {
+        // the default reference is written as the bare table name
+        if self.is_default() {
+            return self.table_name.to_string();
+        }
+
+        let mut entries = vec![];
+
+        // only show special markers if not default, which is #[DATA] only
+        if !(self.data && !self.headers && !self.totals) {
+            if self.headers && self.data && self.totals {
+                entries.push("[#ALL]".to_string());
+            } else {
+                if self.data {
+                    entries.push("[#DATA]".to_string());
+                }
+                if self.headers {
+                    entries.push("[#HEADERS]".to_string());
+                }
+                if self.totals {
+                    entries.push("[#TOTALS]".to_string());
+                }
+            }
+        }
+
+        // `RowRange::All` renders as an empty string; pushing it would leave a
+        // dangling separator in the output (e.g. "[#ALL],").
+        let row_range = self.row_range_to_string();
+        if !row_range.is_empty() {
+            entries.push(row_range);
+        }
+        entries.extend(self.col_ranges_to_string());
+
+        format!("{}[{}]", self.table_name, entries.join(","))
+    }
+}
+
+#[cfg(test)]
+#[serial_test::parallel]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_to_string_only_table_name() {
+        let names = vec!["Table1".to_string()];
+        let table_ref = TableRef::parse("Table1", &names).unwrap_or_else(|e| {
+            panic!("Failed to parse Table1: {}", e);
+        });
+        assert_eq!(table_ref.to_string(), "Table1");
+    }
+
+    #[test]
+    fn test_to_string() {
+        let names = vec!["Table1".to_string()];
+        let tests = [
+            "Table1[[#12:]]",
+            "Table1[[#12:15]]",
+            "Table1[[#12:]]",
+            "Table1[[#ALL]]",
+            "Table1[[#HEADERS],[#TOTALS]]",
+            "Table1[[#HEADERS],[Column 1]]",
+            "Table1[[#HEADERS],[Column 1],[Column 2]]",
+            "Table1[[#HEADERS],[Column 1],[Column 2],[Column 3]:[Column 4],[Column 6]]",
+            // NOTE(review): the brackets in the next case are unbalanced (an
+            // extra `]` follows [#HEADERS]), so parsing fails before
+            // to_string runs -- looks like a typo for `,`; confirm.
+            "Table1[[#DATA],[#HEADERS]][Column 1]]",
+        ];
+
+        for test in tests {
+            let table_ref = TableRef::parse(test, &names)
+                .unwrap_or_else(|e| panic!("Failed to parse {}: {}", test, e));
+            assert_eq!(table_ref.to_string(), test, "{}", test);
+        }
+    }
+}
diff --git a/quadratic-core/src/a1/table_ref/mod.rs b/quadratic-core/src/a1/table_ref/mod.rs index 6ac1548a61..f820246ae6 100644 --- a/quadratic-core/src/a1/table_ref/mod.rs +++ b/quadratic-core/src/a1/table_ref/mod.rs @@ -10,7 +10,6 @@ //! Table references: //! 
- Table1[Column Name] - reference only the data in that column //! - Table1[[Column 1]:[Column 3]] - all data within the range of the columns -//! - Table1[[Column 2]:] - all data in column 2 to the last column //! - Table1[[Column 1],[Column 3]] - all data within the list of columns //! - (not yet supported) Table1[[Column 1] [Column 3]] - the intersection of //! two or more columns -- I don't understand this one @@ -19,7 +18,7 @@ //! - (not yet supported) Table1[#TOTALS] - reference the total line at the end //! of the table (also known as the footer) //! - Table1[[#HEADERS], [#DATA]] - table headers and data across entire table -//! - Table1 or Table1[#DATA] - table data without headers or footers +//! - Table1 or Table1[#DATA] - table data without headers or totals //! - Table1[@Column Name] - data in column name at the same row as the code //! cell //! - Table1[[#This Row],[Colum Name]] - dat in column name at the same row as @@ -33,8 +32,7 @@ //! - Table1[[#10]] - all data in row 10 //! - Table1[[#12],[Column 1]] //! - Table1[[#12:15],[Column 1]] -//! - Table1[[#12:LAST],[Column 1]] - from 12 to last row in table -//! - Table1[[#12:],[Column 1]] - same as #12:LAST +//! - Table1[[#12:],[Column 1]] - from row 12 to the end of the rows //! - Table1[[#12,15],[Column 1]] //! - Table1[[#12,14,20],[Column 1]:[Column 2]] //! - (possibly support) Table1[#$12],[Column 1] - maintains reference to the @@ -61,6 +59,7 @@ //! 
=DeptSales[[#Headers], [#Data], [% Commission]] mod column_range; +pub mod display; pub mod parse; mod tokenize; @@ -76,7 +75,7 @@ pub struct TableRef { pub table_name: String, pub data: bool, pub headers: bool, - pub footers: bool, + pub totals: bool, pub row_ranges: RowRange, pub col_ranges: Vec, } diff --git a/quadratic-core/src/a1/table_ref/parse.rs b/quadratic-core/src/a1/table_ref/parse.rs index f8c258b537..83b2cdb5ab 100644 --- a/quadratic-core/src/a1/table_ref/parse.rs +++ b/quadratic-core/src/a1/table_ref/parse.rs @@ -40,7 +40,7 @@ impl TableRef { table_name, data: true, headers: false, - footers: false, + totals: false, row_ranges: RowRange::All, col_ranges: vec![], }); @@ -50,7 +50,7 @@ impl TableRef { let mut column_ranges = vec![]; let mut data = true; let mut headers = false; - let mut footers = false; + let mut totals = false; for token in Self::tokenize(remaining)? { match token { @@ -79,16 +79,17 @@ impl TableRef { column_ranges.push(ColRange::ColumnToEnd(name)); } Token::All => { - if row_ranges.is_some() { - return Err(A1Error::MultipleRowDefinitions); - } - row_ranges = Some(RowRange::All); + headers = true; + data = true; + totals = true; } Token::Headers => { + data = false; headers = true; } - Token::Footers => { - footers = true; + Token::Totals => { + data = false; + totals = true; } Token::Data => { data = true; @@ -106,7 +107,7 @@ impl TableRef { table_name, data, headers, - footers, + totals, row_ranges: row_ranges.unwrap_or(RowRange::All), col_ranges: column_ranges, }) @@ -175,7 +176,11 @@ mod tests { #[test] fn test_table_with_row_range() { - let names = vec!["Table1".to_string()]; + let names = vec![ + "Table1".to_string(), + "Table2".to_string(), + "Table3".to_string(), + ]; let variations = [ ( @@ -184,13 +189,33 @@ mod tests { table_name: "Table1".to_string(), data: true, headers: false, - footers: false, + totals: false, row_ranges: RowRange::Rows(vec![RowRangeEntry::new_rel(12, 15)]), col_ranges: vec![ColRange::Col("Column 
1".to_string())], }, ), - // "TABLE2[ [#12:15], [Column 2]]", - // "table3[[#12:15],[Column 3]]", + ( + "TABLE2[ [#12:15], [Column 2]]", + TableRef { + table_name: "Table2".to_string(), + data: true, + headers: false, + totals: false, + row_ranges: RowRange::Rows(vec![RowRangeEntry::new_rel(12, 15)]), + col_ranges: vec![ColRange::Col("Column 2".to_string())], + }, + ), + ( + "table3[[#12:15],[Column 3]]", + TableRef { + table_name: "Table3".to_string(), + data: true, + headers: false, + totals: false, + row_ranges: RowRange::Rows(vec![RowRangeEntry::new_rel(12, 15)]), + col_ranges: vec![ColRange::Col("Column 3".to_string())], + }, + ), ]; for (s, expected) in variations.iter() { diff --git a/quadratic-core/src/a1/table_ref/tokenize.rs b/quadratic-core/src/a1/table_ref/tokenize.rs index 180141eca5..610ba6a558 100644 --- a/quadratic-core/src/a1/table_ref/tokenize.rs +++ b/quadratic-core/src/a1/table_ref/tokenize.rs @@ -1,5 +1,7 @@ //! Tokenizes a table reference after the table_name has been removed. 
+use crate::UNBOUNDED; + use super::*; #[derive(Debug, PartialEq, Clone)] @@ -7,12 +9,12 @@ pub(crate) enum Token { All, Headers, Data, - Footers, + Totals, ThisRow, Column(String), ColumnRange(String, String), ColumnToEnd(String), - RowRange(u64, u64), + RowRange(i64, i64), } impl TableRef { @@ -37,12 +39,12 @@ impl TableRef { let s = s.trim().to_ascii_uppercase(); // Handle single number case - if let Ok(num) = s.parse::() { + if let Ok(num) = s.parse::() { return Ok(Token::RowRange(num, num)); } if s == "LAST" { - return Ok(Token::RowRange(u64::MAX, u64::MAX)); + return Ok(Token::RowRange(UNBOUNDED, UNBOUNDED)); } if s == "THIS ROW" { @@ -55,17 +57,17 @@ impl TableRef { let end = end.trim(); let start_num = start - .parse::() + .parse::() .map_err(|_| A1Error::InvalidTableRef("Invalid row number".into()))?; - // Handle cases like "5:" or "5:LAST" - if end.is_empty() || end.eq_ignore_ascii_case("LAST") { - return Ok(Token::RowRange(start_num, u64::MAX)); + // Handle cases like "5:" + if end.is_empty() { + return Ok(Token::RowRange(start_num, UNBOUNDED)); } // Handle "5:10" let end_num = end - .parse::() + .parse::() .map_err(|_| A1Error::InvalidTableRef("Invalid row number".into()))?; return Ok(Token::RowRange(start_num, end_num)); @@ -184,7 +186,7 @@ impl TableRef { match entry.as_str() { "#HEADERS" => tokens.push(Token::Headers), "#DATA" => tokens.push(Token::Data), - "#TOTALS" => tokens.push(Token::Footers), + "#TOTALS" => tokens.push(Token::Totals), "#ALL" => tokens.push(Token::All), ":" => return Err(A1Error::InvalidTableRef("Unexpected colon".into())), s => { @@ -217,6 +219,8 @@ impl TableRef { #[cfg(test)] #[serial_test::parallel] mod tests { + use crate::UNBOUNDED; + use super::*; #[test] @@ -292,7 +296,7 @@ mod tests { let special = [ ("[#HEADERS]", Token::Headers), ("[#DATA]", Token::Data), - ("[#TOTALS]", Token::Footers), + ("[#TOTALS]", Token::Totals), ("[#ALL]", Token::All), ]; for (s, expected) in special { @@ -311,8 +315,8 @@ mod tests { let rows = 
[ ("#1", vec![Token::RowRange(1, 1)]), ("#1:10", vec![Token::RowRange(1, 10)]), - ("#1:LAST", vec![Token::RowRange(1, u64::MAX)]), - ("#2:", vec![Token::RowRange(2, u64::MAX)]), + ("#1:", vec![Token::RowRange(1, UNBOUNDED)]), + ("#2:", vec![Token::RowRange(2, UNBOUNDED)]), ( "#1, 2, 3 : 5 ", vec![ @@ -321,7 +325,7 @@ mod tests { Token::RowRange(3, 5), ], ), - ("#LAST", vec![Token::RowRange(u64::MAX, u64::MAX)]), + ("#LAST", vec![Token::RowRange(UNBOUNDED, UNBOUNDED)]), ("#This Row", vec![Token::ThisRow]), ]; for (s, expected) in rows { From 96f290b4126e027be7caa0c7346be70f9f4dbdf9 Mon Sep 17 00:00:00 2001 From: David Figatner Date: Thu, 26 Dec 2024 05:30:39 -0800 Subject: [PATCH 4/6] fix table display tests --- .../src/a1/table_ref/column_range.rs | 10 ++++-- quadratic-core/src/a1/table_ref/display.rs | 31 ++++++++----------- quadratic-core/src/a1/table_ref/parse.rs | 16 ++++++---- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/quadratic-core/src/a1/table_ref/column_range.rs b/quadratic-core/src/a1/table_ref/column_range.rs index 3d0d8fe75c..e409d6fd7a 100644 --- a/quadratic-core/src/a1/table_ref/column_range.rs +++ b/quadratic-core/src/a1/table_ref/column_range.rs @@ -68,7 +68,10 @@ mod tests { end: CellRefCoord::new_rel(i64::MAX), }; let serialized = serde_json::to_string(&row_range).unwrap(); - assert_eq!(serialized, r#"{"start":1,"end":-1}"#); + assert_eq!( + serialized, + r#"{"start":{"coord":1,"is_absolute":false},"end":{"coord":-1,"is_absolute":false}}"# + ); let deserialized: RowRangeEntry = serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, row_range); @@ -78,7 +81,10 @@ mod tests { end: CellRefCoord::new_rel(15), }; let serialized = serde_json::to_string(&row_range).unwrap(); - assert_eq!(serialized, r#"{"start":10,"end":15}"#); + assert_eq!( + serialized, + r#"{"start":{"coord":10,"is_absolute":false},"end":{"coord":15,"is_absolute":false}}"# + ); let deserialized: RowRangeEntry = 
serde_json::from_str(&serialized).unwrap(); assert_eq!(deserialized, row_range); diff --git a/quadratic-core/src/a1/table_ref/display.rs b/quadratic-core/src/a1/table_ref/display.rs index 5152fae29b..fbb5f8e504 100644 --- a/quadratic-core/src/a1/table_ref/display.rs +++ b/quadratic-core/src/a1/table_ref/display.rs @@ -24,34 +24,29 @@ impl TableRef { }; if start == end { - start + format!("[#{}]", start) } else { - format!("{}:{}", start, end) + format!("[#{}:{}]", start, end) } } /// Returns the string representation of the row range. - fn row_range_to_string(&self) -> String { + fn row_range_to_string(&self) -> Vec { match &self.row_ranges { - RowRange::All => String::default(), - RowRange::CurrentRow => "[#THIS ROW]".to_string(), - RowRange::Rows(rows) => { - format!( - "[#{}]", - rows.iter() - .map(TableRef::row_range_entry_to_string) - .collect::>() - .join(",") - ) - } + RowRange::All => vec![], + RowRange::CurrentRow => vec!["[#THIS ROW]".to_string()], + RowRange::Rows(rows) => rows + .iter() + .map(TableRef::row_range_entry_to_string) + .collect::>(), } } fn col_range_entry_to_string(entry: &ColRange) -> String { match entry { ColRange::Col(col) => format!("[{}]", col), - ColRange::ColRange(start, end) => format!("[{}:{}]", start, end), - ColRange::ColumnToEnd(col) => format!("[{}:]", col), + ColRange::ColRange(start, end) => format!("[{}]:[{}]", start, end), + ColRange::ColumnToEnd(col) => format!("[{}]:", col), } } @@ -86,7 +81,7 @@ impl TableRef { } } } - entries.push(self.row_range_to_string()); + entries.extend(self.row_range_to_string()); entries.extend(self.col_ranges_to_string()); format!("{}[{}]", self.table_name.to_string(), entries.join(",")) @@ -119,7 +114,7 @@ mod tests { "Table1[[#HEADERS],[Column 1]]", "Table1[[#HEADERS],[Column 1],[Column 2]]", "Table1[[#HEADERS],[Column 1],[Column 2],[Column 3]:[Column 4],[Column 6]]", - "Table1[[#DATA],[#HEADERS]][Column 1]]", + "Table1[[#DATA],[#HEADERS],[Column 1]]", ]; for test in tests { diff --git 
a/quadratic-core/src/a1/table_ref/parse.rs b/quadratic-core/src/a1/table_ref/parse.rs index 83b2cdb5ab..892b99084e 100644 --- a/quadratic-core/src/a1/table_ref/parse.rs +++ b/quadratic-core/src/a1/table_ref/parse.rs @@ -48,7 +48,7 @@ impl TableRef { let mut row_ranges = None; let mut column_ranges = vec![]; - let mut data = true; + let mut data = None; let mut headers = false; let mut totals = false; @@ -80,19 +80,23 @@ impl TableRef { } Token::All => { headers = true; - data = true; + data = Some(true); totals = true; } Token::Headers => { - data = false; + if data.is_none() { + data = Some(false); + } headers = true; } Token::Totals => { - data = false; + if data.is_none() { + data = Some(false); + } totals = true; } Token::Data => { - data = true; + data = Some(true); } Token::ThisRow => { if row_ranges.is_some() { @@ -105,7 +109,7 @@ impl TableRef { Ok(Self { table_name, - data, + data: data.unwrap_or(true), headers, totals, row_ranges: row_ranges.unwrap_or(RowRange::All), From 33f058cf29e4941e44530ed69ac80f9c7534954b Mon Sep 17 00:00:00 2001 From: David Figatner Date: Thu, 26 Dec 2024 05:33:25 -0800 Subject: [PATCH 5/6] fix comment --- quadratic-core/src/a1/table_ref/column_range.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/quadratic-core/src/a1/table_ref/column_range.rs b/quadratic-core/src/a1/table_ref/column_range.rs index e409d6fd7a..4a8b4e8110 100644 --- a/quadratic-core/src/a1/table_ref/column_range.rs +++ b/quadratic-core/src/a1/table_ref/column_range.rs @@ -4,8 +4,10 @@ //! table reference may only have one list of row ranges, and any number of //! column ranges. //! -//! We serialize/deserialize RowRangeEntry#End to -1 if equal to u64::MAX. This -//! is to ensure compatibility with JS. +//! We serialize/deserialize RowRangeEntry#End to -1 if equal to UNBOUNDED +//! (i64::MAX). This is to ensure compatibility with JS. +//! +//! i64 is used to maintain compatibility with CellRefCoord. 
use serde::{Deserialize, Serialize}; use ts_rs::TS; From e7c002d9514829bc403366e62e52883c76af0a4f Mon Sep 17 00:00:00 2001 From: David Figatner Date: Thu, 26 Dec 2024 05:34:24 -0800 Subject: [PATCH 6/6] comments --- quadratic-core/src/a1/table_ref/display.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/quadratic-core/src/a1/table_ref/display.rs b/quadratic-core/src/a1/table_ref/display.rs index fbb5f8e504..0ceaf8f33e 100644 --- a/quadratic-core/src/a1/table_ref/display.rs +++ b/quadratic-core/src/a1/table_ref/display.rs @@ -1,3 +1,5 @@ +//! Display TableRef as a string. + use crate::UNBOUNDED; use super::*;