diff --git a/doc/spark_coverage.txt b/doc/spark_coverage.txt
new file mode 100644
index 000000000..f7ef388e1
--- /dev/null
+++ b/doc/spark_coverage.txt
@@ -0,0 +1,421 @@
++---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|name |details |
++---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+|! |{PASSED, [{SELECT ! true;, OK}]} |
+|% |{PASSED, [{SELECT 2 % 1.8;, OK}]} |
+|& |{PASSED, [{SELECT 3 & 5;, OK}]} |
+|* |{PASSED, [{SELECT 2 * 3;, OK}]} |
+|+ |{PASSED, [{SELECT 1 + 2;, OK}]} |
+|- |{PASSED, [{SELECT 2 - 1;, OK}]} |
+|/ |{PASSED, [{SELECT 3 / 2;, OK}]} |
+|< |{PASSED, [{SELECT 1 < 2;, OK}]} |
+|<= |{PASSED, [{SELECT 2 <= 2;, OK}]} |
+|<=> |{PASSED, [{SELECT 2 <=> 2;, OK}]} |
+|= |{PASSED, [{SELECT 2 = 2;, OK}]} |
+|== |{PASSED, [{SELECT 2 == 2;, OK}]} |
+|> |{PASSED, [{SELECT 2 > 1;, OK}]} |
+|>= |{PASSED, [{SELECT 2 >= 1;, OK}]} |
+|^ |{PASSED, [{SELECT 3 ^ 5;, OK}]} |
+|abs |{PASSED, [{SELECT abs(-1);, OK}]} |
+|acos |{PASSED, [{SELECT acos(1);, OK}]} |
+|acosh |{PASSED, [{SELECT acosh(1);, OK}]} |
+|add_months |{PASSED, [{SELECT add_months('2016-08-31', 1);, OK}]} |
+|aes_decrypt |{PASSED, [{SELECT aes_decrypt(unhex('83F16B2AA704794132802D248E6BFD4E380078182D1544813898AC97E709B28A94'), '0000111122223333');, OK}]} |
+|aes_encrypt |{FAILED, [{SELECT hex(aes_encrypt('Spark', '0000111122223333'));, Failed on something else. Check query manually}]} |
+|aggregate |{FAILED, [{SELECT aggregate(array(1, 2, 3), 0, (acc, x) -> acc + x);, Unsupported}]} |
+|and |{PASSED, [{SELECT true and true;, OK}]} |
+|any |{FAILED, [{SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} |
+|any_value |{FAILED, [{SELECT any_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
+|approx_count_distinct |{FAILED, [{SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1);, Unsupported}]} |
+|approx_percentile |{FAILED, [{SELECT approx_percentile(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);, Unsupported}]} |
+|array |{FAILED, [{SELECT array(1, 2, 3);, Unsupported}]} |
+|array_agg |{FAILED, [{SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} |
+|array_append |{FAILED, [{SELECT array_append(array('b', 'd', 'c', 'a'), 'd');, Unsupported}]} |
+|array_compact |{FAILED, [{SELECT array_compact(array(1, 2, 3, null));, Unsupported}]} |
+|array_contains |{PASSED, [{SELECT array_contains(array(1, 2, 3), 2);, OK}]} |
+|array_distinct |{FAILED, [{SELECT array_distinct(array(1, 2, 3, null, 3));, Unsupported}]} |
+|array_except |{FAILED, [{SELECT array_except(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} |
+|array_insert |{FAILED, [{SELECT array_insert(array(1, 2, 3, 4), 5, 5);, Unsupported}]} |
+|array_intersect |{FAILED, [{SELECT array_intersect(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} |
+|array_join |{PASSED, [{SELECT array_join(array('hello', 'world'), ' ');, OK}]} |
+|array_max |{PASSED, [{SELECT array_max(array(1, 20, null, 3));, OK}]} |
+|array_min |{PASSED, [{SELECT array_min(array(1, 20, null, 3));, OK}]} |
+|array_position |{PASSED, [{SELECT array_position(array(3, 2, 1), 1);, OK}]} |
+|array_remove |{FAILED, [{SELECT array_remove(array(1, 2, 3, null, 3), 3);, Unsupported}]} |
+|array_repeat |{FAILED, [{SELECT array_repeat('123', 2);, Unsupported}]} |
+|array_size |{PASSED, [{SELECT array_size(array('b', 'd', 'c', 'a'));, OK}]} |
+|array_sort |{FAILED, [{SELECT array_sort(array(5, 6, 1), (left, right) -> case when left < right then -1 when left > right then 1 else 0 end);, Unsupported}]} |
+|array_union |{FAILED, [{SELECT array_union(array(1, 2, 3), array(1, 3, 5));, Unsupported}]} |
+|arrays_overlap |{PASSED, [{SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5));, OK}]} |
+|arrays_zip |{FAILED, [{SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4));, Unsupported}]} |
+|ascii |{PASSED, [{SELECT ascii('222');, OK}]} |
+|asin |{PASSED, [{SELECT asin(0);, OK}]} |
+|asinh |{PASSED, [{SELECT asinh(0);, OK}]} |
+|assert_true |{PASSED, [{SELECT assert_true(0 < 1);, OK}]} |
+|atan |{PASSED, [{SELECT atan(0);, OK}]} |
+|atan2 |{PASSED, [{SELECT atan2(0, 0);, OK}]} |
+|atanh |{PASSED, [{SELECT atanh(0);, OK}]} |
+|avg |{FAILED, [{SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|base64 |{PASSED, [{SELECT base64('Spark SQL');, OK}]} |
+|bigint |{SKIPPED, []} |
+|bin |{PASSED, [{SELECT bin(13);, OK}]} |
+|binary |{SKIPPED, []} |
+|bit_and |{FAILED, [{SELECT bit_and(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} |
+|bit_count |{PASSED, [{SELECT bit_count(0);, OK}]} |
+|bit_get |{PASSED, [{SELECT bit_get(11, 0);, OK}]} |
+|bit_length |{PASSED, [{SELECT bit_length('Spark SQL');, OK}]} |
+|bit_or |{FAILED, [{SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} |
+|bit_xor |{FAILED, [{SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);, Unsupported}]} |
+|bool_and |{FAILED, [{SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);, Unsupported}]} |
+|bool_or |{FAILED, [{SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} |
+|boolean |{SKIPPED, []} |
+|bround |{PASSED, [{SELECT bround(2.5, 0);, OK}]} |
+|btrim |{PASSED, [{SELECT btrim(' SparkSQL ');, OK}]} |
+|cardinality |{PASSED, [{SELECT cardinality(array('b', 'd', 'c', 'a'));, OK}]} |
+|cast |{PASSED, [{SELECT cast('10' as int);, OK}]} |
+|cbrt |{PASSED, [{SELECT cbrt(27.0);, OK}]} |
+|ceil |{PASSED, [{SELECT ceil(-0.1);, OK}]} |
+|ceiling |{PASSED, [{SELECT ceiling(-0.1);, OK}]} |
+|char |{PASSED, [{SELECT char(65);, OK}]} |
+|char_length |{PASSED, [{SELECT char_length('Spark SQL ');, OK}]} |
+|character_length |{PASSED, [{SELECT character_length('Spark SQL ');, OK}]} |
+|chr |{PASSED, [{SELECT chr(65);, OK}]} |
+|coalesce |{PASSED, [{SELECT coalesce(NULL, 1, NULL);, OK}]} |
+|collect_list |{FAILED, [{SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} |
+|collect_set |{FAILED, [{SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} |
+|concat |{PASSED, [{SELECT concat('Spark', 'SQL');, OK}]} |
+|concat_ws |{PASSED, [{SELECT concat_ws(' ', 'Spark', 'SQL');, OK}]} |
+|contains |{PASSED, [{SELECT contains('Spark SQL', 'Spark');, OK}]} |
+|conv |{PASSED, [{SELECT conv('100', 2, 10);, OK}]} |
+|convert_timezone |{FAILED, [{SELECT convert_timezone('Europe/Brussels', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00');, Failed on native side}]} |
+|corr |{FAILED, [{SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2);, Unsupported}]} |
+|cos |{PASSED, [{SELECT cos(0);, OK}]} |
+|cosh |{PASSED, [{SELECT cosh(0);, OK}]} |
+|cot |{PASSED, [{SELECT cot(1);, OK}]} |
+|count |{FAILED, [{SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col);, Unsupported}]} |
+|count_if |{FAILED, [{SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);, Unsupported}]} |
+|count_min_sketch |{FAILED, [{SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col);, Unsupported}]} |
+|covar_pop |{FAILED, [{SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);, Unsupported}]} |
+|covar_samp |{FAILED, [{SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);, Unsupported}]} |
+|crc32 |{PASSED, [{SELECT crc32('Spark');, OK}]} |
+|csc |{PASSED, [{SELECT csc(1);, OK}]} |
+|cume_dist |{FAILED, [{SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|curdate |{PASSED, [{SELECT curdate();, OK}]} |
+|current_catalog |{PASSED, [{SELECT current_catalog();, OK}]} |
+|current_database |{PASSED, [{SELECT current_database();, OK}]} |
+|current_date |{PASSED, [{SELECT current_date();, OK}]} |
+|current_schema |{PASSED, [{SELECT current_schema();, OK}]} |
+|current_timestamp |{FAILED, [{SELECT current_timestamp();, Failed on something else. Check query manually}]} |
+|current_timezone |{PASSED, [{SELECT current_timezone();, OK}]} |
+|current_user |{PASSED, [{SELECT current_user();, OK}]} |
+|date |{SKIPPED, []} |
+|date_add |{PASSED, [{SELECT date_add('2016-07-30', 1);, OK}]} |
+|date_diff |{PASSED, [{SELECT date_diff('2009-07-31', '2009-07-30');, OK}]} |
+|date_format |{PASSED, [{SELECT date_format('2016-04-08', 'y');, OK}]} |
+|date_from_unix_date |{PASSED, [{SELECT date_from_unix_date(1);, OK}]} |
+|date_part |{PASSED, [{SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} |
+|date_sub |{PASSED, [{SELECT date_sub('2016-07-30', 1);, OK}]} |
+|date_trunc |{PASSED, [{SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359');, OK}]} |
+|dateadd |{PASSED, [{SELECT dateadd('2016-07-30', 1);, OK}]} |
+|datediff |{PASSED, [{SELECT datediff('2009-07-31', '2009-07-30');, OK}]} |
+|datepart |{PASSED, [{SELECT datepart('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} |
+|day |{PASSED, [{SELECT day('2009-07-30');, OK}]} |
+|dayofmonth |{PASSED, [{SELECT dayofmonth('2009-07-30');, OK}]} |
+|dayofweek |{PASSED, [{SELECT dayofweek('2009-07-30');, OK}]} |
+|dayofyear |{PASSED, [{SELECT dayofyear('2016-04-09');, OK}]} |
+|decimal |{SKIPPED, []} |
+|decode |{PASSED, [{SELECT decode(encode('abc', 'utf-8'), 'utf-8');, OK}]} |
+|degrees |{PASSED, [{SELECT degrees(3.141592653589793);, OK}]} |
+|dense_rank |{FAILED, [{SELECT a, b, dense_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|div |{PASSED, [{SELECT 3 div 2;, OK}]} |
+|double |{SKIPPED, []} |
+|e |{PASSED, [{SELECT e();, OK}]} |
+|element_at |{PASSED, [{SELECT element_at(array(1, 2, 3), 2);, OK}]} |
+|elt |{FAILED, [{SELECT elt(1, 'scala', 'java');, Unsupported}]} |
+|encode |{PASSED, [{SELECT encode('abc', 'utf-8');, OK}]} |
+|endswith |{PASSED, [{SELECT endswith('Spark SQL', 'SQL');, OK}]} |
+|equal_null |{PASSED, [{SELECT equal_null(3, 3);, OK}]} |
+|every |{FAILED, [{SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col);, Unsupported}]} |
+|exists |{FAILED, [{SELECT exists(array(1, 2, 3), x -> x % 2 == 0);, Unsupported}]} |
+|exp |{PASSED, [{SELECT exp(0);, OK}]} |
+|explode |{FAILED, [{SELECT explode(array(10, 20));, Unsupported}]} |
+|explode_outer |{FAILED, [{SELECT explode_outer(array(10, 20));, Unsupported}]} |
+|expm1 |{PASSED, [{SELECT expm1(0);, OK}]} |
+|extract |{PASSED, [{SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456');, OK}]} |
+|factorial |{PASSED, [{SELECT factorial(5);, OK}]} |
+|filter |{FAILED, [{SELECT filter(array(1, 2, 3), x -> x % 2 == 1);, Unsupported}]} |
+|find_in_set |{PASSED, [{SELECT find_in_set('ab','abc,b,ab,c,def');, OK}]} |
+|first |{FAILED, [{SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
+|first_value |{FAILED, [{SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
+|flatten |{FAILED, [{SELECT flatten(array(array(1, 2), array(3, 4)));, Unsupported}]} |
+|float |{SKIPPED, []} |
+|floor |{PASSED, [{SELECT floor(-0.1);, OK}]} |
+|forall |{FAILED, [{SELECT forall(array(1, 2, 3), x -> x % 2 == 0);, Unsupported}]} |
+|format_number |{PASSED, [{SELECT format_number(12332.123456, 4);, OK}]} |
+|format_string |{PASSED, [{SELECT format_string("Hello World %d %s", 100, "days");, OK}]} |
+|from_csv |{FAILED, [{SELECT from_csv('1, 0.8', 'a INT, b DOUBLE');, Unsupported}]} |
+|from_json |{FAILED, [{SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE');, Unsupported}]} |
+|from_unixtime |{PASSED, [{SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss');, OK}]} |
+|from_utc_timestamp |{PASSED, [{SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');, OK}]} |
+|get |{PASSED, [{SELECT get(array(1, 2, 3), 0);, OK}]} |
+|get_json_object |{PASSED, [{SELECT get_json_object('{"a":"b"}', '$.a');, OK}]} |
+|getbit |{PASSED, [{SELECT getbit(11, 0);, OK}]} |
+|greatest |{PASSED, [{SELECT greatest(10, 9, 2, 4, 3);, OK}]} |
+|grouping |{FAILED, [{SELECT name, grouping(name), sum(age) FROM VALUES (2, 'Alice'), (5, 'Bob') people(age, name) GROUP BY cube(name);, Failed on something else. Check query manually}]} |
+|grouping_id |{FAILED, [{SELECT name, grouping_id(), sum(age), avg(height) FROM VALUES (2, 'Alice', 165), (5, 'Bob', 180) people(age, name, height) GROUP BY cube(name, height);, Failed on something else. Check query manually}]} |
+|hash |{PASSED, [{SELECT hash('Spark', array(123), 2);, OK}]} |
+|hex |{PASSED, [{SELECT hex(17);, OK}]} |
+|histogram_numeric |{FAILED, [{SELECT histogram_numeric(col, 5) FROM VALUES (0), (1), (2), (10) AS tab(col);, Unsupported}]} |
+|hour |{PASSED, [{SELECT hour('2009-07-30 12:58:59');, OK}]} |
+|hypot |{PASSED, [{SELECT hypot(3, 4);, OK}]} |
+|if |{PASSED, [{SELECT if(1 < 2, 'a', 'b');, OK}]} |
+|ifnull |{FAILED, [{SELECT ifnull(NULL, array('2'));, Unsupported}]} |
+|ilike |{PASSED, [{SELECT ilike('Spark', '_Park');, OK}]} |
+|in |{PASSED, [{SELECT 1 in(1, 2, 3);, OK}]} |
+|initcap |{PASSED, [{SELECT initcap('sPark sql');, OK}]} |
+|inline |{FAILED, [{SELECT inline(array(struct(1, 'a'), struct(2, 'b')));, Unsupported}]} |
+|inline_outer |{FAILED, [{SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b')));, Unsupported}]} |
+|input_file_block_length |{FAILED, [{SELECT input_file_block_length();, Unsupported}]} |
+|input_file_block_start |{FAILED, [{SELECT input_file_block_start();, Unsupported}]} |
+|input_file_name |{FAILED, [{SELECT input_file_name();, Unsupported}]} |
+|instr |{PASSED, [{SELECT instr('SparkSQL', 'SQL');, OK}]} |
+|int |{SKIPPED, []} |
+|isnan |{PASSED, [{SELECT isnan(cast('NaN' as double));, OK}]} |
+|isnotnull |{PASSED, [{SELECT isnotnull(1);, OK}]} |
+|isnull |{PASSED, [{SELECT isnull(1);, OK}]} |
+|java_method |{FAILED, [{SELECT java_method('java.util.UUID', 'randomUUID');, Unsupported}]} |
+|json_array_length |{PASSED, [{SELECT json_array_length('[1,2,3,4]');, OK}]} |
+|json_object_keys |{FAILED, [{SELECT json_object_keys('{}');, Unsupported}]} |
+|json_tuple |{FAILED, [{SELECT json_tuple('{"a":1, "b":2}', 'a', 'b');, Unsupported}]} |
+|kurtosis |{FAILED, [{SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);, Unsupported}]} |
+|lag |{FAILED, [{SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|last |{FAILED, [{SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
+|last_day |{PASSED, [{SELECT last_day('2009-01-12');, OK}]} |
+|last_value |{FAILED, [{SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col);, Unsupported}]} |
+|lcase |{PASSED, [{SELECT lcase('SparkSql');, OK}]} |
+|lead |{FAILED, [{SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|least |{PASSED, [{SELECT least(10, 9, 2, 4, 3);, OK}]} |
+|left |{PASSED, [{SELECT left('Spark SQL', 3);, OK}]} |
+|len |{PASSED, [{SELECT len('Spark SQL ');, OK}]} |
+|length |{PASSED, [{SELECT length('Spark SQL ');, OK}]} |
+|levenshtein |{PASSED, [{SELECT levenshtein('kitten', 'sitting');, OK}]} |
+|like |{PASSED, [{SELECT like('Spark', '_park');, OK}]} |
+|ln |{PASSED, [{SELECT ln(1);, OK}]} |
+|localtimestamp |{FAILED, [{SELECT localtimestamp();, Failed on native side}]} |
+|locate |{PASSED, [{SELECT locate('bar', 'foobarbar');, OK}]} |
+|log |{PASSED, [{SELECT log(10, 100);, OK}]} |
+|log10 |{PASSED, [{SELECT log10(10);, OK}]} |
+|log1p |{PASSED, [{SELECT log1p(0);, OK}]} |
+|log2 |{PASSED, [{SELECT log2(2);, OK}]} |
+|lower |{PASSED, [{SELECT lower('SparkSql');, OK}]} |
+|lpad |{PASSED, [{SELECT lpad('hi', 5, '??');, OK}]} |
+|ltrim |{PASSED, [{SELECT ltrim(' SparkSQL ');, OK}]} |
+|make_date |{PASSED, [{SELECT make_date(2013, 7, 15);, OK}]} |
+|make_dt_interval |{FAILED, [{SELECT make_dt_interval(1, 12, 30, 01.001001);, Unsupported}]} |
+|make_interval |{FAILED, [{SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001);, Unsupported}]} |
+|make_timestamp |{PASSED, [{SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887);, OK}]} |
+|make_timestamp_ltz |{PASSED, [{SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887);, OK}]} |
+|make_timestamp_ntz |{FAILED, [{SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887);, Failed on native side}]} |
+|make_ym_interval |{FAILED, [{SELECT make_ym_interval(1, 2);, Unsupported}]} |
+|map |{FAILED, [{SELECT map(1.0, '2', 3.0, '4');, Unsupported}]} |
+|map_concat |{FAILED, [{SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c'));, Unsupported}]} |
+|map_contains_key |{PASSED, [{SELECT map_contains_key(map(1, 'a', 2, 'b'), 1);, OK}]} |
+|map_entries |{FAILED, [{SELECT map_entries(map(1, 'a', 2, 'b'));, Unsupported}]} |
+|map_filter |{FAILED, [{SELECT map_filter(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v);, Unsupported}]} |
+|map_from_arrays |{FAILED, [{SELECT map_from_arrays(array(1.0, 3.0), array('2', '4'));, Unsupported}]} |
+|map_from_entries |{FAILED, [{SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b')));, Unsupported}]} |
+|map_keys |{FAILED, [{SELECT map_keys(map(1, 'a', 2, 'b'));, Unsupported}]} |
+|map_values |{FAILED, [{SELECT map_values(map(1, 'a', 2, 'b'));, Unsupported}]} |
+|map_zip_with |{FAILED, [{SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2));, Unsupported}]} |
+|mask |{PASSED, [{SELECT mask('abcd-EFGH-8765-4321');, OK}]} |
+|max |{FAILED, [{SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col);, Unsupported}]} |
+|max_by |{FAILED, [{SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y);, Unsupported}]} |
+|md5 |{PASSED, [{SELECT md5('Spark');, OK}]} |
+|mean |{FAILED, [{SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|median |{FAILED, [{SELECT median(col) FROM VALUES (0), (10) AS tab(col);, Unsupported}]} |
+|min |{FAILED, [{SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col);, Unsupported}]} |
+|min_by |{FAILED, [{SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y);, Unsupported}]} |
+|minute |{PASSED, [{SELECT minute('2009-07-30 12:58:59');, OK}]} |
+|mod |{PASSED, [{SELECT 2 % 1.8;, OK}]} |
+|mode |{FAILED, [{SELECT mode(col) FROM VALUES (0), (10), (10) AS tab(col);, Unsupported}]} |
+|monotonically_increasing_id|{FAILED, [{SELECT monotonically_increasing_id();, Unsupported}]} |
+|month |{PASSED, [{SELECT month('2016-07-30');, OK}]} |
+|months_between |{PASSED, [{SELECT months_between('1997-02-28 10:30:00', '1996-10-30');, OK}]} |
+|named_struct |{FAILED, [{SELECT named_struct("a", 1, "b", 2, "c", 3);, Unsupported}]} |
+|nanvl |{PASSED, [{SELECT nanvl(cast('NaN' as double), 123);, OK}]} |
+|negative |{PASSED, [{SELECT negative(1);, OK}]} |
+|next_day |{PASSED, [{SELECT next_day('2015-01-14', 'TU');, OK}]} |
+|not |{PASSED, [{SELECT not true;, OK}]} |
+|now |{FAILED, [{SELECT now();, Failed on something else. Check query manually}]} |
+|nth_value |{FAILED, [{SELECT a, b, nth_value(b, 2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|ntile |{FAILED, [{SELECT a, b, ntile(2) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|nullif |{PASSED, [{SELECT nullif(2, 2);, OK}]} |
+|nvl |{FAILED, [{SELECT nvl(NULL, array('2'));, Unsupported}]} |
+|nvl2 |{PASSED, [{SELECT nvl2(NULL, 2, 1);, OK}]} |
+|octet_length |{PASSED, [{SELECT octet_length('Spark SQL');, OK}]} |
+|or |{PASSED, [{SELECT true or false;, OK}]} |
+|overlay |{PASSED, [{SELECT overlay('Spark SQL' PLACING '_' FROM 6);, OK}]} |
+|parse_url |{FAILED, [{SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST');, Unsupported}]} |
+|percent_rank |{FAILED, [{SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|percentile |{FAILED, [{SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col);, Unsupported}]} |
+|percentile_approx |{FAILED, [{SELECT percentile_approx(col, array(0.5, 0.4, 0.1), 100) FROM VALUES (0), (1), (2), (10) AS tab(col);, Unsupported}]} |
+|pi |{PASSED, [{SELECT pi();, OK}]} |
+|pmod |{PASSED, [{SELECT pmod(10, 3);, OK}]} |
+|posexplode |{FAILED, [{SELECT posexplode(array(10,20));, Unsupported}]} |
+|posexplode_outer |{FAILED, [{SELECT posexplode_outer(array(10,20));, Unsupported}]} |
+|position |{PASSED, [{SELECT position('bar', 'foobarbar');, OK}]} |
+|positive |{PASSED, [{SELECT positive(1);, OK}]} |
+|pow |{PASSED, [{SELECT pow(2, 3);, OK}]} |
+|power |{PASSED, [{SELECT power(2, 3);, OK}]} |
+|printf |{PASSED, [{SELECT printf("Hello World %d %s", 100, "days");, OK}]} |
+|quarter |{PASSED, [{SELECT quarter('2016-08-31');, OK}]} |
+|radians |{PASSED, [{SELECT radians(180);, OK}]} |
+|raise_error |{FAILED, [{SELECT raise_error('custom error message');, Unsupported}]} |
+|rand |{FAILED, [{SELECT rand();, Unsupported}]} |
+|randn |{FAILED, [{SELECT randn();, Unsupported}]} |
+|random |{FAILED, [{SELECT random();, Unsupported}]} |
+|rank |{FAILED, [{SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|reduce |{FAILED, [{SELECT reduce(array(1, 2, 3), 0, (acc, x) -> acc + x);, Unsupported}]} |
+|reflect |{FAILED, [{SELECT reflect('java.util.UUID', 'randomUUID');, Unsupported}]} |
+|regexp |{FAILED, [{SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*');, Failed on something else. Check query manually}]} |
+|regexp_count |{PASSED, [{SELECT regexp_count('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en');, OK}]} |
+|regexp_extract |{PASSED, [{SELECT regexp_extract('100-200', '(\\d+)-(\\d+)', 1);, OK}]} |
+|regexp_extract_all |{FAILED, [{SELECT regexp_extract_all('100-200, 300-400', '(\\d+)-(\\d+)', 1);, Unsupported}]} |
+|regexp_instr |{PASSED, [{SELECT regexp_instr('user@spark.apache.org', '@[^.]*');, OK}]} |
+|regexp_like |{FAILED, [{SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*');, Failed on something else. Check query manually}]} |
+|regexp_replace |{PASSED, [{SELECT regexp_replace('100-200', '(\\d+)', 'num');, OK}]} |
+|regexp_substr |{PASSED, [{SELECT regexp_substr('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en');, OK}]} |
+|regr_avgx |{FAILED, [{SELECT regr_avgx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_avgy |{FAILED, [{SELECT regr_avgy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_count |{FAILED, [{SELECT regr_count(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_intercept |{FAILED, [{SELECT regr_intercept(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);, Unsupported}]} |
+|regr_r2 |{FAILED, [{SELECT regr_r2(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_slope |{FAILED, [{SELECT regr_slope(y, x) FROM VALUES (1,1), (2,2), (3,3) AS tab(y, x);, Unsupported}]} |
+|regr_sxx |{FAILED, [{SELECT regr_sxx(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_sxy |{FAILED, [{SELECT regr_sxy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|regr_syy |{FAILED, [{SELECT regr_syy(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x);, Unsupported}]} |
+|repeat |{PASSED, [{SELECT repeat('123', 2);, OK}]} |
+|replace |{PASSED, [{SELECT replace('ABCabc', 'abc', 'DEF');, OK}]} |
+|reverse |{PASSED, [{SELECT reverse('Spark SQL');, OK}]} |
+|right |{PASSED, [{SELECT right('Spark SQL', 3);, OK}]} |
+|rint |{PASSED, [{SELECT rint(12.3456);, OK}]} |
+|rlike |{FAILED, [{SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*');, Failed on something else. Check query manually}]} |
+|round |{PASSED, [{SELECT round(2.5, 0);, OK}]} |
+|row_number |{FAILED, [{SELECT a, b, row_number() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b);, Unsupported}]} |
+|rpad |{PASSED, [{SELECT rpad('hi', 5, '??');, OK}]} |
+|rtrim |{PASSED, [{SELECT rtrim(' SparkSQL ');, OK}]} |
+|schema_of_csv |{PASSED, [{SELECT schema_of_csv('1,abc');, OK}]} |
+|schema_of_json |{PASSED, [{SELECT schema_of_json('[{"col":0}]');, OK}]} |
+|sec |{PASSED, [{SELECT sec(0);, OK}]} |
+|second |{PASSED, [{SELECT second('2009-07-30 12:58:59');, OK}]} |
+|sentences |{FAILED, [{SELECT sentences('Hi there! Good morning.');, Unsupported}]} |
+|sequence |{FAILED, [{SELECT sequence(1, 5);, Unsupported}]} |
+|session_window |{FAILED, [{SELECT a, session_window.start, session_window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:10:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, session_window(b, '5 minutes') ORDER BY a, start;, Failed on something else. Check query manually}]} |
+|sha |{PASSED, [{SELECT sha('Spark');, OK}]} |
+|sha1 |{PASSED, [{SELECT sha1('Spark');, OK}]} |
+|sha2 |{PASSED, [{SELECT sha2('Spark', 256);, OK}]} |
+|shiftleft |{PASSED, [{SELECT shiftleft(2, 1);, OK}]} |
+|shiftright |{PASSED, [{SELECT shiftright(4, 1);, OK}]} |
+|shiftrightunsigned |{PASSED, [{SELECT shiftrightunsigned(4, 1);, OK}]} |
+|shuffle |{FAILED, [{SELECT shuffle(array(1, 20, 3, 5));, Unsupported}]} |
+|sign |{PASSED, [{SELECT sign(40);, OK}]} |
+|signum |{PASSED, [{SELECT signum(40);, OK}]} |
+|sin |{PASSED, [{SELECT sin(0);, OK}]} |
+|sinh |{PASSED, [{SELECT sinh(0);, OK}]} |
+|size |{PASSED, [{SELECT size(array('b', 'd', 'c', 'a'));, OK}]} |
+|skewness |{FAILED, [{SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);, Unsupported}]} |
+|slice |{FAILED, [{SELECT slice(array(1, 2, 3, 4), 2, 2);, Unsupported}]} |
+|smallint |{SKIPPED, []} |
+|some |{FAILED, [{SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col);, Unsupported}]} |
+|sort_array |{FAILED, [{SELECT sort_array(array('b', 'd', null, 'c', 'a'), true);, Unsupported}]} |
+|soundex |{PASSED, [{SELECT soundex('Miller');, OK}]} |
+|space |{PASSED, [{SELECT concat(space(2), '1');, OK}]} |
+|spark_partition_id |{FAILED, [{SELECT spark_partition_id();, Unsupported}]} |
+|split |{FAILED, [{SELECT split('oneAtwoBthreeC', '[ABC]');, Unsupported}]} |
+|split_part |{PASSED, [{SELECT split_part('11.12.13', '.', 3);, OK}]} |
+|sqrt |{PASSED, [{SELECT sqrt(4);, OK}]} |
+|stack |{FAILED, [{SELECT stack(2, 1, 2, 3);, Unsupported}]} |
+|startswith |{PASSED, [{SELECT startswith('Spark SQL', 'Spark');, OK}]} |
+|std |{FAILED, [{SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|stddev |{FAILED, [{SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|stddev_pop |{FAILED, [{SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|stddev_samp |{FAILED, [{SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|str_to_map |{FAILED, [{SELECT str_to_map('a:1,b:2,c:3', ',', ':');, Unsupported}]} |
+|string |{SKIPPED, []} |
+|struct |{FAILED, [{SELECT struct(1, 2, 3);, Unsupported}]} |
+|substr |{PASSED, [{SELECT substr('Spark SQL', 5);, OK}]} |
+|substring |{PASSED, [{SELECT substring('Spark SQL', 5);, OK}]} |
+|substring_index |{PASSED, [{SELECT substring_index('www.apache.org', '.', 2);, OK}]} |
+|sum |{FAILED, [{SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col);, Unsupported}]} |
+|tan |{PASSED, [{SELECT tan(0);, OK}]} |
+|tanh |{PASSED, [{SELECT tanh(0);, OK}]} |
+|timestamp |{SKIPPED, []} |
+|timestamp_micros |{PASSED, [{SELECT timestamp_micros(1230219000123123);, OK}]} |
+|timestamp_millis |{PASSED, [{SELECT timestamp_millis(1230219000123);, OK}]} |
+|timestamp_seconds |{PASSED, [{SELECT timestamp_seconds(1230219000);, OK}]} |
+|tinyint |{SKIPPED, []} |
+|to_binary |{PASSED, [{SELECT to_binary('abc', 'utf-8');, OK}]} |
+|to_char |{PASSED, [{SELECT to_char(454, '999');, OK}]} |
+|to_csv |{PASSED, [{SELECT to_csv(named_struct('a', 1, 'b', 2));, OK}]} |
+|to_date |{PASSED, [{SELECT to_date('2009-07-30 04:17:52');, OK}]} |
+|to_json |{PASSED, [{SELECT to_json(named_struct('a', 1, 'b', 2));, OK}]} |
+|to_number |{PASSED, [{SELECT to_number('454', '999');, OK}]} |
+|to_timestamp |{PASSED, [{SELECT to_timestamp('2016-12-31 00:12:00');, OK}]} |
+|to_timestamp_ltz |{PASSED, [{SELECT to_timestamp_ltz('2016-12-31 00:12:00');, OK}]} |
+|to_timestamp_ntz |{FAILED, [{SELECT to_timestamp_ntz('2016-12-31 00:12:00');, Failed on native side}]} |
+|to_unix_timestamp |{PASSED, [{SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd');, OK}]} |
+|to_utc_timestamp |{PASSED, [{SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul');, OK}]} |
+|transform |{FAILED, [{SELECT transform(array(1, 2, 3), x -> x + 1);, Unsupported}]} |
+|transform_keys |{FAILED, [{SELECT transform_keys(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> k + 1);, Unsupported}]} |
+|transform_values |{FAILED, [{SELECT transform_values(map_from_arrays(array(1, 2, 3), array(1, 2, 3)), (k, v) -> v + 1);, Unsupported}]} |
+|translate |{PASSED, [{SELECT translate('AaBbCc', 'abc', '123');, OK}]} |
+|trim |{PASSED, [{SELECT trim(' SparkSQL ');, OK}]} |
+|trunc |{PASSED, [{SELECT trunc('2019-08-04', 'week');, OK}]} |
+|try_add |{PASSED, [{SELECT try_add(1, 2);, OK}]} |
+|try_avg |{FAILED, [{SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|try_divide |{PASSED, [{SELECT try_divide(3, 2);, OK}]} |
+|try_element_at |{PASSED, [{SELECT try_element_at(array(1, 2, 3), 2);, OK}]} |
+|try_multiply |{PASSED, [{SELECT try_multiply(2, 3);, OK}]} |
+|try_subtract |{PASSED, [{SELECT try_subtract(2, 1);, OK}]} |
+|try_sum |{FAILED, [{SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col);, Unsupported}]} |
+|try_to_binary |{PASSED, [{SELECT try_to_binary('abc', 'utf-8');, OK}]} |
+|try_to_number |{PASSED, [{SELECT try_to_number('454', '999');, OK}]} |
+|try_to_timestamp |{PASSED, [{SELECT try_to_timestamp('2016-12-31 00:12:00');, OK}]} |
+|typeof |{PASSED, [{SELECT typeof(1);, OK}]} |
+|ucase |{PASSED, [{SELECT ucase('SparkSql');, OK}]} |
+|unbase64 |{PASSED, [{SELECT unbase64('U3BhcmsgU1FM');, OK}]} |
+|unhex |{PASSED, [{SELECT decode(unhex('537061726B2053514C'), 'UTF-8');, OK}]} |
+|unix_date |{PASSED, [{SELECT unix_date(DATE("1970-01-02"));, OK}]} |
+|unix_micros |{PASSED, [{SELECT unix_micros(TIMESTAMP('1970-01-01 00:00:01Z'));, OK}]} |
+|unix_millis |{PASSED, [{SELECT unix_millis(TIMESTAMP('1970-01-01 00:00:01Z'));, OK}]} |
+|unix_seconds |{PASSED, [{SELECT unix_seconds(TIMESTAMP('1970-01-01 00:00:01Z'));, OK}]} |
+|unix_timestamp |{PASSED, [{SELECT unix_timestamp();, OK}]} |
+|upper |{PASSED, [{SELECT upper('SparkSql');, OK}]} |
+|url_decode |{PASSED, [{SELECT url_decode('https%3A%2F%2Fspark.apache.org');, OK}]} |
+|url_encode |{PASSED, [{SELECT url_encode('https://spark.apache.org');, OK}]} |
+|user |{PASSED, [{SELECT user();, OK}]} |
+|uuid |{FAILED, [{SELECT uuid();, Unsupported}]} |
+|var_pop |{FAILED, [{SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|var_samp |{FAILED, [{SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|variance |{FAILED, [{SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col);, Unsupported}]} |
+|version |{PASSED, [{SELECT version();, OK}]} |
+|weekday |{PASSED, [{SELECT weekday('2009-07-30');, OK}]} |
+|weekofyear |{PASSED, [{SELECT weekofyear('2008-02-20');, OK}]} |
+|when |{PASSED, [{SELECT CASE WHEN 1 > 0 THEN 1 WHEN 2 > 0 THEN 2.0 ELSE 1.2 END;, OK}]} |
+|width_bucket |{PASSED, [{SELECT width_bucket(5.3, 0.2, 10.6, 5);, OK}]} |
+|window_time |{FAILED, [{SELECT a, window.start as start, window.end as end, window_time(window), cnt FROM (SELECT a, window, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, window.start);, Failed on something else. Check query manually}]}|
+|xpath |{FAILED, [{SELECT xpath('b1b2b3c1c2','a/b/text()');, Unsupported}]} |
+|xpath_boolean |{PASSED, [{SELECT xpath_boolean('1','a/b');, OK}]} |
+|xpath_double |{PASSED, [{SELECT xpath_double('12', 'sum(a/b)');, OK}]} |
+|xpath_float |{PASSED, [{SELECT xpath_float('12', 'sum(a/b)');, OK}]} |
+|xpath_int |{PASSED, [{SELECT xpath_int('12', 'sum(a/b)');, OK}]} |
+|xpath_long |{PASSED, [{SELECT xpath_long('12', 'sum(a/b)');, OK}]} |
+|xpath_number |{PASSED, [{SELECT xpath_number('12', 'sum(a/b)');, OK}]} |
+|xpath_short |{PASSED, [{SELECT xpath_short('12', 'sum(a/b)');, OK}]} |
+|xpath_string |{PASSED, [{SELECT xpath_string('bcc','a/c');, OK}]} |
+|xxhash64 |{PASSED, [{SELECT xxhash64('Spark', array(123), 2);, OK}]} |
+|year |{PASSED, [{SELECT year('2016-07-30');, OK}]} |
+|zip_with |{FAILED, [{SELECT zip_with(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, x));, Unsupported}]} |
+|| |{PASSED, [{SELECT 3 | 5;, OK}]} |
+|~ |{PASSED, [{SELECT ~ 0;, OK}]} |
++---------------------------+----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/doc/spark_coverage_agg.txt b/doc/spark_coverage_agg.txt
new file mode 100644
index 000000000..5c5da67ad
--- /dev/null
+++ b/doc/spark_coverage_agg.txt
@@ -0,0 +1,9 @@
++-------+----------------------------------------------+---+
+|result |reason |cnt|
++-------+----------------------------------------------+---+
+|FAILED |Unsupported |137|
+|FAILED |Failed on native side |4 |
+|PASSED |OK |254|
+|SKIPPED|null |12 |
+|FAILED |Failed on something else. Check query manually|10 |
++-------+----------------------------------------------+---+
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala
new file mode 100644
index 000000000..5b20f15ec
--- /dev/null
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+
+import scala.collection.mutable
+
+import org.scalatest.Ignore
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
+/**
+ * Manual test to calculate Comet's coverage of Spark built-in functions.
+ *
+ * Running this suite regenerates doc/spark_coverage.txt and doc/spark_coverage_agg.txt.
+ */
+
+@Ignore
+class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanHelper {
+
+ import testImplicits._
+
+ test("Test Spark builtin functions coverage") {
+ val queryPattern = """(?i)SELECT (.+?);""".r
+ val valuesPattern = """(?i)FROM VALUES(.+?);""".r
+    val selectPattern = """(?i)SELECT(.+?)FROM""".r
+ val builtinExamplesMap = spark.sessionState.functionRegistry
+ .listFunction()
+ .map(spark.sessionState.catalog.lookupFunctionInfo(_))
+ .filter(_.getSource.toLowerCase == "built-in")
+      .filter(f =>
+        !List("window").contains(f.getName.toLowerCase)) // exclude exotics; verified manually
+ .map(f => {
+ val selectRows = queryPattern.findAllMatchIn(f.getExamples).map(_.group(0)).toList
+ (f.getName, selectRows.filter(_.nonEmpty))
+ })
+ .toMap
+
+    // key   - function name
+    // value - result of running the function's first documented example through Comet
+ val resultsMap = new mutable.HashMap[String, CoverageResult]()
+
+ builtinExamplesMap.foreach {
+ case (funcName, q :: _) =>
+ val queryResult =
+ try {
+ // Example with predefined values
+ // e.g. SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col)
+            // a better option would be to parse the query and iterate through its
+            // expressions, but this is an ad-hoc coverage test
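+            // the example above becomes
+            //   prepare: select * FROM VALUES (3), (5) AS tab(col);
+            //   test:    SELECT bit_xor(col) FROM tbl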
+ if (q.toLowerCase.contains(" from values")) {
+ val select = selectPattern.findFirstMatchIn(q).map(_.group(0))
+ val values = valuesPattern.findFirstMatchIn(q).map(_.group(0))
+ (select, values) match {
+ case (Some(s), Some(v)) =>
+ testSingleLineQuery(s"select * $v", s"$s tbl")
+
+ case _ => sys.error(s"Query $q cannot be parsed properly")
+ }
+ } else {
+ // Plain example like SELECT cos(0);
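+            // rewritten as "SELECT cos(0), x from tbl" over a one-row dummy table,
+            // so the expression is evaluated inside a scan + projection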
+ testSingleLineQuery("select 'dummy' x", s"${q.dropRight(1)}, x from tbl")
+ }
+ CoverageResult("PASSED", Seq((q, "OK")))
+ } catch {
+ case e: TestFailedException
+ if e.message.getOrElse("").contains("Expected only Comet native operators") =>
+ CoverageResult("FAILED", Seq((q, "Unsupported")))
+          case e: Throwable
+              if e.getMessage != null && e.getMessage.contains("CometNativeException") =>
+            CoverageResult("FAILED", Seq((q, "Failed on native side")))
+          case _: Throwable =>
+ CoverageResult("FAILED", Seq((q, "Failed on something else. Check query manually")))
+ }
+ resultsMap.put(funcName, queryResult)
+ case (funcName, List()) =>
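+        // the function docs contain no runnable SELECT example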
+ resultsMap.put(funcName, CoverageResult("SKIPPED", Seq.empty))
+ }
+
+    // later this can be converted into HTML; for now write plain-text tables under doc/
+ resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
+ val str_agg = showString(
+ spark.sql(
+ "select result, d._2 as reason, count(1) cnt from (select name, t.details.result, explode_outer(t.details.details) as d from t) group by 1, 2"),
+ 500,
+ 0)
+ Files.write(Paths.get("doc/spark_coverage_agg.txt"), str_agg.getBytes(StandardCharsets.UTF_8))
+
+ val str = showString(spark.sql("select * from t order by 1"), 500, 0)
+ Files.write(Paths.get("doc/spark_coverage.txt"), str.getBytes(StandardCharsets.UTF_8))
+ }
+}
+
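+// result is PASSED/FAILED/SKIPPED; details pairs each example query with its outcome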
+case class CoverageResult(result: String, details: Seq[(String, String)])
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index 803f30bed..bbe7edd3c 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -19,15 +19,13 @@
package org.apache.comet
-import java.util
-
import org.apache.hadoop.fs.Path
import org.apache.spark.sql.{CometTestBase, DataFrame, Row}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
import org.apache.spark.sql.functions.expr
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.internal.SQLConf.SESSION_LOCAL_TIMEZONE
-import org.apache.spark.sql.types.{Decimal, DecimalType, StructType}
+import org.apache.spark.sql.types.{Decimal, DecimalType}
import org.apache.comet.CometSparkSessionExtensions.{isSpark32, isSpark33Plus, isSpark34Plus}
@@ -1291,30 +1289,6 @@ class CometExpressionSuite extends CometTestBase with AdaptiveSparkPlanHelper {
}
}
- // tests one liner query without necessity to create external table
- def testSingleLineQuery(
- prepareQuery: String,
- testQuery: String,
- testName: String = "test",
- tableName: String = "tbl"): Unit = {
-
- withTempDir { dir =>
- val path = new Path(dir.toURI.toString, testName).toUri.toString
- var data: java.util.List[Row] = new util.ArrayList()
- var schema: StructType = null
-
- withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
- val df = spark.sql(prepareQuery)
- data = df.collectAsList()
- schema = df.schema
- }
-
- spark.createDataFrame(data, schema).repartition(1).write.parquet(path)
- readParquetFile(path, Some(schema)) { df => df.createOrReplaceTempView(tableName) }
- checkSparkAnswerAndOperator(testQuery)
- }
- }
-
test("Decimal random number tests") {
val rand = scala.util.Random
def makeNum(p: Int, s: Int): String = {
diff --git a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
index 6fb81bc43..ff5cb6ec6 100644
--- a/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
+++ b/spark/src/test/scala/org/apache/spark/sql/CometTestBase.scala
@@ -718,4 +718,40 @@ abstract class CometTestBase
Seq.empty
}
}
+
+  // Runs a one-liner query end to end: materializes `prepareQuery` as a single-partition
+  // Parquet table named `tableName`, then checks that `testQuery` returns Spark's answer
+  // while using only Comet native operators
+ def testSingleLineQuery(
+ prepareQuery: String,
+ testQuery: String,
+ testName: String = "test",
+ tableName: String = "tbl"): Unit = {
+
+ withTempDir { dir =>
+ val path = new Path(dir.toURI.toString, testName).toUri.toString
+ var data: java.util.List[Row] = new java.util.ArrayList()
+ var schema: StructType = null
+
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ val df = spark.sql(prepareQuery)
+ data = df.collectAsList()
+ schema = df.schema
+ }
+
+ spark.createDataFrame(data, schema).repartition(1).write.parquet(path)
+ readParquetFile(path, Some(schema)) { df => df.createOrReplaceTempView(tableName) }
+ checkSparkAnswerAndOperator(testQuery)
+ }
+ }
+
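+  // Dataset.showString is package-private to org.apache.spark.sql, which is why this
+  // wrapper lives here; it renders a Dataset exactly as Dataset.show() would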
+ def showString[T](
+ df: Dataset[T],
+ _numRows: Int,
+ truncate: Int = 20,
+ vertical: Boolean = false): String = {
+ df.showString(_numRows, truncate, vertical)
+ }
}