diff --git a/rbql-js/.eslintrc.json b/rbql-js/.eslintrc.json old mode 100644 new mode 100755 diff --git a/rbql-js/cli_parser.js b/rbql-js/cli_parser.js old mode 100644 new mode 100755 diff --git a/rbql-js/cli_rbql.js b/rbql-js/cli_rbql.js index cde5686..db25c12 100755 --- a/rbql-js/cli_rbql.js +++ b/rbql-js/cli_rbql.js @@ -3,8 +3,8 @@ const fs = require('fs'); const readline = require('readline'); -var rbql = null; -var rbql_csv = null; +const rbql = require('./rbql.js'); +const rbql_csv = require('./rbql_csv.js'); const csv_utils = require('./csv_utils.js'); const cli_parser = require('./cli_parser.js'); @@ -217,8 +217,6 @@ async function run_with_js(args) { [output_delim, output_policy] = output_format == 'input' ? [delim, policy] : rbql_csv.interpret_named_csv_format(output_format); } - if (args['debug-mode']) - rbql_csv.set_debug_mode(); let user_init_code = ''; if (init_source_file !== null) user_init_code = rbql_csv.read_user_init_code(init_source_file); @@ -319,13 +317,6 @@ Description of the available CSV split policies: async function do_main(args) { - if (args['auto-rebuild-engine']) { - let build_engine = require('./build_engine.js'); - build_engine.build_engine(); - } - - rbql = require('./rbql.js'); - rbql_csv = require('./rbql_csv.js'); if (args['version']) { console.log(rbql.version); @@ -373,8 +364,6 @@ function main() { '--out-policy': {'help': 'Output policy. Use with "out-delim". Overrides out-format', 'metavar': 'POLICY'}, '--error-format': {'default': 'hr', 'help': 'Errors and warnings format. [hr|json]', 'hidden': true}, '--version': {'boolean': true, 'help': 'Print RBQL version and exit'}, - '--auto-rebuild-engine': {'boolean': true, 'help': 'Auto rebuild engine', 'hidden': true}, - '--debug-mode': {'boolean': true, 'help': 'Run in debug mode', 'hidden': true}, '--init-source-file': {'help': 'Path to init source file to use instead of ~/.rbql_init_source.js', 'hidden': true} }; let args = cli_parser.parse_cmd_args(process.argv, scheme, tool_description, epilog); diff --git a/rbql-js/csv_utils.js b/rbql-js/csv_utils.js old mode 100644 new mode 100755 diff --git a/rbql-js/rbql.js b/rbql-js/rbql.js old mode 100644 new mode 100755 index 52f0c0d..16ab5a3 --- a/rbql-js/rbql.js +++ b/rbql-js/rbql.js @@ -1,18 +1,14 @@ -// DO NOT EDIT! -// This file was autogenerated from builder.js and template.js using build_engine.js script +(function(exports){ +// The magic line above is to make the module both browser and Node compatible, see https://stackoverflow.com/questions/3225251/how-can-i-share-code-between-node-js-and-the-browser -const external_js_template_text = `__RBQLMP__user_init_code +// This module works with records only. It is CSV-agnostic. +// Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc class RbqlParsingError extends Error {} class RbqlRuntimeError extends Error {} class AssertionError extends Error {} - - -function assert(condition, message) { - if (!condition) - throw new AssertionError(message); -} +class RbqlIOHandlingError extends Error {} class InternalBadFieldError extends Error { @@ -23,25 +19,58 @@ class InternalBadFieldError extends Error { } -var unnest_list = null; +function assert(condition, message=null) { + if (!condition) { + if (!message) { + message = 'Assertion error'; + } + throw new AssertionError(message); + } +} -var module_was_used_failsafe = false; -var aggregation_stage = 0; -var functional_aggregators = []; +function replace_all(src, search, replacement) { + return src.split(search).join(replacement); +} -var writer = null; -var NU = 0; // NU - Num Updated. Alternative variables: NW (Num Where) - Not Practical. NW (Num Written) - Impossible to implement. -var NR = 0; -var NF = 0; +class RBQLContext { + constructor(query_text, input_iterator, output_writer, user_init_code) { + this.query_text = query_text; + this.input_iterator = input_iterator; + this.writer = output_writer; + this.user_init_code = user_init_code; + this.unnest_list = null; + this.top_count = null; -const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs'; -const RBQL_VERSION = '__RBQLMP__version'; + this.like_regex_cache = new Map(); + + this.sort_key_expression = null; + + this.aggregation_stage = 0; + this.aggregation_key_expression = null; + this.functional_aggregators = []; + + this.join_map_impl = null; + this.join_map = null; + this.lhs_join_var_expression = null; + + this.where_expression = null; + this.select_expression = null; + + this.update_expressions = null; + + this.variables_init_code = null; + } +} -var like_regex_cache = new Map(); +var query_context = null; // Needs to be global for MIN(), MAX(), etc functions + + +const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs'; +const RBQL_VERSION = '0.17.0'; function stable_compare(a, b) { @@ -76,7 +105,7 @@ function safe_set(record, idx, value) { function regexp_escape(text) { // From here: https://stackoverflow.com/a/6969486/2898283 - return text.replace(/[.*+?^\${}()|[\\]\\\\]/g, '\\\\$&'); // $& means the whole matched text + return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched text } @@ -102,34 +131,37 @@ function like_to_regex(pattern) { function like(text, pattern) { - let matcher = like_regex_cache.get(pattern); + let matcher = query_context.like_regex_cache.get(pattern); if (matcher === undefined) { matcher = new RegExp(like_to_regex(pattern)); - like_regex_cache.set(pattern, matcher); + query_context.like_regex_cache.set(pattern, matcher); } return matcher.test(text); } -LIKE = like; +const LIKE = like; -function RBQLAggregationToken(marker_id, value) { - this.marker_id = marker_id; - this.value = value; - this.toString = function() { +class RBQLAggregationToken { + constructor(marker_id, value) { + this.marker_id = marker_id; + this.value = value; + } + + toString() { throw new RbqlParsingError(wrong_aggregation_usage_error); } } -function UnnestMarker() {} +class UnnestMarker {} function UNNEST(vals) { - if (unnest_list !== null) { + if (query_context.unnest_list !== null) { // Technically we can support multiple UNNEST's but the implementation/algorithm is more complex and just doesn't worth it throw new RbqlParsingError('Only one UNNEST is allowed per query'); } - unnest_list = vals; + query_context.unnest_list = vals; return new UnnestMarker(); } const unnest = UNNEST; @@ -138,19 +170,21 @@ const UNFOLD = UNNEST; // "UNFOLD" is deprecated, just for backward compatibilit function parse_number(val) { - // We can do a more pedantic number test like \`/^ *-{0,1}[0-9]+\\.{0,1}[0-9]* *$/.test(val)\`, but user will probably use just Number(val) or parseInt/parseFloat + // We can do a more pedantic number test like `/^ *-{0,1}[0-9]+\.{0,1}[0-9]* *$/.test(val)`, but user will probably use just Number(val) or parseInt/parseFloat let result = Number(val); if (isNaN(result)) { - throw new RbqlRuntimeError(\`Unable to convert value "\${val}" to a number. MIN, MAX, SUM, AVG, MEDIAN and VARIANCE aggregate functions convert their string arguments to numeric values\`); + throw new RbqlRuntimeError(`Unable to convert value "${val}" to a number. MIN, MAX, SUM, AVG, MEDIAN and VARIANCE aggregate functions convert their string arguments to numeric values`); } return result; } -function MinAggregator() { - this.stats = new Map(); +class MinAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -160,17 +194,19 @@ function MinAggregator() { } } - this.get_final = function(key) { + get_final(key) { return this.stats.get(key); } } -function MaxAggregator() { - this.stats = new Map(); +class MaxAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -180,16 +216,18 @@ function MaxAggregator() { } } - this.get_final = function(key) { + get_final(key) { return this.stats.get(key); } } -function SumAggregator() { - this.stats = new Map(); +class SumAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -199,16 +237,18 @@ function SumAggregator() { } } - this.get_final = function(key) { + get_final(key) { return this.stats.get(key); } } -function AvgAggregator() { - this.stats = new Map(); +class AvgAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -220,7 +260,7 @@ function AvgAggregator() { } } - this.get_final = function(key) { + get_final(key) { var cur_aggr = this.stats.get(key); var cur_sum = cur_aggr[0]; var cur_cnt = cur_aggr[1]; @@ -230,10 +270,12 @@ function AvgAggregator() { } -function VarianceAggregator() { - this.stats = new Map(); +class VarianceAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -246,7 +288,7 @@ function VarianceAggregator() { } } - this.get_final = function(key) { + get_final(key) { var cur_aggr = this.stats.get(key); var cur_sum = cur_aggr[0]; var cur_sum_sq = cur_aggr[1]; @@ -258,10 +300,12 @@ function VarianceAggregator() { } -function MedianAggregator() { - this.stats = new Map(); +class MedianAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { @@ -271,7 +315,7 @@ function MedianAggregator() { } } - this.get_final = function(key) { + get_final(key) { var cur_aggr = this.stats.get(key); cur_aggr.sort(function(a, b) { return a - b; }); var m = Math.floor(cur_aggr.length / 2); @@ -284,10 +328,12 @@ function MedianAggregator() { } -function CountAggregator() { - this.stats = new Map(); +class CountAggregator { + constructor() { + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, 1); @@ -296,17 +342,19 @@ function CountAggregator() { } } - this.get_final = function(key) { + get_final(key) { return this.stats.get(key); } } -function ArrayAggAggregator(post_proc=null) { - this.post_proc = post_proc; - this.stats = new Map(); +class ArrayAggAggregator { + constructor(post_proc=null) { + this.post_proc = post_proc; + this.stats = new Map(); + } - this.increment = function(key, val) { + increment(key, val) { let cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, [val]); @@ -315,7 +363,7 @@ function ArrayAggAggregator(post_proc=null) { } } - this.get_final = function(key) { + get_final(key) { let cur_aggr = this.stats.get(key); if (this.post_proc === null) return cur_aggr; @@ -324,82 +372,84 @@ function ArrayAggAggregator(post_proc=null) { } -function ConstGroupVerifier(output_index) { - this.output_index = output_index; - this.const_values = new Map(); +class ConstGroupVerifier { + constructor(output_index) { + this.output_index = output_index; + this.const_values = new Map(); + } - this.increment = function(key, value) { + increment(key, value) { var old_value = this.const_values.get(key); if (old_value === undefined) { this.const_values.set(key, value); } else if (old_value != value) { - throw new RbqlRuntimeError(\`Invalid aggregate expression: non-constant values in output column \${this.output_index + 1}. E.g. "\${old_value}" and "\${value}"\`); + throw new RbqlRuntimeError(`Invalid aggregate expression: non-constant values in output column ${this.output_index + 1}. E.g. "${old_value}" and "${value}"`); } } - this.get_final = function(key) { + get_final(key) { return this.const_values.get(key); } } function init_aggregator(generator_name, val, post_proc=null) { - aggregation_stage = 1; - var res = new RBQLAggregationToken(functional_aggregators.length, val); + query_context.aggregation_stage = 1; + var res = new RBQLAggregationToken(query_context.functional_aggregators.length, val); if (post_proc === null) { - functional_aggregators.push(new generator_name()); + query_context.functional_aggregators.push(new generator_name()); } else { - functional_aggregators.push(new generator_name(post_proc)); + query_context.functional_aggregators.push(new generator_name(post_proc)); } return res; } function MIN(val) { - return aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val; } const min = MIN; const Min = MIN; function MAX(val) { - return aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val; } const max = MAX; const Max = MAX; function COUNT(val) { - return aggregation_stage < 2 ? init_aggregator(CountAggregator, 1) : 1; + return query_context.aggregation_stage < 2 ? init_aggregator(CountAggregator, 1) : 1; } const count = COUNT; const Count = COUNT; function SUM(val) { - return aggregation_stage < 2 ? init_aggregator(SumAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(SumAggregator, val) : val; } const sum = SUM; const Sum = SUM; function AVG(val) { - return aggregation_stage < 2 ? init_aggregator(AvgAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(AvgAggregator, val) : val; } const avg = AVG; const Avg = AVG; function VARIANCE(val) { - return aggregation_stage < 2 ? init_aggregator(VarianceAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(VarianceAggregator, val) : val; } const variance = VARIANCE; const Variance = VARIANCE; function MEDIAN(val) { - return aggregation_stage < 2 ? init_aggregator(MedianAggregator, val) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(MedianAggregator, val) : val; } const median = MEDIAN; const Median = MEDIAN; function ARRAY_AGG(val, post_proc=null) { - return aggregation_stage < 2 ? init_aggregator(ArrayAggAggregator, val, post_proc) : val; + return query_context.aggregation_stage < 2 ? init_aggregator(ArrayAggAggregator, val, post_proc) : val; } const array_agg = ARRAY_AGG; const FOLD = ARRAY_AGG; // "FOLD" is deprecated, just for backward compatibility @@ -412,29 +462,34 @@ function add_to_set(dst_set, value) { } -function TopWriter(subwriter) { - this.subwriter = subwriter; - this.NW = 0; +class TopWriter { + constructor(subwriter, top_count) { + this.subwriter = subwriter; + this.NW = 0; + this.top_count = top_count; + } - this.write = function(record) { - if (__RBQLMP__top_count !== null && this.NW >= __RBQLMP__top_count) + write(record) { + if (this.top_count !== null && this.NW >= this.top_count) return false; this.subwriter.write(record); this.NW += 1; return true; } - this.finish = async function() { + async finish() { await this.subwriter.finish(); } } -function UniqWriter(subwriter) { - this.subwriter = subwriter; - this.seen = new Set(); +class UniqWriter { + constructor(subwriter) { + this.subwriter = subwriter; + this.seen = new Set(); + } - this.write = function(record) { + write(record) { if (!add_to_set(this.seen, JSON.stringify(record))) return true; if (!this.subwriter.write(record)) @@ -442,17 +497,19 @@ function UniqWriter(subwriter) { return true; } - this.finish = async function() { + async finish() { await this.subwriter.finish(); } } -function UniqCountWriter(subwriter) { - this.subwriter = subwriter; - this.records = new Map(); +class UniqCountWriter { + constructor(subwriter) { + this.subwriter = subwriter; + this.records = new Map(); + } - this.write = function(record) { + write(record) { var key = JSON.stringify(record); var old_val = this.records.get(key); if (old_val) { @@ -463,7 +520,7 @@ function UniqCountWriter(subwriter) { return true; } - this.finish = async function() { + async finish() { for (var [key, value] of this.records) { let [count, record] = value; record.unshift(count); @@ -475,19 +532,22 @@ function UniqCountWriter(subwriter) { } -function SortedWriter(subwriter) { - this.subwriter = subwriter; - this.unsorted_entries = []; +class SortedWriter { + constructor(subwriter, reverse_sort) { + this.subwriter = subwriter; + this.reverse_sort = reverse_sort; + this.unsorted_entries = []; + } - this.write = function(stable_entry) { + write(stable_entry) { this.unsorted_entries.push(stable_entry); return true; } - this.finish = async function() { + async finish() { var unsorted_entries = this.unsorted_entries; unsorted_entries.sort(stable_compare); - if (__RBQLMP__reverse_flag) + if (this.reverse_sort) unsorted_entries.reverse(); for (var i = 0; i < unsorted_entries.length; i++) { var entry = unsorted_entries[i]; @@ -499,12 +559,14 @@ function SortedWriter(subwriter) { } -function AggregateWriter(subwriter) { - this.subwriter = subwriter; - this.aggregators = []; - this.aggregation_keys = new Set(); +class AggregateWriter { + constructor(subwriter) { + this.subwriter = subwriter; + this.aggregators = []; + this.aggregation_keys = new Set(); + } - this.finish = async function() { + async finish() { var all_keys = Array.from(this.aggregation_keys); all_keys.sort(); for (var i = 0; i < all_keys.length; i++) { @@ -521,20 +583,24 @@ function AggregateWriter(subwriter) { } -function InnerJoiner(join_map) { - this.join_map = join_map; +class InnerJoiner { + constructor(join_map) { + this.join_map = join_map; + } - this.get_rhs = function(lhs_key) { + get_rhs(lhs_key) { return this.join_map.get_join_records(lhs_key); } } -function LeftJoiner(join_map) { - this.join_map = join_map; - this.null_record = [[null, join_map.max_record_len, Array(join_map.max_record_len).fill(null)]]; +class LeftJoiner { + constructor(join_map) { + this.join_map = join_map; + this.null_record = [[null, join_map.max_record_len, Array(join_map.max_record_len).fill(null)]]; + } - this.get_rhs = function(lhs_key) { + get_rhs(lhs_key) { let result = this.join_map.get_join_records(lhs_key); if (result.length == 0) { return this.null_record; @@ -544,10 +610,12 @@ function LeftJoiner(join_map) { } -function StrictLeftJoiner(join_map) { - this.join_map = join_map; +class StrictLeftJoiner { + constructor(join_map) { + this.join_map = join_map; + } - this.get_rhs = function(lhs_key) { + get_rhs(lhs_key) { let result = this.join_map.get_join_records(lhs_key); if (result.length != 1) { throw new RbqlRuntimeError('In "STRICT LEFT JOIN" each key in A must have exactly one match in B. Bad A key: "' + lhs_key + '"'); @@ -567,42 +635,13 @@ function select_except(src, except_fields) { } -function process_update_join(record_a, join_matches) { - if (join_matches.length > 1) - throw new RbqlRuntimeError('More than one record in UPDATE query matched a key from the input table in the join table'); - let record_b = null; - let bNR = null; - let bNF = null; - if (join_matches.length == 1) - [bNR, bNF, record_b] = join_matches[0]; - var up_fields = record_a; - __RBQLMP__init_column_vars_update - if (join_matches.length == 1 && (__RBQLMP__where_expression)) { - NU += 1; - __RBQLMP__update_statements - } - return writer.write(up_fields); -} - - -function process_update_simple(record_a, _join_matches) { - var up_fields = record_a; - __RBQLMP__init_column_vars_update - if (__RBQLMP__where_expression) { - NU += 1; - __RBQLMP__update_statements - } - return writer.write(up_fields); -} - - -function select_simple(sort_key, out_fields) { - if (__RBQLMP__sort_flag) { +function select_simple(sort_key, NR, out_fields) { + if (query_context.sort_key_expression !== null) { var sort_entry = sort_key.concat([NR, out_fields]); - if (!writer.write(sort_entry)) + if (!query_context.writer.write(sort_entry)) return false; } else { - if (!writer.write(out_fields)) + if (!query_context.writer.write(out_fields)) return false; } return true; @@ -613,156 +652,225 @@ function select_aggregated(key, transparent_values) { if (key !== null) { key = JSON.stringify(key); } - if (aggregation_stage === 1) { - if (!(writer instanceof TopWriter)) { + if (query_context.aggregation_stage === 1) { + if (!(query_context.writer instanceof TopWriter)) { throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries'); } - writer = new AggregateWriter(writer); + query_context.writer = new AggregateWriter(query_context.writer); let num_aggregators_found = 0; for (var i = 0; i < transparent_values.length; i++) { var trans_value = transparent_values[i]; if (trans_value instanceof RBQLAggregationToken) { - writer.aggregators.push(functional_aggregators[trans_value.marker_id]); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value.value); + query_context.writer.aggregators.push(query_context.functional_aggregators[trans_value.marker_id]); + query_context.writer.aggregators[query_context.writer.aggregators.length - 1].increment(key, trans_value.value); num_aggregators_found += 1; } else { - writer.aggregators.push(new ConstGroupVerifier(writer.aggregators.length)); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value); + query_context.writer.aggregators.push(new ConstGroupVerifier(query_context.writer.aggregators.length)); + query_context.writer.aggregators[query_context.writer.aggregators.length - 1].increment(key, trans_value); } } - if (num_aggregators_found != functional_aggregators.length) { + if (num_aggregators_found != query_context.functional_aggregators.length) { throw new RbqlParsingError(wrong_aggregation_usage_error); } - aggregation_stage = 2; + query_context.aggregation_stage = 2; } else { for (var i = 0; i < transparent_values.length; i++) { var trans_value = transparent_values[i]; - writer.aggregators[i].increment(key, trans_value); + query_context.writer.aggregators[i].increment(key, trans_value); } } - writer.aggregation_keys.add(key) + query_context.writer.aggregation_keys.add(key); } -function select_unnested(sort_key, folded_fields) { +function select_unnested(sort_key, NR, folded_fields) { let out_fields = folded_fields.slice(); let unnest_pos = folded_fields.findIndex(val => val instanceof UnnestMarker); - for (var i = 0; i < unnest_list.length; i++) { - out_fields[unnest_pos] = unnest_list[i]; - if (!select_simple(sort_key, out_fields.slice())) + for (var i = 0; i < query_context.unnest_list.length; i++) { + out_fields[unnest_pos] = query_context.unnest_list[i]; + if (!select_simple(sort_key, NR, out_fields.slice())) return false; } return true; } -function process_select_simple(record_a, join_match) { - unnest_list = null; - if (join_match === null) { - var star_fields = record_a; - } else { - var [bNR, bNF, record_b] = join_match; - var star_fields = record_a.concat(record_b); - } - __RBQLMP__init_column_vars_select - if (!(__RBQLMP__where_expression)) - return true; +const PROCESS_SELECT_COMMON = ` +__RBQLMP__variables_init_code +if (__RBQLMP__where_expression) { let out_fields = __RBQLMP__select_expression; - if (aggregation_stage > 0) { + if (query_context.aggregation_stage > 0) { let key = __RBQLMP__aggregation_key_expression; select_aggregated(key, out_fields); } else { let sort_key = [__RBQLMP__sort_key_expression]; - if (unnest_list !== null) { - if (!select_unnested(sort_key, out_fields)) - return false; + if (query_context.unnest_list !== null) { + if (!select_unnested(sort_key, NR, out_fields)) + stop_flag = true; } else { - if (!select_simple(sort_key, out_fields)) - return false; + if (!select_simple(sort_key, NR, out_fields)) + stop_flag = true; } } - return true; } +`; -function process_select_join(record_a, join_matches) { - for (let join_match of join_matches) { - if (!process_select_simple(record_a, join_match)) - return false; - } - return true; +const PROCESS_SELECT_SIMPLE = ` +let star_fields = record_a; +__CODE__ +`; + + +const PROCESS_SELECT_JOIN = ` +let join_matches = query_context.join_map.get_rhs(__RBQLMP__lhs_join_var_expression); +for (let join_match of join_matches) { + let [bNR, bNF, record_b] = join_match; + let star_fields = record_a.concat(record_b); + __CODE__ + if (stop_flag) + break; } +`; -async function rb_transform(input_iterator, join_map_impl, output_writer) { - if (module_was_used_failsafe) { - throw new Error('Module can only be used once'); - } - module_was_used_failsafe = true; - assert((join_map_impl === null) === (__RBQLMP__join_operation === null), 'JOIN inconsistency'); - let join_map = null; - if (join_map_impl !== null) { - await join_map_impl.build(); - let sql_join_type = {'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}[__RBQLMP__join_operation]; - join_map = new sql_join_type(join_map_impl); - } +const PROCESS_UPDATE_JOIN = ` +let join_matches = query_context.join_map.get_rhs(__RBQLMP__lhs_join_var_expression); +if (join_matches.length > 1) + throw new RbqlRuntimeError('More than one record in UPDATE query matched a key from the input table in the join table'); +let record_b = null; +let bNR = null; +let bNF = null; +if (join_matches.length == 1) + [bNR, bNF, record_b] = join_matches[0]; +let up_fields = record_a; +__RBQLMP__variables_init_code +if (join_matches.length == 1 && (__RBQLMP__where_expression)) { + NU += 1; + __RBQLMP__update_expressions +} +if (!query_context.writer.write(up_fields)) + stop_flag = true; +`; - let polymorphic_process = [[process_update_simple, process_update_join], [process_select_simple, process_select_join]][__RBQLMP__is_select_query][join_map ? 1 : 0]; - writer = new TopWriter(output_writer); - if (__RBQLMP__writer_type == 'uniq') { - writer = new UniqWriter(writer); - } else if (__RBQLMP__writer_type == 'uniq_count') { - writer = new UniqCountWriter(writer); - } +const PROCESS_UPDATE_SIMPLE = ` +let up_fields = record_a; +__RBQLMP__variables_init_code +if (__RBQLMP__where_expression) { + NU += 1; + __RBQLMP__update_expressions +} +if (!query_context.writer.write(up_fields)) + stop_flag = true; +`; - if (__RBQLMP__sort_flag) - writer = new SortedWriter(writer); - while (true) { - let record_a = await input_iterator.get_record(); - if (record_a === null) - break; - NR += 1; - NF = record_a.length; +const MAIN_LOOP_BODY = ` +__USER_INIT_CODE__ - try { - let join_matches = join_map ? join_map.get_rhs(__RBQLMP__lhs_join_var) : null; - if (!polymorphic_process(record_a, join_matches)) { - input_iterator.stop(); - break; - } - } catch (e) { - if (e.constructor.name === 'InternalBadFieldError') { - throw new RbqlRuntimeError(\`No "a\${e.bad_idx + 1}" field at record \${NR}\`); - } else if (e.constructor.name === 'RbqlParsingError') { - throw(e); - } else { - throw new RbqlRuntimeError(\`At record \${NR}, Details: \${e.message}\`); - } +let NU = 0; +let NR = 0; + +let stop_flag = false; +while (!stop_flag) { + let record_a = await query_context.input_iterator.get_record(); + if (record_a === null) + break; + NR += 1; + let NF = record_a.length; + query_context.unnest_list = null; // TODO optimize, don't need to set this every iteration + try { + __CODE__ + } catch (e) { + if (e.constructor.name === 'InternalBadFieldError') { + throw new RbqlRuntimeError(\`No "a\${e.bad_idx + 1}" field at record \${NR}\`); + } else if (e.constructor.name === 'RbqlParsingError') { + throw(e); + } else { + throw new RbqlRuntimeError(\`At record \${NR}, Details: \${e.message}\`); } } - await writer.finish(); } - -module.exports.rb_transform = rb_transform; `; -// ^ The expression above will cause builder.js and tempalte.js to be combined to autogenerate rbql.js: builder.js + template.js -> ../rbql.js -// Expression is written as a function to pacify the linter. -// Unit tests will ensure that rbql.js is indeed a concatenation of builder.js and template.js -// This module works with records only. It is CSV-agnostic. -// Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc +function embed_expression(parent_code, child_placeholder, child_expression) { + return replace_all(parent_code, child_placeholder, child_expression); +} -// TODO get rid of functions with "_js" suffix +function embed_code(parent_code, child_placeholder, child_code) { + let parent_lines = parent_code.split('\n'); + let child_lines = child_code.split('\n'); + for (let i = 0; i < parent_lines.length; i++) { + let pos = parent_lines[i].indexOf(child_placeholder); + if (pos == -1) + continue; + assert(pos % 4 == 0); + let placeholder_indentation = parent_lines[i].substring(0, pos); + child_lines = child_lines.map(l => placeholder_indentation + l); + let result_lines = parent_lines.slice(0, i).concat(child_lines).concat(parent_lines.slice(i + 1)); + return result_lines.join('\n') + '\n'; + } + assert(false); +} -// TODO replace prototypes with classes: this improves readability +function generate_main_loop_code(query_context) { + let is_select_query = query_context.select_expression !== null; + let is_join_query = query_context.join_map !== null; + let where_expression = query_context.where_expression === null ? 'true' : query_context.where_expression; + let aggregation_key_expression = query_context.aggregation_key_expression === null ? 'null' : query_context.aggregation_key_expression; + let sort_key_expression = query_context.sort_key_expression === null ? 'null' : query_context.sort_key_expression; + let js_code = embed_code(MAIN_LOOP_BODY, '__USER_INIT_CODE__', query_context.user_init_code); + if (is_select_query) { + if (is_join_query) { + js_code = embed_code(embed_code(js_code, '__CODE__', PROCESS_SELECT_JOIN), '__CODE__', PROCESS_SELECT_COMMON); + js_code = embed_expression(js_code, '__RBQLMP__lhs_join_var_expression', query_context.lhs_join_var_expression); + } else { + js_code = embed_code(embed_code(js_code, '__CODE__', PROCESS_SELECT_SIMPLE), '__CODE__', PROCESS_SELECT_COMMON); + } + js_code = embed_code(js_code, '__RBQLMP__variables_init_code', query_context.variables_init_code); + js_code = embed_expression(js_code, '__RBQLMP__select_expression', query_context.select_expression); + js_code = embed_expression(js_code, '__RBQLMP__where_expression', where_expression); + js_code = embed_expression(js_code, '__RBQLMP__aggregation_key_expression', aggregation_key_expression); + js_code = embed_expression(js_code, '__RBQLMP__sort_key_expression', sort_key_expression); + } else { + if (is_join_query) { + js_code = embed_code(js_code, '__CODE__', PROCESS_UPDATE_JOIN); + js_code = embed_expression(js_code, '__RBQLMP__lhs_join_var_expression', query_context.lhs_join_var_expression); + } else { + js_code = embed_code(js_code, '__CODE__', PROCESS_UPDATE_SIMPLE); + } + js_code = embed_code(js_code, '__RBQLMP__variables_init_code', query_context.variables_init_code); + js_code = embed_code(js_code, '__RBQLMP__update_expressions', query_context.update_expressions); + js_code = embed_expression(js_code, '__RBQLMP__where_expression', where_expression); + } + return "(async () => {" + js_code + "})()"; +} -const version = '0.16.1'; +async function compile_and_run(query_context) { + let main_loop_body = generate_main_loop_code(query_context); + try { + let main_loop_promise = eval(main_loop_body); + await main_loop_promise; + } catch (e) { + if (e instanceof SyntaxError) { + // SyntaxError's from eval() function do not contain detailed explanation of what has caused the syntax error, so to guess what was wrong we can only use the original query + // v8 issue to fix eval: https://bugs.chromium.org/p/v8/issues/detail?id=2589 + if (query_context.query_text.toLowerCase().indexOf(' having ') != -1) + throw new SyntaxError(e.message + "\nRBQL doesn't support \"HAVING\" keyword"); + if (query_context.query_text.toLowerCase().indexOf(' like ') != -1) + throw new SyntaxError(e.message + "\nRBQL doesn't support \"LIKE\" operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... "); // UT JSON + if (query_context.query_text.toLowerCase().indexOf(' from ') != -1) + throw new SyntaxError(e.message + "\nRBQL doesn't use \"FROM\" keyword, e.g. you can query 'SELECT *' without FROM"); // UT JSON + } + throw e; + } +} + const GROUP_BY = 'GROUP BY'; const UPDATE = 'UPDATE'; @@ -770,6 +878,7 @@ const SELECT = 'SELECT'; const JOIN = 'JOIN'; const INNER_JOIN = 'INNER JOIN'; const LEFT_JOIN = 'LEFT JOIN'; +const LEFT_OUTER_JOIN = 'LEFT OUTER JOIN'; const STRICT_LEFT_JOIN = 'STRICT LEFT JOIN'; const ORDER_BY = 'ORDER BY'; const WHERE = 'WHERE'; @@ -777,29 +886,6 @@ const LIMIT = 'LIMIT'; const EXCEPT = 'EXCEPT'; -class RbqlParsingError extends Error {} -class RbqlIOHandlingError extends Error {} -class AssertionError extends Error {} -class RbqlRuntimeError extends Error {} - -var debug_mode = false; - -function assert(condition, message=null) { - if (!condition) { - if (!message) { - message = 'Assertion error'; - } - throw new AssertionError(message); - } -} - - -function regexp_escape(text) { - // From here: https://stackoverflow.com/a/6969486/2898283 - return text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched text -} - - function get_ambiguous_error_msg(variable_name) { return `Ambiguous variable name: "${variable_name}" is present both in input and in join tables`; } @@ -815,26 +901,11 @@ function get_all_matches(regexp, text) { } -function replace_all(src, search, replacement) { - return src.split(search).join(replacement); -} - - function str_strip(src) { return src.replace(/^ +| +$/g, ''); } -function rbql_meta_format(template_src, meta_params) { - for (const [key, value] of Object.entries(meta_params)) { - var template_src_upd = replace_all(template_src, key, value); - assert(template_src_upd != template_src); - template_src = template_src_upd; - } - return template_src; -} - - function strip_comments(cline) { cline = cline.trim(); if (cline.startsWith('//')) @@ -1097,32 +1168,32 @@ function translate_update_expression(update_expression, input_variables_map, str let first_assignment_error = `Unable to parse "UPDATE" expression: the expression must start with assignment, but "${first_assignment}" does not look like an assignable field name`; let assignment_looking_rgx = /(?:^|,) *(a[.#a-zA-Z0-9\[\]_]*) *=(?=[^=])/g; - let update_statements = []; + let update_expressions = []; let pos = 0; while (true) { let match = assignment_looking_rgx.exec(update_expression); - if (update_statements.length == 0 && (match === null || match.index != 0)) { + if (update_expressions.length == 0 && (match === null || match.index != 0)) { throw new RbqlParsingError(first_assignment_error); } if (match === null) { - update_statements[update_statements.length - 1] += str_strip(update_expression.substr(pos)) + ');'; + update_expressions[update_expressions.length - 1] += str_strip(update_expression.substr(pos)) + ');'; break; } - if (update_statements.length) - update_statements[update_statements.length - 1] += str_strip(update_expression.substring(pos, match.index)) + ');'; + if (update_expressions.length) + update_expressions[update_expressions.length - 1] += str_strip(update_expression.substring(pos, match.index)) + ');'; let dst_var_name = combine_string_literals(str_strip(match[1]), string_literals); if (!input_variables_map.hasOwnProperty(dst_var_name)) throw new RbqlParsingError(`Unable to parse "UPDATE" expression: Unknown field name: "${dst_var_name}"`); let var_index = input_variables_map[dst_var_name].index; - let current_indent = update_statements.length ? indent : ''; - update_statements.push(`${current_indent}safe_set(up_fields, ${var_index}, `); + let current_indent = update_expressions.length ? indent : ''; + update_expressions.push(`${current_indent}safe_set(up_fields, ${var_index}, `); pos = match.index + match[0].length; } - return combine_string_literals(update_statements.join('\n'), string_literals); + return combine_string_literals(update_expressions.join('\n'), string_literals); } -function translate_select_expression_js(select_expression) { +function translate_select_expression(select_expression) { var translated = replace_star_count(select_expression); translated = replace_star_vars(translated); translated = str_strip(translated); @@ -1132,7 +1203,7 @@ function translate_select_expression_js(select_expression) { } -function separate_string_literals_js(rbql_expression) { +function separate_string_literals(rbql_expression) { // The regex consists of 3 almost identicall parts, the only difference is quote type var rgx = /('(\\(\\\\)*'|[^'])*')|("(\\(\\\\)*"|[^"])*")|(`(\\(\\\\)*`|[^`])*`)/g; var match_obj = null; @@ -1157,7 +1228,7 @@ function separate_string_literals_js(rbql_expression) { function locate_statements(rbql_expression) { let statement_groups = []; - statement_groups.push([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN]); + statement_groups.push([STRICT_LEFT_JOIN, LEFT_OUTER_JOIN, LEFT_JOIN, INNER_JOIN, JOIN]); statement_groups.push([SELECT]); statement_groups.push([ORDER_BY]); statement_groups.push([WHERE]); @@ -1200,7 +1271,7 @@ function separate_actions(rbql_expression) { assert(span_start <= span_end); var span = rbql_expression.substring(span_start, span_end); var statement_params = {}; - if ([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN].indexOf(statement) != -1) { + if ([STRICT_LEFT_JOIN, LEFT_OUTER_JOIN, LEFT_JOIN, INNER_JOIN, JOIN].indexOf(statement) != -1) { statement_params['join_subtype'] = statement; statement = JOIN; } @@ -1267,13 +1338,6 @@ function find_top(rb_actions) { } -function indent_user_init_code(user_init_code) { - let source_lines = user_init_code.split(/(?:\r\n)|\r|\n/); - source_lines = source_lines.map(line => ' ' + line); - return source_lines.join('\n'); -} - - function translate_except_expression(except_expression, input_variables_map, string_literals) { let skip_vars = except_expression.split(','); skip_vars = skip_vars.map(str_strip); @@ -1290,21 +1354,30 @@ function translate_except_expression(except_expression, input_variables_map, str } -function HashJoinMap(record_iterator, key_indices) { - this.max_record_len = 0; - this.hash_map = new Map(); - this.record_iterator = record_iterator; - this.key_index = null; - this.key_indices = null; - this.nr = 0; +class HashJoinMap { + constructor(record_iterator, key_indices) { + this.max_record_len = 0; + this.hash_map = new Map(); + this.record_iterator = record_iterator; + this.nr = 0; + if (key_indices.length == 1) { + this.key_index = key_indices[0]; + this.key_indices = null; + this.polymorphic_get_key = this.get_single_key; + } else { + this.key_index = null; + this.key_indices = key_indices; + this.polymorphic_get_key = this.get_multi_key; + } + } - this.get_single_key = function(nr, fields) { + get_single_key(nr, fields) { if (this.key_index >= fields.length) throw new RbqlRuntimeError(`No field with index ${this.key_index + 1} at record ${this.nr} in "B" table`); return this.key_index === -1 ? this.nr : fields[this.key_index]; }; - this.get_multi_key = function(nr, fields) { + get_multi_key(nr, fields) { let result = []; for (let ki of this.key_indices) { if (ki >= fields.length) @@ -1314,15 +1387,7 @@ function HashJoinMap(record_iterator, key_indices) { return JSON.stringify(result); }; - if (key_indices.length == 1) { - this.key_index = key_indices[0]; - this.polymorphic_get_key = this.get_single_key; - } else { - this.key_indices = key_indices; - this.polymorphic_get_key = this.get_multi_key; - } - - this.build = async function() { + async build() { while (true) { let fields = await this.record_iterator.get_record(); if (fields === null) @@ -1340,14 +1405,14 @@ function HashJoinMap(record_iterator, key_indices) { } }; - this.get_join_records = function(key) { + get_join_records(key) { let result = this.hash_map.get(key); if (result === undefined) return []; return result; }; - this.get_warnings = function() { + get_warnings() { return this.record_iterator.get_warnings(); }; } @@ -1365,123 +1430,6 @@ function remove_redundant_table_name(query_text) { } -async function parse_to_js(query_text, js_template_text, input_iterator, join_tables_registry, user_init_code) { - user_init_code = indent_user_init_code(user_init_code); - query_text = cleanup_query(query_text); - var [format_expression, string_literals] = separate_string_literals_js(query_text); - format_expression = remove_redundant_table_name(format_expression); - var input_variables_map = await input_iterator.get_variables_map(query_text); - - var rb_actions = separate_actions(format_expression); - - var js_meta_params = {}; - js_meta_params['__RBQLMP__user_init_code'] = user_init_code; - js_meta_params['__RBQLMP__version'] = version; - - if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries'); - - - if (rb_actions.hasOwnProperty(GROUP_BY)) { - if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries'); - var aggregation_key_expression = rb_actions[GROUP_BY]['text']; - js_meta_params['__RBQLMP__aggregation_key_expression'] = '[' + combine_string_literals(aggregation_key_expression, string_literals) + ']'; - } else { - js_meta_params['__RBQLMP__aggregation_key_expression'] = 'null'; - } - - let join_map = null; - let join_variables_map = null; - if (rb_actions.hasOwnProperty(JOIN)) { - var [rhs_table_id, variable_pairs] = parse_join_expression(rb_actions[JOIN]['text']); - if (join_tables_registry === null) - throw new RbqlParsingError('JOIN operations are not supported by the application'); - let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id); - if (!join_record_iterator) - throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`); - join_variables_map = await join_record_iterator.get_variables_map(query_text); - let [lhs_variables, rhs_indices] = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals); - js_meta_params['__RBQLMP__join_operation'] = `"${rb_actions[JOIN]['join_subtype']}"`; - js_meta_params['__RBQLMP__lhs_join_var'] = lhs_variables.length == 1 ? lhs_variables[0] : 'JSON.stringify([' + lhs_variables.join(',') + '])'; - join_map = new HashJoinMap(join_record_iterator, rhs_indices); - } else { - js_meta_params['__RBQLMP__join_operation'] = 'null'; - js_meta_params['__RBQLMP__lhs_join_var'] = 'null'; - } - - if (rb_actions.hasOwnProperty(WHERE)) { - var where_expression = rb_actions[WHERE]['text']; - if (/[^!=]=[^=]/.exec(where_expression)) { - throw new RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "==" or "==="'); - } - js_meta_params['__RBQLMP__where_expression'] = combine_string_literals(where_expression, string_literals); - } else { - js_meta_params['__RBQLMP__where_expression'] = 'true'; - } - - - if (rb_actions.hasOwnProperty(UPDATE)) { - var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals, ' '.repeat(8)); - js_meta_params['__RBQLMP__writer_type'] = '"simple"'; - js_meta_params['__RBQLMP__select_expression'] = 'null'; - js_meta_params['__RBQLMP__update_statements'] = combine_string_literals(update_expression, string_literals); - js_meta_params['__RBQLMP__is_select_query'] = '0'; - js_meta_params['__RBQLMP__top_count'] = 'null'; - js_meta_params['__RBQLMP__init_column_vars_update'] = combine_string_literals(generate_init_statements(format_expression, input_variables_map, join_variables_map, ' '.repeat(4)), string_literals); - js_meta_params['__RBQLMP__init_column_vars_select'] = ''; - } - - if (rb_actions.hasOwnProperty(SELECT)) { - js_meta_params['__RBQLMP__init_column_vars_update'] = ''; - js_meta_params['__RBQLMP__init_column_vars_select'] = combine_string_literals(generate_init_statements(format_expression, input_variables_map, join_variables_map, ' '.repeat(4)), string_literals); - var top_count = find_top(rb_actions); - js_meta_params['__RBQLMP__top_count'] = top_count === null ? 'null' : String(top_count); - if (rb_actions[SELECT].hasOwnProperty('distinct_count')) { - js_meta_params['__RBQLMP__writer_type'] = '"uniq_count"'; - } else if (rb_actions[SELECT].hasOwnProperty('distinct')) { - js_meta_params['__RBQLMP__writer_type'] = '"uniq"'; - } else { - js_meta_params['__RBQLMP__writer_type'] = '"simple"'; - } - if (rb_actions.hasOwnProperty(EXCEPT)) { - js_meta_params['__RBQLMP__select_expression'] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals); - } else { - let select_expression = translate_select_expression_js(rb_actions[SELECT]['text']); - js_meta_params['__RBQLMP__select_expression'] = combine_string_literals(select_expression, string_literals); - } - js_meta_params['__RBQLMP__update_statements'] = ''; - js_meta_params['__RBQLMP__is_select_query'] = '1'; - } - - if (rb_actions.hasOwnProperty(ORDER_BY)) { - var order_expression = rb_actions[ORDER_BY]['text']; - js_meta_params['__RBQLMP__sort_key_expression'] = combine_string_literals(order_expression, string_literals); - js_meta_params['__RBQLMP__reverse_flag'] = rb_actions[ORDER_BY]['reverse'] ? 'true' : 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'true'; - } else { - js_meta_params['__RBQLMP__sort_key_expression'] = 'null'; - js_meta_params['__RBQLMP__reverse_flag'] = 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'false'; - } - var js_code = rbql_meta_format(js_template_text, js_meta_params); - return [js_code, join_map]; -} - - -function load_module_from_file(js_code) { - let os = require('os'); - let path = require('path'); - let fs = require('fs'); - var tmp_dir = os.tmpdir(); - var script_filename = 'rbconvert_' + String(Math.random()).replace('.', '_') + '.js'; - let tmp_worker_module_path = path.join(tmp_dir, script_filename); - fs.writeFileSync(tmp_worker_module_path, js_code); - let worker_module = require(tmp_worker_module_path); - return worker_module; -} - - function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) { let keys = Object.keys(inconsistent_records_info); let entries = []; @@ -1500,22 +1448,24 @@ function make_inconsistent_num_fields_warning(table_name, inconsistent_records_i } -function TableIterator(table, column_names=null, normalize_column_names=true, variable_prefix='a') { - this.table = table; - this.column_names = column_names; - this.normalize_column_names = normalize_column_names; - this.variable_prefix = variable_prefix; - this.nr = 0; - this.fields_info = new Object(); - this.stopped = false; +class TableIterator { + constructor(table, column_names=null, normalize_column_names=true, variable_prefix='a') { + this.table = table; + this.column_names = column_names; + this.normalize_column_names = normalize_column_names; + this.variable_prefix = variable_prefix; + this.nr = 0; + this.fields_info = new Object(); + this.stopped = false; + } - this.stop = function() { + stop() { this.stopped = true; }; - this.get_variables_map = async function(query_text) { + async get_variables_map(query_text) { let variable_map = new Object(); parse_basic_variables(query_text, this.variable_prefix, variable_map); parse_array_variables(query_text, this.variable_prefix, variable_map); @@ -1533,7 +1483,7 @@ function TableIterator(table, column_names=null, normalize_column_names=true, va }; - this.get_record = async function() { + async get_record() { if (this.stopped) return null; if (this.nr >= this.table.length) @@ -1546,7 +1496,7 @@ function TableIterator(table, column_names=null, normalize_column_names=true, va return record; }; - this.get_warnings = function() { + get_warnings() { if (Object.keys(this.fields_info).length > 1) return [make_inconsistent_num_fields_warning('input', this.fields_info)]; return []; @@ -1554,60 +1504,121 @@ function TableIterator(table, column_names=null, normalize_column_names=true, va } -function TableWriter(external_table) { - this.table = external_table; +class TableWriter { + constructor(external_table) { + this.table = external_table; + } - this.write = function(fields) { + write(fields) { this.table.push(fields); + return true; }; - this.get_warnings = function() { + get_warnings() { return []; }; - this.finish = async function() {}; + async finish() {}; } -function SingleTableRegistry(table, column_names=null, normalize_column_names=true, table_id='B') { - this.table = table; - this.table_id = table_id; - this.column_names = column_names; - this.normalize_column_names = normalize_column_names; +class SingleTableRegistry { + constructor(table, column_names=null, normalize_column_names=true, table_id='b') { + this.table = table; + this.table_id = table_id; + this.column_names = column_names; + this.normalize_column_names = normalize_column_names; + } - this.get_iterator_by_table_id = function(table_id) { - if (table_id !== this.table_id) + get_iterator_by_table_id(table_id) { + if (table_id.toLowerCase() !== this.table_id) throw new RbqlIOHandlingError(`Unable to find join table: "${table_id}"`); return new TableIterator(this.table, this.column_names, this.normalize_column_names, 'b'); }; } -async function query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry=null, user_init_code='') { - let [js_code, join_map] = await parse_to_js(query_text, external_js_template_text, input_iterator, join_tables_registry, user_init_code); - let rbql_worker = null; - try { - if (debug_mode) { - // This version works a little faster than eval below. The downside is that a temporary file is created - rbql_worker = load_module_from_file(js_code); - } else { - let module = {'exports': {}}; - eval('(function(){' + js_code + '})()'); - rbql_worker = module.exports; +async function shallow_parse_input_query(query_text, input_iterator, join_tables_registry, query_context) { + query_text = cleanup_query(query_text); + var [format_expression, string_literals] = separate_string_literals(query_text); + format_expression = remove_redundant_table_name(format_expression); + var input_variables_map = await input_iterator.get_variables_map(query_text); + + var rb_actions = separate_actions(format_expression); + + if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE)) + throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries'); + + if (rb_actions.hasOwnProperty(GROUP_BY)) { + if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE)) + throw new RbqlParsingError('"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries'); + query_context.aggregation_key_expression = '[' + combine_string_literals(rb_actions[GROUP_BY]['text'], string_literals) + ']'; + } + + let join_variables_map = null; + if (rb_actions.hasOwnProperty(JOIN)) { + var [rhs_table_id, variable_pairs] = parse_join_expression(rb_actions[JOIN]['text']); + if (join_tables_registry === null) + throw new RbqlParsingError('JOIN operations are not supported by the application'); + let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id); + if (!join_record_iterator) + throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`); + join_variables_map = await join_record_iterator.get_variables_map(query_text); + let [lhs_variables, rhs_indices] = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals); + let sql_join_type = {'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'LEFT OUTER JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}[rb_actions[JOIN]['join_subtype']]; + query_context.lhs_join_var_expression = lhs_variables.length == 1 ? lhs_variables[0] : 'JSON.stringify([' + lhs_variables.join(',') + '])'; + query_context.join_map_impl = new HashJoinMap(join_record_iterator, rhs_indices); + await query_context.join_map_impl.build(); + query_context.join_map = new sql_join_type(query_context.join_map_impl); + } + + query_context.variables_init_code = combine_string_literals(generate_init_statements(format_expression, input_variables_map, join_variables_map, ' '.repeat(4)), string_literals); + + if (rb_actions.hasOwnProperty(WHERE)) { + var where_expression = rb_actions[WHERE]['text']; + if (/[^!=]=[^=]/.exec(where_expression)) + throw new RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "==" or "==="'); + query_context.where_expression = combine_string_literals(where_expression, string_literals); + } + + if (rb_actions.hasOwnProperty(UPDATE)) { + var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals, ' '.repeat(8)); + query_context.update_expressions = combine_string_literals(update_expression, string_literals); + } + + if (rb_actions.hasOwnProperty(SELECT)) { + query_context.top_count = find_top(rb_actions); + query_context.writer = new TopWriter(query_context.writer, query_context.top_count); + + if (rb_actions[SELECT].hasOwnProperty('distinct_count')) { + query_context.writer = new UniqCountWriter(query_context.writer); + } else if (rb_actions[SELECT].hasOwnProperty('distinct')) { + query_context.writer = new UniqWriter(query_context.writer); } - } catch (e) { - if (e instanceof SyntaxError) { - if (query_text.toLowerCase().indexOf(' like ') != -1) - throw new SyntaxError(e.message + "\nRBQL doesn't support LIKE operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... "); // UT JSON - if (query_text.toLowerCase().indexOf(' from ') != -1) - throw new SyntaxError(e.message + "\nRBQL doesn't use \"FROM\" keyword, e.g. you can query 'SELECT *' without FROM"); // UT JSON + if (rb_actions.hasOwnProperty(EXCEPT)) { + query_context.select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals); + } else { + let select_expression = translate_select_expression(rb_actions[SELECT]['text']); + query_context.select_expression = combine_string_literals(select_expression, string_literals); } - throw e; } - await rbql_worker.rb_transform(input_iterator, join_map, output_writer); + + if (rb_actions.hasOwnProperty(ORDER_BY)) { + query_context.sort_key_expression = combine_string_literals(rb_actions[ORDER_BY]['text'], string_literals); + let reverse_sort = rb_actions[ORDER_BY]['reverse']; + query_context.writer = new SortedWriter(query_context.writer, reverse_sort); + } +} + + +async function query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry=null, user_init_code='') { + query_context = new RBQLContext(query_text, input_iterator, output_writer, user_init_code); + await shallow_parse_input_query(query_text, input_iterator, join_tables_registry, query_context); + await compile_and_run(query_context); + await query_context.writer.finish(); output_warnings.push(...input_iterator.get_warnings()); - if (join_map) - output_warnings.push(...join_map.get_warnings()); + if (query_context.join_map_impl) + output_warnings.push(...query_context.join_map_impl.get_warnings()); output_warnings.push(...output_writer.get_warnings()); } @@ -1622,11 +1633,6 @@ async function query_table(query_text, input_table, output_table, output_warning } -function set_debug_mode() { - debug_mode = true; -} - - function exception_to_error_info(e) { let exceptions_type_map = { 'RbqlRuntimeError': 'query execution', @@ -1643,34 +1649,31 @@ function exception_to_error_info(e) { } -module.exports.version = version; -module.exports.query = query; -module.exports.query_table = query_table; - -module.exports.TableIterator = TableIterator; -module.exports.TableWriter = TableWriter; -module.exports.SingleTableRegistry = SingleTableRegistry; -module.exports.parse_basic_variables = parse_basic_variables; -module.exports.parse_array_variables = parse_array_variables; -module.exports.parse_dictionary_variables = parse_dictionary_variables; -module.exports.parse_attribute_variables = parse_attribute_variables; -module.exports.get_all_matches = get_all_matches; - -module.exports.strip_comments = strip_comments; -module.exports.separate_actions = separate_actions; -module.exports.separate_string_literals_js = separate_string_literals_js; -module.exports.combine_string_literals = combine_string_literals; -module.exports.translate_except_expression = translate_except_expression; -module.exports.parse_join_expression = parse_join_expression; -module.exports.resolve_join_variables = resolve_join_variables; -module.exports.translate_update_expression = translate_update_expression; -module.exports.translate_select_expression_js = translate_select_expression_js; - -module.exports.exception_to_error_info = exception_to_error_info; +exports.query = query; +exports.query_table = query_table; -module.exports.set_debug_mode = set_debug_mode; +exports.version = RBQL_VERSION; +exports.TableIterator = TableIterator; +exports.TableWriter = TableWriter; +exports.SingleTableRegistry = SingleTableRegistry; +exports.exception_to_error_info = exception_to_error_info; -// DO NOT EDIT! -// This file was autogenerated from builder.js and template.js using build_engine.js script +// The functions below are exported just for unit tests, they are not part of the rbql API +exports.parse_basic_variables = parse_basic_variables; +exports.parse_array_variables = parse_array_variables; +exports.parse_dictionary_variables = parse_dictionary_variables; +exports.parse_attribute_variables = parse_attribute_variables; +exports.get_all_matches = get_all_matches; +exports.strip_comments = strip_comments; +exports.separate_actions = separate_actions; +exports.separate_string_literals = separate_string_literals; +exports.combine_string_literals = combine_string_literals; +exports.translate_except_expression = translate_except_expression; +exports.parse_join_expression = parse_join_expression; +exports.resolve_join_variables = resolve_join_variables; +exports.translate_update_expression = translate_update_expression; +exports.translate_select_expression = translate_select_expression; +exports.like_to_regex = like_to_regex; +}(typeof exports === 'undefined' ? this.rbql = {} : exports)); diff --git a/rbql-js/rbql_csv.js b/rbql-js/rbql_csv.js old mode 100644 new mode 100755 index 6baef56..3ba2d9c --- a/rbql-js/rbql_csv.js +++ b/rbql-js/rbql_csv.js @@ -10,10 +10,7 @@ const csv_utils = require('./csv_utils.js'); const utf_decoding_error = 'Unable to decode input table as UTF-8. Use binary (latin-1) encoding instead'; -var debug_mode = false; - class RbqlIOHandlingError extends Error {} -class RbqlParsingError extends Error {} class AssertionError extends Error {} @@ -135,6 +132,7 @@ function find_table_path(table_id) { class RecordQueue { + // TODO compare performance with a linked list constructor() { this.push_stack = []; this.pull_stack = []; @@ -161,56 +159,59 @@ class RecordQueue { } -function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_headers=false, table_name='input', variable_prefix='a') { +class CSVRecordIterator { // CSVRecordIterator implements typical async producer-consumer model with an internal buffer: // get_record() - consumer // stream.on('data') - producer + constructor(stream, csv_path, encoding, delim, policy, skip_headers=false, table_name='input', variable_prefix='a') { + this.stream = stream; + this.csv_path = csv_path; + assert((this.stream === null) != (this.csv_path === null)); + this.encoding = encoding; + this.delim = delim; + this.policy = policy; + this.skip_headers = skip_headers; + this.table_name = table_name; + this.variable_prefix = variable_prefix; + + this.decoder = null; + if (encoding == 'utf-8' && this.csv_path === null) { + // Unfortunately util.TextDecoder has serious flaws: + // 1. It doesn't work in Node without ICU: https://nodejs.org/api/util.html#util_new_textdecoder_encoding_options + // 2. It is broken in Electron: https://github.com/electron/electron/issues/18733 + + // Technically we can implement our own custom streaming text decoder, using the 3 following technologies: + // 1. decode-encode validation method from https://stackoverflow.com/a/32279283/2898283 + // 2. Scanning buffer chunks for non-continuation utf-8 bytes from the end of the buffer: + // src_buffer -> (buffer_before, buffer_after) where buffer_after is very small(a couple of bytes) and buffer_before is large and ends with a non-continuation bytes + // 3. Internal buffer to store small tail part from the previous buffer + this.decoder = new util.TextDecoder(encoding, {fatal: true, stream: true}); + } - this.stream = stream; - this.csv_path = csv_path; - assert((this.stream === null) != (this.csv_path === null)); - this.encoding = encoding; - this.delim = delim; - this.policy = policy; - this.skip_headers = skip_headers; - this.table_name = table_name; - this.variable_prefix = variable_prefix; - - this.decoder = null; - if (encoding == 'utf-8' && this.csv_path === null) { - // Unfortunately util.TextDecoder has serious flaws: - // 1. It doesn't work in Node without ICU: https://nodejs.org/api/util.html#util_new_textdecoder_encoding_options - // 2. It is broken in Electron: https://github.com/electron/electron/issues/18733 - - // Technically we can implement our own custom streaming text decoder, using 3 following technologies: - // 1. decode-encode validation method from https://stackoverflow.com/a/32279283/2898283 - // 2. Scanning buffer chunks for non-continuation utf-8 bytes from the end of the buffer: - // src_buffer -> (buffer_before, buffer_after) where buffer_after is very small(a couple of bytes) and buffer_before is large and ends with a non-continuation bytes - // 3. Internal buffer to store small tail part from the previous buffer - this.decoder = new util.TextDecoder(encoding, {fatal: true, stream: true}); - } + this.input_exhausted = false; + this.started = false; - this.input_exhausted = false; - this.started = false; + this.utf8_bom_removed = false; // BOM doesn't get automatically removed by the decoder when utf-8 file is treated as latin-1 + this.first_defective_line = null; - this.utf8_bom_removed = false; // BOM doesn't get automatically removed by decoder when utf-8 file is treated as latin-1 - this.first_defective_line = null; + this.fields_info = new Object(); + this.NR = 0; // Record num + this.NL = 0; // Line num (can be different from record num for rfc dialect) - this.fields_info = new Object(); - this.NR = 0; // Record num - this.NL = 0; // Line num (can be different from record num for rfc dialect) + this.rfc_line_buffer = []; - this.rfc_line_buffer = []; + this.partially_decoded_line = ''; - this.partially_decoded_line = ''; + this.resolve_current_record = null; + this.reject_current_record = null; + this.current_exception = null; - this.resolve_current_record = null; - this.reject_current_record = null; - this.current_exception = null; + this.produced_records_queue = new RecordQueue(); - this.produced_records_queue = new RecordQueue(); + this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line; + } - this.handle_exception = function(exception) { + handle_exception(exception) { if (this.reject_current_record) { let reject = this.reject_current_record; this.reject_current_record = null; @@ -222,7 +223,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade } - this.preread_header = async function() { + async preread_header() { let header_record = await this.get_record(); if (header_record === null) return null; @@ -234,7 +235,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.get_variables_map = async function(query_text) { + async get_variables_map(query_text) { let variable_map = new Object(); rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map); rbql.parse_array_variables(query_text, this.variable_prefix, variable_map); @@ -248,7 +249,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.try_resolve_next_record = function() { + try_resolve_next_record() { if (this.resolve_current_record === null) return; let record = this.produced_records_queue.dequeue(); @@ -261,7 +262,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.get_record = async function() { + async get_record() { if (!this.started) await this.start(); if (this.stream && this.stream.isPaused()) @@ -280,7 +281,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.get_all_records = async function(num_records=null) { + async get_all_records(num_records=null) { let records = []; while (true) { let record = await this.get_record(); @@ -296,7 +297,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.process_record_line = function(line) { + process_record_line(line) { this.NR += 1; var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false); if (warning) { @@ -314,7 +315,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.process_partial_rfc_record_line = function(line) { + process_partial_rfc_record_line(line) { let match_list = line.match(/"/g); let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1; if (this.rfc_line_buffer.length == 0 && !has_unbalanced_double_quote) { @@ -332,10 +333,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line; - - - this.process_line = function(line) { + process_line(line) { if (this.NL === 0) { var clean_line = remove_utf8_bom(line, this.encoding); if (clean_line != line) { @@ -348,7 +346,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.process_data_stream_chunk = function(data_chunk) { + process_data_stream_chunk(data_chunk) { let decoded_string = null; if (this.decoder) { try { @@ -373,7 +371,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.process_data_bulk = function(data_chunk) { + process_data_bulk(data_chunk) { let decoded_string = data_chunk.toString(this.encoding); if (this.encoding == 'utf-8') { // Using hacky comparison method from here: https://stackoverflow.com/a/32279283/2898283 @@ -398,7 +396,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade } - this.process_data_stream_end = function() { + process_data_stream_end() { this.input_exhausted = true; if (this.partially_decoded_line.length) { let last_line = this.partially_decoded_line; @@ -412,13 +410,13 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.stop = function() { + stop() { if (this.stream) this.stream.destroy(); // TODO consider using pause() instead }; - this.start = async function() { + async start() { if (this.started) return; this.started = true; @@ -441,7 +439,7 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade }; - this.get_warnings = function() { + get_warnings() { let result = []; if (this.first_defective_line !== null) result.push(`Inconsistent double quote escaping in ${this.table_name} table. E.g. at line ${this.first_defective_line}`); @@ -454,37 +452,53 @@ function CSVRecordIterator(stream, csv_path, encoding, delim, policy, skip_heade } -function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n') { - this.stream = stream; - this.encoding = encoding; - if (encoding) - this.stream.setDefaultEncoding(encoding); - this.delim = delim; - this.policy = policy; - this.line_separator = line_separator; - this.sub_array_delim = delim == '|' ? ';' : '|'; - - this.close_stream_on_finish = close_stream_on_finish; - - this.null_in_output = false; - this.delim_in_simple_output = false; +class CSVWriter { + constructor(stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n') { + this.stream = stream; + this.encoding = encoding; + if (encoding) + this.stream.setDefaultEncoding(encoding); + this.delim = delim; + this.policy = policy; + this.line_separator = line_separator; + this.sub_array_delim = delim == '|' ? ';' : '|'; + + this.close_stream_on_finish = close_stream_on_finish; + + this.null_in_output = false; + this.delim_in_simple_output = false; + + if (policy == 'simple') { + this.polymorphic_join = this.simple_join; + } else if (policy == 'quoted') { + this.polymorphic_join = this.quoted_join; + } else if (policy == 'quoted_rfc') { + this.polymorphic_join = this.quoted_join_rfc; + } else if (policy == 'monocolumn') { + this.polymorphic_join = this.mono_join; + } else if (policy == 'whitespace') { + this.polymorphic_join = this.simple_join; + } else { + throw new RbqlIOHandlingError('Unknown output csv policy'); + } + } - this.quoted_join = function(fields) { + quoted_join(fields) { let delim = this.delim; var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); }); return quoted_fields.join(this.delim); }; - this.quoted_join_rfc = function(fields) { + quoted_join_rfc(fields) { let delim = this.delim; var quoted_fields = fields.map(function(v) { return csv_utils.rfc_quote_field(String(v), delim); }); return quoted_fields.join(this.delim); }; - this.mono_join = function(fields) { + mono_join(fields) { if (fields.length > 1) { throw new RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field'); } @@ -492,7 +506,7 @@ function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line }; - this.simple_join = function(fields) { + simple_join(fields) { var res = fields.join(this.delim); if (fields.join('').indexOf(this.delim) != -1) { this.delim_in_simple_output = true; @@ -501,22 +515,7 @@ function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line }; - if (policy == 'simple') { - this.polymorphic_join = this.simple_join; - } else if (policy == 'quoted') { - this.polymorphic_join = this.quoted_join; - } else if (policy == 'quoted_rfc') { - this.polymorphic_join = this.quoted_join_rfc; - } else if (policy == 'monocolumn') { - this.polymorphic_join = this.mono_join; - } else if (policy == 'whitespace') { - this.polymorphic_join = this.simple_join; - } else { - throw new RbqlIOHandlingError('Unknown output csv policy'); - } - - - this.normalize_fields = function(out_fields) { + normalize_fields(out_fields) { for (var i = 0; i < out_fields.length; i++) { if (out_fields[i] == null) { this.null_in_output = true; @@ -529,21 +528,22 @@ function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line }; - this.write = function(fields) { + write(fields) { this.normalize_fields(fields); this.stream.write(this.polymorphic_join(fields)); this.stream.write(this.line_separator); + return true; }; - this._write_all = function(table) { + _write_all(table) { for (let i = 0; i < table.length; i++) { this.write(table[i]); } }; - this.finish = async function() { + async finish() { let close_stream_on_finish = this.close_stream_on_finish; let output_stream = this.stream; let output_encoding = this.encoding; @@ -558,7 +558,7 @@ function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line }; - this.get_warnings = function() { + get_warnings() { let result = []; if (this.null_in_output) result.push('null values in output were replaced by empty strings'); @@ -570,19 +570,21 @@ function CSVWriter(stream, close_stream_on_finish, encoding, delim, policy, line } -function FileSystemCSVRegistry(delim, policy, encoding, skip_headers=false, options=null) { - this.delim = delim; - this.policy = policy; - this.encoding = encoding; - this.skip_headers = skip_headers; - this.stream = null; - this.record_iterator = null; +class FileSystemCSVRegistry { + constructor(delim, policy, encoding, skip_headers=false, options=null) { + this.delim = delim; + this.policy = policy; + this.encoding = encoding; + this.skip_headers = skip_headers; + this.stream = null; + this.record_iterator = null; - this.options = options; - this.bulk_input_path = null; - this.table_path = null; + this.options = options; + this.bulk_input_path = null; + this.table_path = null; + } - this.get_iterator_by_table_id = function(table_id) { + get_iterator_by_table_id(table_id) { this.table_path = find_table_path(table_id); if (this.table_path === null) { throw new RbqlIOHandlingError(`Unable to find join table "${table_id}"`); @@ -592,11 +594,11 @@ function FileSystemCSVRegistry(delim, policy, encoding, skip_headers=false, opti } else { this.stream = fs.createReadStream(this.table_path); } - this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, skip_headers, table_id, 'b'); + this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.skip_headers, table_id, 'b'); return this.record_iterator; }; - this.get_warnings = function(output_warnings) { + get_warnings(output_warnings) { if (this.record_iterator && this.skip_headers) { output_warnings.push(`The first (header) record was also skipped in the JOIN file: ${path.basename(this.table_path)}`); } @@ -631,18 +633,11 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, skip_headers); let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy); - if (debug_mode) - rbql.set_debug_mode(); await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code); join_tables_registry.get_warnings(output_warnings); } -function set_debug_mode() { - debug_mode = true; -} - - module.exports.is_ascii = is_ascii; module.exports.CSVRecordIterator = CSVRecordIterator; module.exports.CSVWriter = CSVWriter; @@ -650,6 +645,5 @@ module.exports.FileSystemCSVRegistry = FileSystemCSVRegistry; module.exports.interpret_named_csv_format = interpret_named_csv_format; module.exports.read_user_init_code = read_user_init_code; module.exports.query_csv = query_csv; -module.exports.set_debug_mode = set_debug_mode; module.exports.RecordQueue = RecordQueue; module.exports.exception_to_error_info = rbql.exception_to_error_info; diff --git a/rbql/_version.py b/rbql/_version.py index 6ad14f0..4bf366c 100644 --- a/rbql/_version.py +++ b/rbql/_version.py @@ -1,3 +1,3 @@ # Explanation of this file purpose: https://stackoverflow.com/a/16084844/2898283 -__version__ = '0.16.1' +__version__ = '0.17.0' diff --git a/rbql/rbql_engine.py b/rbql/rbql_engine.py index 9e1ed32..9e22a43 100755 --- a/rbql/rbql_engine.py +++ b/rbql/rbql_engine.py @@ -8,18 +8,16 @@ import time from collections import OrderedDict, defaultdict, namedtuple +import datetime # For date operations inside user queries +import os # For system operations inside user queries +import math # For math operations inside user queries + from ._version import __version__ -########################################################################## -# -# RBQL: RainBow Query Language -# Authors: Dmitry Ignatovich, ... -# -# -########################################################################## # This module must be both python2 and python3 compatible + # This module works with records only. It is CSV-agnostic. # Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc @@ -47,12 +45,16 @@ # TODO add "inconsistent number of fields in output table" warning. Useful for queries like this: `*a1.split("|")` or `...a1.split("|")`, where num of fields in a1 is variable +# FIXME refactor this module in sync with the JS version. There wasn't any cleanup after the last redesign + + GROUP_BY = 'GROUP BY' UPDATE = 'UPDATE' SELECT = 'SELECT' JOIN = 'JOIN' INNER_JOIN = 'INNER JOIN' LEFT_JOIN = 'LEFT JOIN' +LEFT_OUTER_JOIN = 'LEFT OUTER JOIN' STRICT_LEFT_JOIN = 'STRICT LEFT JOIN' ORDER_BY = 'ORDER BY' WHERE = 'WHERE' @@ -74,11 +76,17 @@ class RbqlIOHandlingError(Exception): pass -VariableInfo = namedtuple('VariableInfo', ['initialize', 'index']) +class InternalBadFieldError(Exception): + def __init__(self, bad_idx): + self.bad_idx = bad_idx + +class InternalBadKeyError(Exception): + def __init__(self, bad_key): + self.bad_key = bad_key -query_context = None +VariableInfo = namedtuple('VariableInfo', ['initialize', 'index']) class RBQLContext: @@ -93,7 +101,6 @@ def __init__(self, input_iterator, output_writer, user_init_code): self.like_regex_cache = dict() self.sort_key_expression = None - self.reverse_sort = False self.aggregation_stage = 0 self.aggregation_key_expression = None @@ -101,7 +108,6 @@ def __init__(self, input_iterator, output_writer, user_init_code): self.join_map_impl = None self.join_map = None - self.join_operation = None self.lhs_join_var_expression = None self.where_expression = None @@ -113,13 +119,7 @@ def __init__(self, input_iterator, output_writer, user_init_code): self.variables_init_code = None - -###################################### - - -import datetime # For date operations -import os # For system operations -import math # For math operations +query_context = None # Needs to be global for MIN(), MAX(), etc functions RBQL_VERSION = __version__ @@ -138,16 +138,6 @@ def iteritems6(x): return x.iteritems() -class InternalBadFieldError(Exception): - def __init__(self, bad_idx): - self.bad_idx = bad_idx - - -class InternalBadKeyError(Exception): - def __init__(self, bad_key): - self.bad_key = bad_key - - class RBQLRecord: def __init__(self): self.storage = dict() @@ -585,8 +575,9 @@ def finish(self): class SortedWriter(object): - def __init__(self, subwriter): + def __init__(self, subwriter, reverse_sort): self.subwriter = subwriter + self.reverse_sort = reverse_sort self.unsorted_entries = list() def write(self, sort_key_value, record): @@ -595,7 +586,7 @@ def write(self, sort_key_value, record): def finish(self): sorted_entries = sorted(self.unsorted_entries, key=lambda x: x[0]) - if query_context.reverse_sort: + if self.reverse_sort: sorted_entries.reverse() for e in sorted_entries: if not self.subwriter.write(e[1]): @@ -816,7 +807,6 @@ def embed_code(parent_code, child_placeholder, child_code): assert parent_code.count(child_placeholder) == 1 parent_lines = parent_code.strip().split('\n') child_lines = child_code.strip().split('\n') - placeholder_indentation = None for i in range(len(parent_lines)): pos = parent_lines[i].find(child_placeholder) if pos == -1: @@ -825,15 +815,14 @@ def embed_code(parent_code, child_placeholder, child_code): placeholder_indentation = parent_lines[i][:pos] assert placeholder_indentation == ' ' * pos child_lines = [placeholder_indentation + cl for cl in child_lines] - result = parent_lines[:i] + child_lines + parent_lines[i + 1:] - return '\n'.join(result) + '\n' + result_lines = parent_lines[:i] + child_lines + parent_lines[i + 1:] + return '\n'.join(result_lines) + '\n' assert False def generate_main_loop_code(): is_select_query = query_context.select_expression is not None is_join_query = query_context.join_map is not None - python_code = None where_expression = 'True' if query_context.where_expression is None else query_context.where_expression aggregation_key_expression = 'None' if query_context.aggregation_key_expression is None else query_context.aggregation_key_expression sort_key_expression = 'None' if query_context.sort_key_expression is None else query_context.sort_key_expression @@ -861,7 +850,6 @@ def generate_main_loop_code(): return python_code - def compile_and_run(): # TODO consider putting mad_max stuff here instead of keeping it in the global scope main_loop_body = generate_main_loop_code() @@ -870,10 +858,6 @@ def compile_and_run(): -############################################################ - - - def exception_to_error_info(e): exceptions_type_map = { 'RbqlRuntimeError': 'query execution', @@ -887,8 +871,10 @@ def exception_to_error_info(e): if len(error_strings) and re.search('File.*line', error_strings[0]) is not None: error_strings[0] = '\n' error_msg = ''.join(error_strings).rstrip() + if re.search(' having ', error_msg, flags=re.IGNORECASE) is not None: + error_msg += "\nRBQL doesn't support \"HAVING\" keyword" if re.search(' like[ (]', error_msg, flags=re.IGNORECASE) is not None: - error_msg += "\nRBQL doesn't support LIKE operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... " # UT JSON + error_msg += "\nRBQL doesn't support \"LIKE\" operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... " # UT JSON if error_msg.lower().find(' from ') != -1: error_msg += "\nRBQL doesn't use \"FROM\" keyword, e.g. you can query 'SELECT *' without FROM" # UT JSON return ('syntax error', error_msg) @@ -1123,7 +1109,7 @@ def translate_update_expression(update_expression, input_variables_map, string_l return combine_string_literals('\n'.join(update_expressions), string_literals) -def translate_select_expression_py(select_expression): +def translate_select_expression(select_expression): translated = replace_star_count(select_expression) translated = replace_star_vars(translated) translated = translated.strip() @@ -1132,7 +1118,7 @@ def translate_select_expression_py(select_expression): return '[{}]'.format(translated) -def separate_string_literals_py(rbql_expression): +def separate_string_literals(rbql_expression): # The regex is improved expression from here: https://stackoverflow.com/a/14366904/2898283 string_literals_regex = r'''(\"\"\"|\'\'\'|\"|\')((?