From 17342fa9c31cdc7b0503008eebfe396d78a9341f Mon Sep 17 00:00:00 2001 From: databasecorescript Date: Tue, 3 Sep 2024 18:27:54 +0000 Subject: [PATCH] Automated sync from Dataforge Cloud using commit 15571f150efbe6f6c95ef55f80015a897b2a6713 --- .../meta/svc-service/svc_parse_enrichment.sql | 2 +- .../u_enr_query_add_enrichment.sql | 7 +- .../u_enr_query_generate_query.sql | 5 +- .../code/meta/u-utility/u_add_backticks.sql | 19 +++++ .../u-utility/u_build_datatype_test_expr.sql | 13 ++- ...u_build_datatype_test_expr_from_parsed.sql | 2 +- .../u-utility/u_datatype_test_expression.sql | 6 +- .../u-utility/u_get_next_relation_path.sql | 2 +- .../code/meta/u-utility/u_get_parameter.sql | 79 +++++++++++++++---- .../meta/u-utility/u_get_schema_from_type.sql | 9 ++- .../u-utility/u_get_struct_key_datatype.sql | 1 - .../u-utility/u_lookup_source_attribute.sql | 14 ++-- .../u-utility/u_output_generate_query.sql | 2 +- database/schema/0 createTypes.sql | 2 +- database/schema/10 aggregateTable.sql | 9 ++- database/schema/14 rawAttribute.sql | 2 +- database/schema/9 outputColumnTable.sql | 2 + 17 files changed, 136 insertions(+), 40 deletions(-) create mode 100644 database/code/meta/u-utility/u_add_backticks.sql diff --git a/database/code/meta/svc-service/svc_parse_enrichment.sql b/database/code/meta/svc-service/svc_parse_enrichment.sql index 0d00828..a3913c3 100644 --- a/database/code/meta/svc-service/svc_parse_enrichment.sql +++ b/database/code/meta/svc-service/svc_parse_enrichment.sql @@ -415,7 +415,7 @@ RAISE DEBUG 'Parsed % aggregates',(SELECT COUNT(1) FROM _aggs_parsed); -- create test expression - v_ret_expression := meta.u_build_datatype_test_expr(in_enr.expression); + v_ret_expression := meta.u_build_datatype_test_expr(in_enr.expression, in_enr.datatype, in_enr.cast_datatype); SELECT json_agg(p) INTO v_ret_params FROM _params p; diff --git a/database/code/meta/u-utility/enr-query-generator/u_enr_query_add_enrichment.sql b/database/code/meta/u-utility/enr-query-generator/u_enr_query_add_enrichment.sql index dda38c6..f069008 100644 --- a/database/code/meta/u-utility/enr-query-generator/u_enr_query_add_enrichment.sql +++ b/database/code/meta/u-utility/enr-query-generator/u_enr_query_add_enrichment.sql @@ -71,7 +71,12 @@ FOR v_parameter IN INSERT INTO elements ( type, expression, alias, attribute_id, parent_ids, data_type) VALUES ( 'enrichment', v_expression, in_enr.attribute_name, in_enr.enrichment_id, v_parent_element_ids, --Check for explicit casts or numeric/decimal types that need to be cast to 38,12 - (SELECT CASE WHEN COALESCE(NULLIF(in_enr.cast_datatype,''), in_enr.datatype) = 'decimal' OR (COALESCE(in_enr.cast_datatype,'') <> '' AND in_enr.cast_datatype <> in_enr.datatype) THEN at.hive_ddl_type END FROM meta.attribute_type at WHERE at.hive_type = COALESCE(NULLIF(in_enr.cast_datatype,''), in_enr.datatype) )) + CASE WHEN NULLIF(in_enr.cast_datatype,'') IS NOT NULL THEN + (SELECT at.hive_ddl_type FROM meta.attribute_type at WHERE at.hive_type = in_enr.cast_datatype) + WHEN in_enr.datatype = 'decimal' THEN + (SELECT at.hive_ddl_type FROM meta.attribute_type at WHERE at.hive_type = in_enr.datatype) + END + ) RETURNING id ) SELECT id INTO v_ret_element_id FROM cte; diff --git a/database/code/meta/u-utility/enr-query-generator/u_enr_query_generate_query.sql b/database/code/meta/u-utility/enr-query-generator/u_enr_query_generate_query.sql index c63d0c1..265944a 100644 --- a/database/code/meta/u-utility/enr-query-generator/u_enr_query_generate_query.sql +++ b/database/code/meta/u-utility/enr-query-generator/u_enr_query_generate_query.sql @@ -75,13 +75,12 @@ FOR v_cte IN 0 .. v_cte_max LOOP v_sql := v_sql || E'\nFROM ' || CASE WHEN v_cte = 0 THEN meta.u_get_source_table_name(in_source_id) ELSE 'cte' || (v_cte - 1) END || ' T'; -- Add current CTE joins - v_sql := v_sql || COALESCE((SELECT string_agg( E'\nLEFT JOIN ' || CASE WHEN in_source_id = e.source_id AND v_cte > 0 THEN 'cte' || (v_cte - 1) -- self-join + v_sql := v_sql || COALESCE((SELECT string_agg( E'\nLEFT JOIN ' || CASE WHEN in_source_id = e.source_id AND v_cte > 0 AND cardinality(e.relation_ids) = 1 THEN 'cte' || (v_cte - 1) -- self-join ELSE meta.u_get_hub_table_name(e.source_id) END || ' ' || e.alias || ' ON ' || e.expression,' ' ORDER BY e.alias) FROM elements e WHERE e.type = 'join' AND e.cte = v_cte),''); -- Add current CTE many-joins - v_sql := v_sql || COALESCE((SELECT string_agg( E'\nLEFT JOIN ' || e.alias || '_AGG' - || ' ON ' || + v_sql := v_sql || COALESCE((SELECT string_agg( E'\nLEFT JOIN ' || e.alias || '_AGG' || ' ON ' || -- Aggregate join list (SELECT string_agg('T.' || DL.att || ' = ' || e.alias || '_AGG.' || DL.att, ' AND ') FROM unnest(e.many_join_list) DL(att) ) diff --git a/database/code/meta/u-utility/u_add_backticks.sql b/database/code/meta/u-utility/u_add_backticks.sql new file mode 100644 index 0000000..6d88931 --- /dev/null +++ b/database/code/meta/u-utility/u_add_backticks.sql @@ -0,0 +1,19 @@ +CREATE OR REPLACE FUNCTION meta.u_add_backticks(in_column text) + RETURNS TEXT + LANGUAGE plpgsql +AS +$function$ + +DECLARE + v_updated_name text; + +BEGIN + + IF in_column !~ ('^[a-zA-Z_]+[a-zA-Z0-9_]*$') THEN + RETURN '`' || in_column || '`'; + ELSE + RETURN in_column; + END IF; + +END; +$function$; \ No newline at end of file diff --git a/database/code/meta/u-utility/u_build_datatype_test_expr.sql b/database/code/meta/u-utility/u_build_datatype_test_expr.sql index 2f20d7c..d071aae 100644 --- a/database/code/meta/u-utility/u_build_datatype_test_expr.sql +++ b/database/code/meta/u-utility/u_build_datatype_test_expr.sql @@ -1,4 +1,4 @@ -CREATE OR REPLACE FUNCTION meta.u_build_datatype_test_expr(in_expression text) +CREATE OR REPLACE FUNCTION meta.u_build_datatype_test_expr(in_expression text, in_datatype text, in_cast_datatype text) RETURNS text LANGUAGE 'plpgsql' @@ -19,6 +19,7 @@ DECLARE v_exp_test text; v_ret_expression text := ''; -- test expression v_attribute_name text; + v_exp_test_select text; BEGIN @@ -41,7 +42,11 @@ BEGIN v_ret_expression := v_ret_expression || v_add; -- add parameter with datatype - v_exp_test_select_list := v_exp_test_select_list || (meta.u_datatype_test_expression(v_datatype,v_datatype_schema) || ' ' || v_attribute_name); + v_exp_test_select := meta.u_datatype_test_expression(v_datatype,v_datatype_schema) || ' ' || v_attribute_name; + + IF NOT v_exp_test_select = ANY(v_exp_test_select_list) THEN + v_exp_test_select_list := v_exp_test_select_list || v_exp_test_select; + END IF; v_ret_expression := v_ret_expression || CASE WHEN v_aggregates_exist_flag AND v_aggregate_id IS NULL THEN 'first_value(' || v_attribute_name || ')' -- wrap non-aggregated parameter into aggregate for data type testing purposes only @@ -52,6 +57,10 @@ BEGIN -- add remaining trailing charaters v_ret_expression := v_ret_expression || substr(in_expression,v_last_end); + IF NULLIF(in_cast_datatype,'') IS NOT NULL THEN + v_ret_expression := format('CAST(%s as %s)',v_ret_expression,in_cast_datatype); + END IF; + RAISE DEBUG 'v_exp_test_select_list: %',v_exp_test_select_list; IF cardinality(v_exp_test_select_list) > 0 THEN diff --git a/database/code/meta/u-utility/u_build_datatype_test_expr_from_parsed.sql b/database/code/meta/u-utility/u_build_datatype_test_expr_from_parsed.sql index 9a9a456..cc07e31 100644 --- a/database/code/meta/u-utility/u_build_datatype_test_expr_from_parsed.sql +++ b/database/code/meta/u-utility/u_build_datatype_test_expr_from_parsed.sql @@ -37,7 +37,7 @@ BEGIN FROM meta.enrichment_parameter ep WHERE parent_enrichment_id = in_enr.enrichment_id ORDER BY enrichment_parameter_id LOOP v_param := meta.u_get_parameter(v_ep); - v_attribute_name := meta.u_enr_query_get_enrichment_parameter_name(v_ep); + v_attribute_name := v_param.name; RAISE DEBUG 'v_param: %',v_param; IF v_param.error IS NOT NULL THEN RETURN v_param.error; diff --git a/database/code/meta/u-utility/u_datatype_test_expression.sql b/database/code/meta/u-utility/u_datatype_test_expression.sql index 06f63f1..f711c95 100644 --- a/database/code/meta/u-utility/u_datatype_test_expression.sql +++ b/database/code/meta/u-utility/u_datatype_test_expression.sql @@ -20,8 +20,8 @@ IF in_datatype = 'struct' THEN -- Create expression as struct( field1.typeExp AS field1.name, field2.typeExp AS field2.name, ... ) - SELECT 'struct(' || string_agg(CASE WHEN jsonb_typeof(field->'type') = 'string' THEN format('%s AS %s',meta.u_datatype_test_expression(field->>'type', null), field->>'name') - WHEN jsonb_typeof(field->'type') = 'object' THEN format('%s AS %s',meta.u_datatype_test_expression(field->'type'->>'type', field->'type'), field->>'name') + SELECT 'struct(' || string_agg(CASE WHEN jsonb_typeof(field->'type') = 'string' THEN format('%s AS `%s`',meta.u_datatype_test_expression(field->>'type', null), field->>'name') + WHEN jsonb_typeof(field->'type') = 'object' THEN format('%s AS `%s`',meta.u_datatype_test_expression(field->'type'->>'type', field->'type'), field->>'name') ELSE format('ERROR: Invalid type %s of field %s',field->>'type', field->>'name') END,', ') || ')' INTO v_exp FROM jsonb_array_elements(v_fields) field; @@ -45,7 +45,7 @@ ELSEIF in_datatype = 'array' THEN v_exp := format('array(%s,%s)',v_array_sub_exp,v_array_sub_exp); ELSEIF in_datatype like 'decimal(%' THEN - v_exp := format('CAST(`decimal` AS %s)',in_datatype); + v_exp := format('CAST(`decimal` AS decimal(38,12))',in_datatype); ELSE v_exp := '`' || in_datatype || '`'; END IF; diff --git a/database/code/meta/u-utility/u_get_next_relation_path.sql b/database/code/meta/u-utility/u_get_next_relation_path.sql index 3ce1b2e..f348cc5 100644 --- a/database/code/meta/u-utility/u_get_next_relation_path.sql +++ b/database/code/meta/u-utility/u_get_next_relation_path.sql @@ -1,7 +1,7 @@ DROP FUNCTION IF EXISTS meta.u_get_next_relation_path; CREATE OR REPLACE FUNCTION meta.u_get_next_relation_path(in_from_source_id int, in_to_source_id int, -in_cardinality text = '1', in_start_path int[] = '{}', in_max_length int = 10) +in_cardinality text = '1', in_start_path int[] = '{}', in_max_length int = 5) RETURNS json LANGUAGE plpgsql AS $function$ diff --git a/database/code/meta/u-utility/u_get_parameter.sql b/database/code/meta/u-utility/u_get_parameter.sql index 1822645..c18b906 100644 --- a/database/code/meta/u-utility/u_get_parameter.sql +++ b/database/code/meta/u-utility/u_get_parameter.sql @@ -4,22 +4,22 @@ CREATE OR REPLACE FUNCTION meta.u_get_parameter(in_p meta.enrichment_parameter) AS $function$ DECLARE - v_ret parameter_map = ROW(in_p.type, in_p.raw_attribute_id, in_p.enrichment_id, in_p.system_attribute_id, in_p.source_id, null::text, null::text, null::jsonb)::parameter_map; + v_ret parameter_map = ROW(null::text, in_p.type, in_p.raw_attribute_id, in_p.enrichment_id, in_p.system_attribute_id, in_p.source_id, null::text, null::text, null::jsonb)::parameter_map; BEGIN IF in_p.type = 'raw' THEN - SELECT r.data_type, r.datatype_schema - INTO v_ret.datatype, v_ret.datatype_schema + SELECT r.column_alias, r.data_type, r.datatype_schema + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.raw_attribute r WHERE r.raw_attribute_id = in_p.raw_attribute_id; ELSEIF in_p.type = 'enrichment' THEN - SELECT e.datatype, e.datatype_schema - INTO v_ret.datatype, v_ret.datatype_schema + SELECT e.attribute_name, e.datatype, e.datatype_schema + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.enrichment e WHERE e.enrichment_id = in_p.enrichment_id; ELSEIF in_p.type = 'system' THEN - SELECT data_type, meta.u_get_schema_from_type(null, data_type) - INTO v_ret.datatype, v_ret.datatype_schema + SELECT s.name, data_type, meta.u_get_schema_from_type(null, data_type) + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.system_attribute s WHERE s.system_attribute_id = in_p.system_attribute_id; ELSE @@ -38,22 +38,22 @@ CREATE OR REPLACE FUNCTION meta.u_get_parameter(in_p meta.source_relation_parame AS $function$ DECLARE - v_ret parameter_map = ROW(in_p.type, in_p.raw_attribute_id, in_p.enrichment_id, in_p.system_attribute_id, in_p.source_id, null::text, null::text, null::jsonb)::parameter_map; + v_ret parameter_map = ROW(null::text, in_p.type, in_p.raw_attribute_id, in_p.enrichment_id, in_p.system_attribute_id, in_p.source_id, null::text, null::text, null::jsonb)::parameter_map; BEGIN IF in_p.type = 'raw' THEN - SELECT r.data_type, r.datatype_schema - INTO v_ret.datatype, v_ret.datatype_schema + SELECT r.column_alias, r.data_type, r.datatype_schema + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.raw_attribute r WHERE r.raw_attribute_id = in_p.raw_attribute_id; ELSEIF in_p.type = 'enrichment' THEN - SELECT e.datatype, e.datatype_schema - INTO v_ret.datatype, v_ret.datatype_schema + SELECT e.attribute_name, e.datatype, e.datatype_schema + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.enrichment e WHERE e.enrichment_id = in_p.enrichment_id; ELSEIF in_p.type = 'system' THEN - SELECT data_type, meta.u_get_schema_from_type(null, data_type) - INTO v_ret.datatype, v_ret.datatype_schema + SELECT s.name, data_type, meta.u_get_schema_from_type(null, data_type) + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema FROM meta.system_attribute s WHERE s.system_attribute_id = in_p.system_attribute_id; ELSE @@ -64,4 +64,55 @@ BEGIN END; +$function$; + +CREATE OR REPLACE FUNCTION meta.u_get_parameter(osc meta.output_source_column) + RETURNS parameter_map + LANGUAGE plpgsql +AS $function$ + +DECLARE + v_ret parameter_map = ROW(null::text, osc.type, osc.raw_attribute_id, osc.enrichment_id, osc.system_attribute_id, null, null::text, null::text, null::jsonb)::parameter_map; + v_schema jsonb; +BEGIN + + + IF osc.type = 'raw' THEN + SELECT r.column_alias,r.data_type, r.datatype_schema, r.source_id + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema, v_ret.source_id + FROM meta.raw_attribute r + WHERE r.raw_attribute_id = osc.raw_attribute_id; + ELSEIF osc.type = 'enrichment' THEN + SELECT e.attribute_name, e.datatype, e.datatype_schema, e.source_id + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema, v_ret.source_id + FROM meta.enrichment e + WHERE e.enrichment_id = osc.enrichment_id; + ELSEIF osc.type = 'system' THEN + SELECT s.name, data_type, meta.u_get_schema_from_type(null, data_type) + INTO v_ret.name, v_ret.datatype, v_ret.datatype_schema + FROM meta.system_attribute s + WHERE s.system_attribute_id = osc.system_attribute_id; + + SELECT os.source_id INTO v_ret.source_id + FROM meta.output_source os WHERE os.output_source_id = osc.output_source_id; + ELSE + v_ret.error := 'Invalid parameter ' || osc::text; + END IF; + + IF v_ret.error IS NOT NULL OR COALESCE(osc.keys,'') = '' THEN + RETURN v_ret; + END IF; + + -- update datatype and schema for struct key expression + v_schema := meta.u_get_struct_key_datatype(v_ret.datatype_schema, osc.keys); + + v_ret.error = v_schema->>'error'; + v_ret.datatype := v_schema->>'datatype'; + v_ret.datatype_schema := v_schema->'datatype_schema'; + + + RETURN v_ret; + +END; + $function$; \ No newline at end of file diff --git a/database/code/meta/u-utility/u_get_schema_from_type.sql b/database/code/meta/u-utility/u_get_schema_from_type.sql index eed6d86..3dc360e 100644 --- a/database/code/meta/u-utility/u_get_schema_from_type.sql +++ b/database/code/meta/u-utility/u_get_schema_from_type.sql @@ -12,7 +12,11 @@ DECLARE BEGIN IF in_schema IS NOT NULL THEN - RETURN in_schema; + IF jsonb_typeof(in_schema) = 'string' AND in_schema->>0 like 'decimal%' THEN + RETURN to_jsonb('decimal(38,12)'::text); + ELSE + RETURN in_schema; + END IF; END IF; @@ -20,10 +24,13 @@ IF in_datatype = 'int' THEN RETURN to_jsonb('integer'::text); ELSEIF in_datatype LIKE 'decimal%' THEN RETURN to_jsonb('decimal(38,12)'::text); +ELSEIF in_datatype IN ('struct','array') THEN + RETURN null; END IF; RETURN to_jsonb(in_datatype); + END; $BODY$; \ No newline at end of file diff --git a/database/code/meta/u-utility/u_get_struct_key_datatype.sql b/database/code/meta/u-utility/u_get_struct_key_datatype.sql index d2234c4..8d7265c 100644 --- a/database/code/meta/u-utility/u_get_struct_key_datatype.sql +++ b/database/code/meta/u-utility/u_get_struct_key_datatype.sql @@ -7,7 +7,6 @@ AS $BODY$ DECLARE v_key text; v_struct_keys_array text[]; - v_parameter parameter_map; v_schema jsonb := in_datatype_schema; BEGIN diff --git a/database/code/meta/u-utility/u_lookup_source_attribute.sql b/database/code/meta/u-utility/u_lookup_source_attribute.sql index b0dd3da..3d58aa9 100644 --- a/database/code/meta/u-utility/u_lookup_source_attribute.sql +++ b/database/code/meta/u-utility/u_lookup_source_attribute.sql @@ -4,15 +4,15 @@ CREATE OR REPLACE FUNCTION meta.u_lookup_source_attribute(in_source_id int, in_a AS $function$ DECLARE - v_ret parameter_map = ROW(null::text, null::int, null::int, null::int, in_source_id, null::text, null::text, null::jsonb)::parameter_map; + v_ret parameter_map = ROW(null::text, null::text, null::int, null::int, null::int, in_source_id, null::text, null::text, null::jsonb)::parameter_map; v_attribute_name_substituted text; v_attribute_name_substituted_json json; BEGIN v_attribute_name_substituted := in_attribute_name; - SELECT r.raw_attribute_id, r.data_type, r.datatype_schema - INTO v_ret.raw_attribute_id, v_ret.datatype, v_ret.datatype_schema + SELECT r.column_alias, r.raw_attribute_id, r.data_type, r.datatype_schema + INTO v_ret.name, v_ret.raw_attribute_id, v_ret.datatype, v_ret.datatype_schema FROM meta.raw_attribute r WHERE r.source_id = in_source_id AND r.column_alias = v_attribute_name_substituted; @@ -21,8 +21,8 @@ BEGIN RETURN v_ret; END IF; - SELECT enrichment_id, COALESCE(NULLIF(e.cast_datatype,''), e.datatype), datatype_schema - INTO v_ret.enrichment_id, v_ret.datatype, v_ret.datatype_schema + SELECT attribute_name, enrichment_id, e.datatype, datatype_schema + INTO v_ret.name, v_ret.enrichment_id, v_ret.datatype, v_ret.datatype_schema FROM meta.enrichment e WHERE e.source_id = in_source_id AND e.active_flag AND e.attribute_name = v_attribute_name_substituted; @@ -31,8 +31,8 @@ BEGIN RETURN v_ret; END IF; - SELECT s.system_attribute_id, s.data_type - INTO v_ret.system_attribute_id, v_ret.datatype + SELECT s.name, s.system_attribute_id, s.data_type + INTO v_ret.name, v_ret.system_attribute_id, v_ret.datatype FROM meta.system_attribute s JOIN meta.source src ON src.source_id = in_source_id AND s.refresh_type @> ARRAY[src.refresh_type] AND s.table_type @> ARRAY['hub'] diff --git a/database/code/meta/u-utility/u_output_generate_query.sql b/database/code/meta/u-utility/u_output_generate_query.sql index a0e76da..53de226 100644 --- a/database/code/meta/u-utility/u_output_generate_query.sql +++ b/database/code/meta/u-utility/u_output_generate_query.sql @@ -104,7 +104,7 @@ END IF; v_system_fields := ''; v_cte_structure := CASE WHEN v_aggregate_flag AND v_output_type = 'table' THEN ' WITH agg_cte AS( ' ELSE '' END; - SELECT string_agg(meta.u_output_query_column_select(osc, null) || ' as ' || meta.u_add_backticks_output_column(oc.name) + SELECT string_agg(meta.u_output_query_column_select(osc, null) || ' as ' || meta.u_add_backticks(oc.name) , ', ' ORDER BY oc.position) || ' ' || CASE WHEN v_output_type <> 'table' THEN '' ELSE v_system_fields END INTO v_select_statement diff --git a/database/schema/0 createTypes.sql b/database/schema/0 createTypes.sql index 5ffde1f..b268708 100644 --- a/database/schema/0 createTypes.sql +++ b/database/schema/0 createTypes.sql @@ -3,6 +3,6 @@ BEGIN IF EXISTS (SELECT 1 FROM pg_type WHERE typname = 'parameter_map') THEN DROP TYPE parameter_map CASCADE; END IF; - CREATE TYPE parameter_map AS (type text, raw_attribute_id int, enrichment_id int, system_attribute_id int, source_id int, error text, datatype text, datatype_schema jsonb); + CREATE TYPE parameter_map AS (name text, type text, raw_attribute_id int, enrichment_id int, system_attribute_id int, source_id int, error text, datatype text, datatype_schema jsonb); END$$; \ No newline at end of file diff --git a/database/schema/10 aggregateTable.sql b/database/schema/10 aggregateTable.sql index 95c2966..9ff579f 100644 --- a/database/schema/10 aggregateTable.sql +++ b/database/schema/10 aggregateTable.sql @@ -9,14 +9,17 @@ CREATE TABLE IF NOT EXISTS meta.aggregate ( TRUNCATE TABLE meta.aggregate; INSERT INTO meta.aggregate(aggregate_name, description, numeric_flag, data_type) VALUES ('any', 'Returns true if at least one value of `expr` is true.', FALSE, 'boolean'), + ('any_value', 'Returns some value of expr for a group of rows. This function is non-deterministic.', FALSE, 'default'), ('approx_count_distinct', 'Returns the estimated cardinality by HyperLogLog++', FALSE, 'long'), -('approx_percentile', 'Returns the approximate `percentile` of the numeric column `col` which is the smallest value in the ordered `col` values (sorted from least to greatest) such that no more than `percentage` of `col` values is less than the value or equal to that value.', TRUE, 'decimal'), + ('approx_percentile', 'Returns the approximate `percentile` of the numeric column `col` which is the smallest value in the ordered `col` values (sorted from least to greatest) such that no more than `percentage` of `col` values is less than the value or equal to that value.', TRUE, 'decimal'), ('avg', 'Returns the mean calculated from values of a group.', TRUE, 'double'), ('bit_or', 'Returns the bitwise OR of all non-null input values, or null if none.', FALSE, 'boolean'), ('bit_xor', 'Returns the bitwise XOR of all non-null input values, or null if none.', FALSE, 'boolean'), ('bool_and', 'Returns true if all values of `expr` are true.', FALSE, 'boolean'), ('bool_or', 'Returns true if at least one value of `expr` is true.', FALSE, 'boolean'), + ('some', 'Returns true if at least one value of `expr` is true.', FALSE, 'boolean'), ('collect_list', 'Collects and returns a list of non-unique elements', FALSE, 'array'), + ('array_agg', 'Collects and returns an array of elements', FALSE, 'array'), ('collect_set', 'Collects and returns a set of unique elements', FALSE, 'array'), ('corr', 'Returns Pearson coefficient of correlation between a set of number pairs.', TRUE, 'decimal'), ('count', 'Returns the number of rows for which the column expression is non-null.', FALSE, 'long'), @@ -33,16 +36,18 @@ VALUES ('any', 'Returns true if at least one value of `expr` is true.', FALSE, ' ('max', 'Returns the maximum value of column', FALSE, 'default'), ('max_by', 'Returns the value of `x` associated with the maximum value of `y`.', FALSE, 'default'), ('mean', 'Returns the mean calculated from values of a group', TRUE, 'double'), + ('median', 'Returns the median calculated from values of a group', TRUE, 'double'), + ('mode', 'Returns the most frequent, not NULL, value of expr in a group', FALSE, 'default'), ('min', 'Returns the minimum value of column', FALSE, 'default'), ('min_by', 'Returns the value of `x` associated with the minimum value of `y`.', FALSE, 'default'), ('percentile', 'Returns the exact percentile value of numeric column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The value of frequency should be positive integral', TRUE, 'decimal'), ('percentile_approx', 'Returns the approximate percentile value of numeric column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The `accuracy` parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. Higher value of `accuracy` yields better accuracy.', TRUE, 'decimal'), ('skewness', 'Returns the skewness value calculated from values of a group', TRUE, 'double'), - ('some', 'Returns true if at least one value of `expr` is true.', FALSE, 'boolean'), ('std', 'Returns the sample standard deviation calculated from values of a group', TRUE, 'double'), ('stddev', 'Returns the sample standard deviation calculated from values of a group', TRUE, 'double'), ('stddev_pop', 'Returns the population standard deviation calculated from values of a group.', TRUE, 'double'), ('stddev_samp', 'Returns the sample standard deviation calculated from values of a group.', TRUE, 'double'), + ('schema_of_json_agg', 'Returns the combined schema of JSON strings in a group in DDL format.', FALSE, 'string'), ('sum', 'Returns the sum calculated from values of a group.', TRUE, 'default'), ('var_pop', 'Returns the population variance calculated from values of a group', TRUE, 'double'), ('var_samp', 'Returns the sample variance calculated from values of a group.', TRUE, 'double'), diff --git a/database/schema/14 rawAttribute.sql b/database/schema/14 rawAttribute.sql index ad1b048..8daef17 100644 --- a/database/schema/14 rawAttribute.sql +++ b/database/schema/14 rawAttribute.sql @@ -17,10 +17,10 @@ CREATE TABLE IF NOT EXISTS meta.raw_attribute( CONSTRAINT raw_attribute_pkey PRIMARY KEY (raw_attribute_id), CONSTRAINT raw_attribute_source_id_fkey FOREIGN KEY (source_id) REFERENCES meta.source(source_id), CONSTRAINT raw_attribute_data_type_fkey FOREIGN KEY (data_type) REFERENCES meta.attribute_type (hive_type), - CONSTRAINT raw_attribute_raw_attribute_name_data_type UNIQUE(source_id, raw_attribute_name, data_type, datatype_schema), CONSTRAINT raw_attribute_column_alias UNIQUE(column_alias, source_id) ); CREATE INDEX IF NOT EXISTS ix_raw_attribute_source_id ON meta.raw_attribute (source_id); + diff --git a/database/schema/9 outputColumnTable.sql b/database/schema/9 outputColumnTable.sql index 00266d0..a7dd1b9 100644 --- a/database/schema/9 outputColumnTable.sql +++ b/database/schema/9 outputColumnTable.sql @@ -5,6 +5,8 @@ CREATE TABLE IF NOT EXISTS meta.output_column position integer NOT NULL, name text COLLATE pg_catalog."default" NOT NULL, datatype text COLLATE pg_catalog."default" DEFAULT 'text'::text, + datatype_schema jsonb, + complex_flag boolean, precision smallint, scale smallint, max_length smallint,