From ae8f1aa331fcb0386224c1222b7c149efc7c6433 Mon Sep 17 00:00:00 2001
From: Pat Nadolny
Date: Mon, 14 Aug 2023 13:38:12 -0400
Subject: [PATCH] fix: Numbers to floats (#106)

Closes https://github.com/MeltanoLabs/target-snowflake/issues/103

- Replaces https://github.com/MeltanoLabs/target-snowflake/pull/104
- This implementation is based on this comment:
  https://github.com/MeltanoLabs/target-snowflake/issues/103#issuecomment-1668404718.
  Wise treats all `number` JSON types as floats in Snowflake.
- As part of this I reverted the changes from
  https://github.com/MeltanoLabs/target-snowflake/pull/97 since they aren't
  needed anymore. We'll let the JSON Schema validation enforce this rather
  than relying on strict Snowflake schema enforcement.

Warning: I think this is a breaking change, because I believe the target will
now try to update the type of existing columns from NUMBER to DOUBLE, which
throws a `cannot change column NUM_COL from type NUMBER(38,0) to FLOAT` error.
@edgarrmondragon how do you think we should handle this in terms of breaking
changes? Should this be the 1.0 release?
---
 target_snowflake/connector.py | 30 ++-----------------
 tests/core.py                 |  9 ++----
 .../type_edge_cases.singer    |  4 +--
 3 files changed, 7 insertions(+), 36 deletions(-)

diff --git a/target_snowflake/connector.py b/target_snowflake/connector.py
index 5eb69b8..469dace 100644
--- a/target_snowflake/connector.py
+++ b/target_snowflake/connector.py
@@ -14,8 +14,6 @@ from target_snowflake.snowflake_types import NUMBER, TIMESTAMP_NTZ, VARIANT
 
 SNOWFLAKE_MAX_STRING_LENGTH = 16777216
-SNOWFLAKE_MAX_NUMBER_PRECISION = 38
-SNOWFLAKE_MAX_NUMBER_SCALE = 0
 
 
 class TypeMap:
     def __init__(self, operator, map_value, match_value=None):
@@ -95,7 +93,7 @@ def _convert_type(self, sql_type):
         if isinstance(sql_type, sct.TIMESTAMP_NTZ):
             return TIMESTAMP_NTZ
         elif isinstance(sql_type, sct.NUMBER):
-            return NUMBER(precision=sql_type.precision, scale=sql_type.scale)
+            return NUMBER
         elif isinstance(sql_type, sct.VARIANT):
             return VARIANT
         else:
@@ -220,28 +218,6 @@ def _conform_max_length(jsonschema_type):
             jsonschema_type["maxLength"] = SNOWFLAKE_MAX_STRING_LENGTH
         return jsonschema_type
 
-    @staticmethod
-    def _get_numeric_precision(jsonschema_type):
-        return SNOWFLAKE_MAX_NUMBER_PRECISION
-
-    @staticmethod
-    def _get_numeric_scale(jsonschema_type):
-        precision = SNOWFLAKE_MAX_NUMBER_SCALE
-        if jsonschema_type.get("exclusiveMinimum"):
-            if str(jsonschema_type[attrib])[-1] == 1:
-                return len(str(jsonschema_type[attrib]).split(".")[1]) - 1
-            else:
-                return len(str(jsonschema_type[attrib]).split(".")[1])
-        attribs_to_check = [
-            "multipleOf",
-            "min",
-            "max",
-        ]
-        for attrib in attribs_to_check:
-            if jsonschema_type.get(attrib):
-                precision = max(precision, len(str(jsonschema_type[attrib]).split(".")[1]))
-        return precision
-
     @staticmethod
     def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
         """Return a JSON Schema representation of the provided type.
@@ -260,8 +236,6 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
         # snowflake max and default varchar length
         # https://docs.snowflake.com/en/sql-reference/intro-summary-data-types.html
         maxlength = jsonschema_type.get("maxLength", SNOWFLAKE_MAX_STRING_LENGTH)
-        num_precision = SnowflakeConnector._get_numeric_precision(jsonschema_type)
-        num_scale = SnowflakeConnector._get_numeric_scale(jsonschema_type)
         # define type maps
         string_submaps = [
             TypeMap(eq, TIMESTAMP_NTZ(), "date-time"),
@@ -273,7 +247,7 @@ def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
             TypeMap(th._jsonschema_type_check, NUMBER(), ("integer",)),
             TypeMap(th._jsonschema_type_check, VARIANT(), ("object",)),
             TypeMap(th._jsonschema_type_check, VARIANT(), ("array",)),
-            TypeMap(th._jsonschema_type_check, NUMBER(precision=num_precision, scale=num_scale), ("number",)),
+            TypeMap(th._jsonschema_type_check, sct.DOUBLE(), ("number",)),
         ]
         # apply type maps
         if th._jsonschema_type_check(jsonschema_type, ("string",)):
diff --git a/tests/core.py b/tests/core.py
index be66fc3..b935c69 100644
--- a/tests/core.py
+++ b/tests/core.py
@@ -255,7 +255,7 @@ def validate(self) -> None:
         table_schema = connector.get_table(table)
         expected_types = {
             "id": sct.NUMBER,
-            "a1": sct.NUMBER,
+            "a1": sct.DOUBLE,
             "a2": sct.STRING,
             "a3": sqlalchemy.types.BOOLEAN,
             "a4": sct.VARIANT,
@@ -435,7 +435,8 @@ def validate(self) -> None:
         expected_types = {
             "id": sct.NUMBER,
             "col_max_length_str": sct.STRING,
-            "col_multiple_of": sct.NUMBER,
+            "col_multiple_of": sct.DOUBLE,
+            "col_multiple_of_int": sct.DOUBLE,
             "_sdc_extracted_at": sct.TIMESTAMP_NTZ,
             "_sdc_batched_at": sct.TIMESTAMP_NTZ,
             "_sdc_received_at": sct.TIMESTAMP_NTZ,
@@ -446,10 +447,6 @@ def validate(self) -> None:
         for column in table_schema.columns:
             assert column.name in expected_types
             isinstance(column.type, expected_types[column.name])
-            if column.name == "col_multiple_of":
-                assert column.type.precision == 38
-                assert column.type.scale == 4
-
 
 target_tests = TestSuite(
     kind="target",
diff --git a/tests/target_test_streams/type_edge_cases.singer b/tests/target_test_streams/type_edge_cases.singer
index ab42207..72d5f17 100644
--- a/tests/target_test_streams/type_edge_cases.singer
+++ b/tests/target_test_streams/type_edge_cases.singer
@@ -1,2 +1,2 @@
-{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }}}}
-{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456}}
+{"type": "SCHEMA", "stream": "type_edge_cases", "key_properties": ["id"], "schema": {"required": ["id"], "type": "object", "properties": {"id": {"type": "integer"}, "col_max_length_str": {"maxLength": 4294967295, "type": [ "null", "string" ] }, "col_multiple_of": {"multipleOf": 0.0001, "type": [ "null", "number" ] }, "col_multiple_of_int": {"multipleOf": 10, "type": [ "null", "number" ] }}}}
+{"type": "RECORD", "stream": "type_edge_cases", "record": {"id": 1, "col_max_length_str": "foo", "col_multiple_of": 123.456, "col_multiple_of_int": 100}}
\ No newline at end of file
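
Note (not part of the patch): a minimal sketch of the type-mapping behavior this change aims for, for anyone reviewing the breaking-change question above. The function `sketch_to_sql_type`, its return strings, and the simplified string/date-time handling are illustrative assumptions, not the target's actual `to_sql_type` implementation.

```python
def sketch_to_sql_type(jsonschema_type: dict) -> str:
    """Return the Snowflake type name this patch intends for a JSON Schema property."""
    json_types = jsonschema_type.get("type", [])
    if isinstance(json_types, str):
        json_types = [json_types]

    if "integer" in json_types:
        return "NUMBER(38,0)"  # integers keep Snowflake's default NUMBER
    if "number" in json_types:
        return "DOUBLE"  # every non-integer number is now a float (sct.DOUBLE)
    if "object" in json_types or "array" in json_types:
        return "VARIANT"
    if "string" in json_types:
        if jsonschema_type.get("format") == "date-time":
            return "TIMESTAMP_NTZ"
        return "VARCHAR(16777216)"  # SNOWFLAKE_MAX_STRING_LENGTH
    return "VARCHAR"


# The `col_multiple_of` property from type_edge_cases.singer now maps to DOUBLE
# regardless of `multipleOf`; plain integers still map to the default NUMBER.
print(sketch_to_sql_type({"multipleOf": 0.0001, "type": ["null", "number"]}))  # DOUBLE
print(sketch_to_sql_type({"type": "integer"}))  # NUMBER(38,0)
```

The last two lines are the behavior change: `number` columns no longer get a derived precision/scale, which is why existing NUMBER columns would need to migrate to FLOAT/DOUBLE.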