Skip to content

Commit

Permalink
Fix conversion of list to dict when overriding schema exception (#68)
Browse files Browse the repository at this point in the history
* fix

* fix
  • Loading branch information
BTheunissen authored Nov 9, 2023
1 parent 340751e commit 128a446
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 70 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "shaped-target-clickhouse"
version = "0.1.3"
version = "0.1.4"
description = "`target-clickhouse` is a Singer target for clickhouse, built with the Meltano Singer SDK."
readme = "README.md"
authors = ["Ben Theunissen"]
Expand Down
4 changes: 2 additions & 2 deletions target_clickhouse/sinks.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ def _validate_and_parse(self, record: dict) -> dict:
self._validator.validate(record)
except jsonschema_exceptions.ValidationError as e:
record = handle_validation_error(record, e, self.logger)
self._validator.validate(record)
return self._validate_and_parse(record)

self._parse_timestamps_in_record(
record=record,
Expand Down Expand Up @@ -186,7 +186,7 @@ def handle_validation_error(record,

# Convert the problematic value to string only if it's not null
if problem_value is not None:
if isinstance(problem_value, dict):
if isinstance(problem_value, (dict, list)):
# Convert the dict to JSON string
current_level[problem_key] = json.dumps(problem_value)
else:
Expand Down
121 changes: 54 additions & 67 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import logging
from typing import Optional

from jsonschema import Draft7Validator, ValidationError

from target_clickhouse.sinks import handle_validation_error

# Schema that allows a field to be either a string or null
schema = {
"type": "object",
Expand All @@ -20,12 +21,7 @@
"name": {"type": ["string", "null"]},
"age": {"type": "number"},
"address": {
"type": "object",
"properties": {
"street": {"type": "string"},
"city": {"type": "string"},
},
"required": ["street", "city"],
"type": ["string", "null"],
},
},
"required": ["name", "age", "address"],
Expand All @@ -35,36 +31,7 @@
# Validator instance
validator = Draft7Validator(schema)

# Function to handle validation errors
def handle_validation_error(record,
e: ValidationError,
logger: Optional[logging.Logger] = None):
if "'string'" in e.message:
if logger:
logger.warning(
f"Received non valid record for types 'string', {e.path}, "
f"attempting conversion for record, {record}",
)


key_path = list(e.path)

# Access the problematic value using the key_path
current_level = record
for key in key_path[:-1]: # Go to parent of the problematic key
current_level = current_level[key]

problem_key = key_path[-1]
problem_value = current_level[problem_key]

# Convert the problematic value to string only if it's not null.
if problem_value is not None:
current_level[problem_key] = str(problem_value)
if logger:
logger.warning("Validating converted record")
return record
return None
return None
nested_validator = Draft7Validator(nested_schema)

# Set up the logger
logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -130,33 +97,53 @@ def test_nested_dict_with_nested_non_string():
), "The 'city' should have been converted to a string."
validator.validate(updated_record) # This should not raise an error

def test_single_level_schema_nested_dict_to_string():
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
try:
validator.validate(record)
except ValidationError as e:
updated_record = handle_validation_error(record, e, logger)
assert (
isinstance(updated_record["name"], str)
), "The 'name' should have been converted to a JSON string."
assert (
json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
), "The JSON string is not correct."

def test_single_level_schema_deeply_nested_dict_to_string():
record = {"name":
{"first": "John", "last": "Doe",
"nicknames": {"short": "JD", "long": "Johnny"},
},
"age": 30,
}
try:
validator.validate(record)
except ValidationError as e:
updated_record = handle_validation_error(record, e, logger)
assert (
isinstance(updated_record["name"], str)
), "The 'name' field should have been converted to a JSON string."
assert (
"nicknames" in json.loads(updated_record["name"])
), "The JSON string does not correctly represent the nested dict."
def test_single_level_schema_nested_dict_to_string():
record = {"name": {"first": "John", "last": "Doe"}, "age": 30}
try:
nested_validator.validate(record)
except ValidationError as e:
updated_record = handle_validation_error(record, e, logger)
assert (
isinstance(updated_record["name"], str)
), "The 'name' should have been converted to a JSON string."
assert (
json.loads(updated_record["name"]) == {"first": "John", "last": "Doe"}
), "The JSON string is not correct."

def test_single_level_schema_deeply_nested_dict_to_string():
record = {
"name": "John",
"age": 30,
"address": {"street": "Main", "city": {"name": "New York"}},
}
try:
nested_validator.validate(record)
except ValidationError as e:
updated_record = handle_validation_error(record, e, logger)
assert (
isinstance(updated_record["address"], str)
), "The 'address' field should have been converted to a JSON string."
assert (
"street" in json.loads(updated_record["address"])
), "The JSON string does not correctly represent the nested dict."

def test_single_level_schema_deeply_nested_list_of_dicts_to_string():
record = {
"name": "John",
"age": 30,
"address": [
{"street": "Main", "city": {"name": "New York"}},
{"street": "Second", "city": {"name": "Los Angeles"}},
],
}
address_str = json.dumps(record["address"])
try:
nested_validator.validate(record)
except ValidationError as e:
updated_record = handle_validation_error(record, e, logger)
assert (
isinstance(updated_record["address"], str)
), "The 'address' field should have been converted to a JSON string."
assert (
updated_record["address"] == address_str
), "The JSON string does not correctly represent the nested list of dicts."

0 comments on commit 128a446

Please sign in to comment.