From 46abd300e8ad769e681c7643cefb3a0df069672d Mon Sep 17 00:00:00 2001 From: Wyatt Cannon Date: Fri, 22 Mar 2024 15:56:57 -0700 Subject: [PATCH 1/5] Added create_valid_type to fix issues with JSON Schema serializing DataFrame type --- src/hayhooks/server/pipelines/models.py | 11 +++---- .../server/utils/create_valid_type.py | 32 +++++++++++++++++++ 2 files changed, 37 insertions(+), 6 deletions(-) create mode 100644 src/hayhooks/server/utils/create_valid_type.py diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 816d363..b3f8e81 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -1,7 +1,9 @@ -from typing import get_args, get_origin, List - +from typing import get_args, get_origin, List, get_type_hints +from hayhooks.server.utils.create_valid_type import create_valid_pydantic_model, create_valid_type +from pandas import DataFrame from pydantic import BaseModel, create_model, ConfigDict from haystack.dataclasses import Document +import json class HaystackDocument(BaseModel): @@ -54,10 +56,7 @@ def get_response_model(pipeline_name: str, pipeline_outputs): component_model = {} for name, typedef in outputs.items(): output_type = typedef["type"] - if get_origin(output_type) == list and get_args(output_type)[0] == Document: - component_model[name] = (List[HaystackDocument], ...) - else: - component_model[name] = (typedef["type"], ...) + component_model[name] = (create_valid_type(output_type, { DataFrame: List}), ...) response_model[component_name] = (create_model('ComponentParams', **component_model, __config__=config), ...) return create_model(f'{pipeline_name.capitalize()}RunResponse', **response_model, __config__=config) diff --git a/src/hayhooks/server/utils/create_valid_type.py b/src/hayhooks/server/utils/create_valid_type.py new file mode 100644 index 0000000..e985e8a --- /dev/null +++ b/src/hayhooks/server/utils/create_valid_type.py @@ -0,0 +1,32 @@ +from typing import get_type_hints, Dict, get_origin, get_args +from typing_extensions import TypedDict +from types import GenericAlias +from inspect import isclass + +def create_valid_type(typed_object:type, invalid_types:Dict[type, type]): + """ + Returns a new type specification, replacing invalid_types in typed_object. + example: replace_invalid_types(ExtractedAnswer, {DataFrame: List}]) returns + a TypedDict with DataFrame types replaced with List + """ + def validate_type(v): + child_typing = [] + for t in get_args(v): + if t in invalid_types: + result = invalid_types[t] + elif isclass(t): + result = create_valid_type(t, invalid_types) + else: result = t + child_typing.append(result) + return GenericAlias(get_origin(v), tuple(child_typing)) + if isclass(typed_object): + new_typing = {} + for k, v in get_type_hints(typed_object).items(): + if(get_args(v) != ()): + new_typing[k] = validate_type(v) + else: new_typing[k] = v + if new_typing == {}: + return typed_object + else: return TypedDict(typed_object.__name__, new_typing) + else: + return validate_type(typed_object) \ No newline at end of file From 79e1b00c14c95c4a28c36609b8d68e686e8429ad Mon Sep 17 00:00:00 2001 From: Wyatt Cannon Date: Mon, 25 Mar 2024 11:16:17 -0700 Subject: [PATCH 2/5] Added type validator --- src/hayhooks/server/pipelines/models.py | 15 +++--- .../server/utils/create_valid_type.py | 52 +++++++++---------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index b3f8e81..04e591a 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -1,5 +1,5 @@ from typing import get_args, get_origin, List, get_type_hints -from hayhooks.server.utils.create_valid_type import create_valid_pydantic_model, create_valid_type +from hayhooks.server.utils.create_valid_type import create_valid_type from pandas import DataFrame from pydantic import BaseModel, create_model, ConfigDict from haystack.dataclasses import Document @@ -56,7 +56,7 @@ def get_response_model(pipeline_name: str, pipeline_outputs): component_model = {} for name, typedef in outputs.items(): output_type = typedef["type"] - component_model[name] = (create_valid_type(output_type, { DataFrame: List}), ...) + component_model[name] = (create_valid_type(output_type, { DataFrame: dict}), ...) response_model[component_name] = (create_model('ComponentParams', **component_model, __config__=config), ...) return create_model(f'{pipeline_name.capitalize()}RunResponse', **response_model, __config__=config) @@ -79,13 +79,10 @@ def convert_component_output(component_output): result[output_name] = data # Output contains a list of Document - if type(data) is list and type(data[0]) is Document: - result[output_name] = [HaystackDocument(id=d.id, content=d.content) for d in data] - # Output is a single Document - elif type(data) is Document: - result[output_name] = HaystackDocument(id=data.id, content=data.content or "") - # Any other type: do nothing + if type(data) is list: + result[output_name] = [d.to_dict() for d in data] else: - result[output_name] = data + result[output_name] = data.to_dict() + # Output is a single Document return result diff --git a/src/hayhooks/server/utils/create_valid_type.py b/src/hayhooks/server/utils/create_valid_type.py index e985e8a..26815b5 100644 --- a/src/hayhooks/server/utils/create_valid_type.py +++ b/src/hayhooks/server/utils/create_valid_type.py @@ -4,29 +4,29 @@ from inspect import isclass def create_valid_type(typed_object:type, invalid_types:Dict[type, type]): - """ - Returns a new type specification, replacing invalid_types in typed_object. - example: replace_invalid_types(ExtractedAnswer, {DataFrame: List}]) returns - a TypedDict with DataFrame types replaced with List - """ - def validate_type(v): - child_typing = [] - for t in get_args(v): - if t in invalid_types: - result = invalid_types[t] - elif isclass(t): - result = create_valid_type(t, invalid_types) - else: result = t - child_typing.append(result) - return GenericAlias(get_origin(v), tuple(child_typing)) - if isclass(typed_object): - new_typing = {} - for k, v in get_type_hints(typed_object).items(): - if(get_args(v) != ()): - new_typing[k] = validate_type(v) - else: new_typing[k] = v - if new_typing == {}: - return typed_object - else: return TypedDict(typed_object.__name__, new_typing) - else: - return validate_type(typed_object) \ No newline at end of file + """ + Returns a new type specification, replacing invalid_types in typed_object. + example: replace_invalid_types(ExtractedAnswer, {DataFrame: List}]) returns + a TypedDict with DataFrame types replaced with List + """ + def validate_type(v): + child_typing = [] + for t in get_args(v): + if t in invalid_types: + result = invalid_types[t] + elif isclass(t): + result = create_valid_type(t, invalid_types) + else: result = t + child_typing.append(result) + return GenericAlias(get_origin(v), tuple(child_typing)) + if isclass(typed_object): + new_typing = {} + for k, v in get_type_hints(typed_object).items(): + if(get_args(v) != ()): + new_typing[k] = validate_type(v) + else: new_typing[k] = v + if new_typing == {}: + return typed_object + else: return TypedDict(typed_object.__name__, new_typing) + else: + return validate_type(typed_object) \ No newline at end of file From 1f081bbf0897c48c00f7b87d25c39f50e53db952 Mon Sep 17 00:00:00 2001 From: Wyatt Cannon Date: Mon, 25 Mar 2024 12:09:28 -0700 Subject: [PATCH 3/5] Fixed convert_component_output to provide a dict, matching expected output from get_response_model --- src/hayhooks/server/pipelines/models.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 04e591a..7249aab 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -2,7 +2,7 @@ from hayhooks.server.utils.create_valid_type import create_valid_type from pandas import DataFrame from pydantic import BaseModel, create_model, ConfigDict -from haystack.dataclasses import Document +from haystack.dataclasses import Document, ExtractedAnswer import json @@ -10,7 +10,6 @@ class HaystackDocument(BaseModel): id: str content: str - class PipelineDefinition(BaseModel): name: str source_code: str @@ -51,7 +50,6 @@ def get_response_model(pipeline_name: str, pipeline_outputs): """ response_model = {} config = ConfigDict(arbitrary_types_allowed=True) - for component_name, outputs in pipeline_outputs.items(): component_model = {} for name, typedef in outputs.items(): @@ -77,12 +75,10 @@ def convert_component_output(component_output): # Empty containers, None values, empty strings and the likes: do nothing if not data: result[output_name] = data - + get_value = lambda data: data.to_dict()["init_parameters"] if hasattr(data, "to_dict") else data # Output contains a list of Document if type(data) is list: - result[output_name] = [d.to_dict() for d in data] + result[output_name] = [get_value(d) for d in data] else: - result[output_name] = data.to_dict() - # Output is a single Document - + result[output_name] = get_value(data) return result From 49b98b2190f864931c03fd59380a8e8c26a3d88a Mon Sep 17 00:00:00 2001 From: Wyatt Cannon Date: Mon, 25 Mar 2024 12:36:53 -0700 Subject: [PATCH 4/5] Removed extra imports and fixed formatting --- src/hayhooks/server/pipelines/models.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 7249aab..8dc3463 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -1,15 +1,8 @@ -from typing import get_args, get_origin, List, get_type_hints from hayhooks.server.utils.create_valid_type import create_valid_type from pandas import DataFrame from pydantic import BaseModel, create_model, ConfigDict -from haystack.dataclasses import Document, ExtractedAnswer -import json -class HaystackDocument(BaseModel): - id: str - content: str - class PipelineDefinition(BaseModel): name: str source_code: str @@ -62,21 +55,18 @@ def get_response_model(pipeline_name: str, pipeline_outputs): def convert_component_output(component_output): """ + Converts outputs from a component as a dict so that it can be validated against response model + Component output has this form: "documents":[ {"id":"818170...", "content":"RapidAPI for Mac is a full-featured HTTP client."} ] - We inspect the output and convert haystack.Document into the HaystackDocument pydantic model as needed """ result = {} for output_name, data in component_output.items(): - # Empty containers, None values, empty strings and the likes: do nothing - if not data: - result[output_name] = data get_value = lambda data: data.to_dict()["init_parameters"] if hasattr(data, "to_dict") else data - # Output contains a list of Document if type(data) is list: result[output_name] = [get_value(d) for d in data] else: From 96a27e1ef6f88adc1fe6742a5c8e964b3abd202f Mon Sep 17 00:00:00 2001 From: Silvano Cerza Date: Thu, 9 May 2024 17:23:34 +0200 Subject: [PATCH 5/5] Add some docs, enhance naming and handle Pipeline inputs too --- src/hayhooks/server/pipelines/models.py | 22 ++++--- .../server/utils/create_valid_type.py | 63 +++++++++++-------- 2 files changed, 50 insertions(+), 35 deletions(-) diff --git a/src/hayhooks/server/pipelines/models.py b/src/hayhooks/server/pipelines/models.py index 8dc3463..f06f9a8 100644 --- a/src/hayhooks/server/pipelines/models.py +++ b/src/hayhooks/server/pipelines/models.py @@ -1,6 +1,7 @@ -from hayhooks.server.utils.create_valid_type import create_valid_type from pandas import DataFrame -from pydantic import BaseModel, create_model, ConfigDict +from pydantic import BaseModel, ConfigDict, create_model + +from hayhooks.server.utils.create_valid_type import handle_unsupported_types class PipelineDefinition(BaseModel): @@ -23,13 +24,16 @@ def get_request_model(pipeline_name: str, pipeline_inputs): config = ConfigDict(arbitrary_types_allowed=True) for component_name, inputs in pipeline_inputs.items(): - component_model = {} for name, typedef in inputs.items(): - component_model[name] = (typedef["type"], typedef.get("default_value", ...)) - request_model[component_name] = (create_model('ComponentParams', **component_model, __config__=config), ...) + input_type = handle_unsupported_types(typedef["type"], {DataFrame: dict}) + component_model[name] = ( + input_type, + typedef.get("default_value", ...), + ) + request_model[component_name] = (create_model("ComponentParams", **component_model, __config__=config), ...) - return create_model(f'{pipeline_name.capitalize()}RunRequest', **request_model, __config__=config) + return create_model(f"{pipeline_name.capitalize()}RunRequest", **request_model, __config__=config) def get_response_model(pipeline_name: str, pipeline_outputs): @@ -47,10 +51,10 @@ def get_response_model(pipeline_name: str, pipeline_outputs): component_model = {} for name, typedef in outputs.items(): output_type = typedef["type"] - component_model[name] = (create_valid_type(output_type, { DataFrame: dict}), ...) - response_model[component_name] = (create_model('ComponentParams', **component_model, __config__=config), ...) + component_model[name] = (handle_unsupported_types(output_type, {DataFrame: dict}), ...) + response_model[component_name] = (create_model("ComponentParams", **component_model, __config__=config), ...) - return create_model(f'{pipeline_name.capitalize()}RunResponse', **response_model, __config__=config) + return create_model(f"{pipeline_name.capitalize()}RunResponse", **response_model, __config__=config) def convert_component_output(component_output): diff --git a/src/hayhooks/server/utils/create_valid_type.py b/src/hayhooks/server/utils/create_valid_type.py index 26815b5..95307f7 100644 --- a/src/hayhooks/server/utils/create_valid_type.py +++ b/src/hayhooks/server/utils/create_valid_type.py @@ -1,32 +1,43 @@ -from typing import get_type_hints, Dict, get_origin, get_args -from typing_extensions import TypedDict -from types import GenericAlias from inspect import isclass +from types import GenericAlias +from typing import Dict, Union, get_args, get_origin, get_type_hints + +from typing_extensions import TypedDict + -def create_valid_type(typed_object:type, invalid_types:Dict[type, type]): - """ - Returns a new type specification, replacing invalid_types in typed_object. - example: replace_invalid_types(ExtractedAnswer, {DataFrame: List}]) returns - a TypedDict with DataFrame types replaced with List +def handle_unsupported_types(type_: type, types_mapping: Dict[type, type]) -> Union[GenericAlias, type]: """ - def validate_type(v): + Recursively handle types that are not supported by Pydantic by replacing them with the given types mapping. + + :param type_: Type to replace if not supported + :param types_mapping: Mapping of types to replace + """ + + def _handle_generics(t_) -> GenericAlias: + """ + Handle generics recursively + """ child_typing = [] - for t in get_args(v): - if t in invalid_types: - result = invalid_types[t] + for t in get_args(t_): + if t in types_mapping: + result = types_mapping[t] elif isclass(t): - result = create_valid_type(t, invalid_types) - else: result = t + result = handle_unsupported_types(t, types_mapping) + else: + result = t child_typing.append(result) - return GenericAlias(get_origin(v), tuple(child_typing)) - if isclass(typed_object): - new_typing = {} - for k, v in get_type_hints(typed_object).items(): - if(get_args(v) != ()): - new_typing[k] = validate_type(v) - else: new_typing[k] = v - if new_typing == {}: - return typed_object - else: return TypedDict(typed_object.__name__, new_typing) - else: - return validate_type(typed_object) \ No newline at end of file + return GenericAlias(get_origin(t_), tuple(child_typing)) + + if isclass(type_): + new_type = {} + for arg_name, arg_type in get_type_hints(type_).items(): + if get_args(arg_type): + new_type[arg_name] = _handle_generics(arg_type) + else: + new_type[arg_name] = arg_type + if new_type: + return TypedDict(type_.__name__, new_type) + + return type_ + + return _handle_generics(type_)