From aded694d3358f89e06be42becbb05a6d1ba002a8 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Sun, 10 Nov 2024 04:30:26 +0000 Subject: [PATCH] Auto-fix lint and format issues --- airbyte_cdk/connector.py | 6 +- airbyte_cdk/connector_builder/main.py | 4 +- airbyte_cdk/destinations/destination.py | 1 - .../destinations/vector_db_based/embedder.py | 23 +- airbyte_cdk/models/airbyte_protocol.py | 6 +- .../sources/connector_state_manager.py | 3 +- .../concurrent_declarative_source.py | 2 - .../incremental/datetime_based_cursor.py | 4 +- .../models/declarative_component_schema.py | 1356 ++++++++--------- .../parsers/model_to_component_factory.py | 7 +- .../substream_partition_router.py | 5 +- .../error_handlers/default_error_handler.py | 2 - .../default_http_response_filter.py | 2 - .../error_handlers/http_response_filter.py | 10 +- .../declarative/requesters/http_requester.py | 1 - ...datetime_based_request_options_provider.py | 4 +- ...erpolated_nested_request_input_provider.py | 1 - .../interpolated_request_input_provider.py | 1 - .../declarative/retrievers/async_retriever.py | 1 - ...efault_file_based_availability_strategy.py | 4 +- .../file_based/config/unstructured_format.py | 5 +- .../abstract_discovery_policy.py | 6 +- .../file_based/file_types/jsonl_parser.py | 1 - .../file_based/file_types/parquet_parser.py | 1 - .../stream/abstract_file_based_stream.py | 3 +- .../abstract_concurrent_file_based_cursor.py | 33 +- .../stream/default_file_based_stream.py | 1 - airbyte_cdk/sources/source.py | 6 +- .../sources/streams/concurrent/cursor.py | 3 +- .../abstract_stream_state_converter.py | 3 +- .../datetime_stream_state_converter.py | 12 +- .../sources/streams/http/exceptions.py | 1 - airbyte_cdk/sources/streams/http/http.py | 1 - .../sources/streams/http/http_client.py | 6 +- airbyte_cdk/test/catalog_builder.py | 6 +- airbyte_cdk/utils/message_utils.py | 3 +- airbyte_cdk/utils/traced_exception.py | 4 +- bin/generate_component_manifest_files.py | 3 +- unit_tests/conftest.py | 4 +- .../test_connector_builder_handler.py | 14 +- unit_tests/destinations/test_destination.py | 7 +- .../test_concurrent_source_adapter.py | 4 +- .../declarative/async_job/test_integration.py | 17 +- .../async_job/test_job_orchestrator.py | 55 +- .../test_concurrency_level.py | 23 +- .../test_pagination_decoder_decorator.py | 10 +- .../declarative/decoders/test_xml_decoder.py | 39 +- .../extractors/test_record_filter.py | 211 ++- .../test_response_to_file_extractor.py | 2 +- .../test_model_to_component_factory.py | 64 +- .../test_parent_state_stream.py | 10 +- .../test_default_http_response_filter.py | 3 - .../paginators/test_default_paginator.py | 20 +- ...datetime_based_request_options_provider.py | 9 +- ...t_interpolated_request_options_provider.py | 20 +- .../requesters/test_http_job_repository.py | 28 +- .../test_concurrent_declarative_source.py | 697 ++++----- .../file_based/in_memory_files_source.py | 1 - .../file_based/scenarios/csv_scenarios.py | 26 +- .../stream/concurrent/test_adapters.py | 10 +- .../stream/test_default_file_based_stream.py | 4 +- unit_tests/sources/message/test_repository.py | 1 - .../mock_server_tests/mock_source_fixture.py | 1 - .../scenarios/stream_facade_builder.py | 14 +- ...hread_based_concurrent_stream_scenarios.py | 79 +- .../streams/concurrent/test_adapters.py | 7 +- .../sources/streams/concurrent/test_cursor.py | 140 +- .../test_default_backoff_strategy.py | 1 - .../test_http_status_error_handler.py | 6 - .../test_json_error_message_parser.py | 3 +- 
unit_tests/sources/streams/http/test_http.py | 4 - unit_tests/test/test_entrypoint_wrapper.py | 6 +- unit_tests/test_entrypoint.py | 7 +- unit_tests/utils/test_traced_exception.py | 8 +- 74 files changed, 1481 insertions(+), 1615 deletions(-) diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 658a0b16..299f814e 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -96,8 +96,7 @@ def check(self, logger: logging.Logger, config: TConfig) -> AirbyteConnectionSta class _WriteConfigProtocol(Protocol): @staticmethod - def write_config(config: Mapping[str, Any], config_path: str) -> None: - ... + def write_config(config: Mapping[str, Any], config_path: str) -> None: ... class DefaultConnectorMixin: @@ -108,5 +107,4 @@ def configure(self: _WriteConfigProtocol, config: Mapping[str, Any], temp_dir: s return config -class Connector(DefaultConnectorMixin, BaseConnector[Mapping[str, Any]], ABC): - ... +class Connector(DefaultConnectorMixin, BaseConnector[Mapping[str, Any]], ABC): ... diff --git a/airbyte_cdk/connector_builder/main.py b/airbyte_cdk/connector_builder/main.py index 54e0b1e0..9e6fe188 100644 --- a/airbyte_cdk/connector_builder/main.py +++ b/airbyte_cdk/connector_builder/main.py @@ -74,7 +74,9 @@ def handle_request(args: List[str]) -> str: command, config, catalog, state = get_config_and_catalog_from_args(args) limits = get_limits(config) source = create_source(config, limits) - return orjson.dumps(AirbyteMessageSerializer.dump(handle_connector_builder_request(source, command, config, catalog, state, limits))).decode() # type: ignore[no-any-return] # Serializer.dump() always returns AirbyteMessage + return orjson.dumps( + AirbyteMessageSerializer.dump(handle_connector_builder_request(source, command, config, catalog, state, limits)) + ).decode() # type: ignore[no-any-return] # Serializer.dump() always returns AirbyteMessage if __name__ == "__main__": diff --git a/airbyte_cdk/destinations/destination.py b/airbyte_cdk/destinations/destination.py index 336a54a9..308fb2e2 100644 --- a/airbyte_cdk/destinations/destination.py +++ b/airbyte_cdk/destinations/destination.py @@ -85,7 +85,6 @@ def parse_args(self, args: List[str]) -> argparse.Namespace: return parsed_args def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]: - cmd = parsed_args.command if cmd not in self.VALID_CMDS: raise Exception(f"Unrecognized command: {cmd}") diff --git a/airbyte_cdk/destinations/vector_db_based/embedder.py b/airbyte_cdk/destinations/vector_db_based/embedder.py index 7fb880fa..c18592f3 100644 --- a/airbyte_cdk/destinations/vector_db_based/embedder.py +++ b/airbyte_cdk/destinations/vector_db_based/embedder.py @@ -109,7 +109,19 @@ def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int): class AzureOpenAIEmbedder(BaseOpenAIEmbedder): def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int): # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request - super().__init__(OpenAIEmbeddings(openai_api_key=config.openai_key, chunk_size=16, max_retries=15, openai_api_type="azure", openai_api_version="2023-05-15", openai_api_base=config.api_base, deployment=config.deployment, disallowed_special=()), chunk_size) # type: ignore + super().__init__( + OpenAIEmbeddings( + openai_api_key=config.openai_key, + chunk_size=16, + max_retries=15, + openai_api_type="azure", + openai_api_version="2023-05-15", + openai_api_base=config.api_base, + deployment=config.deployment, + disallowed_special=(), + ), + chunk_size, + ) # 
type: ignore COHERE_VECTOR_SIZE = 1024 @@ -167,7 +179,13 @@ def __init__(self, config: OpenAICompatibleEmbeddingConfigModel): self.config = config # Client is set internally # Always set an API key even if there is none defined in the config because the validator will fail otherwise. Embedding APIs that don't require an API key don't fail if one is provided, so this is not breaking usage. - self.embeddings = LocalAIEmbeddings(model=config.model_name, openai_api_key=config.api_key or "dummy-api-key", openai_api_base=config.base_url, max_retries=15, disallowed_special=()) # type: ignore + self.embeddings = LocalAIEmbeddings( + model=config.model_name, + openai_api_key=config.api_key or "dummy-api-key", + openai_api_base=config.base_url, + max_retries=15, + disallowed_special=(), + ) # type: ignore def check(self) -> Optional[str]: deployment_mode = os.environ.get("DEPLOYMENT_MODE", "") @@ -254,7 +272,6 @@ def create_from_config( ], processing_config: ProcessingConfigModel, ) -> Embedder: - if embedding_config.mode == "azure_openai" or embedding_config.mode == "openai": return cast(Embedder, embedder_map[embedding_config.mode](embedding_config, processing_config.chunk_size)) else: diff --git a/airbyte_cdk/models/airbyte_protocol.py b/airbyte_cdk/models/airbyte_protocol.py index 6c0cdbb1..7f12da5b 100644 --- a/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte_cdk/models/airbyte_protocol.py @@ -62,9 +62,9 @@ class AirbyteGlobalState: class AirbyteStateMessage: type: Optional[AirbyteStateType] = None # type: ignore [name-defined] stream: Optional[AirbyteStreamState] = None - global_: Annotated[ - AirbyteGlobalState | None, Alias("global") - ] = None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization + global_: Annotated[AirbyteGlobalState | None, Alias("global")] = ( + None # "global" is a reserved keyword in python ⇒ Alias is used for (de-)serialization + ) data: Optional[Dict[str, Any]] = None sourceStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined] destinationStats: Optional[AirbyteStateStats] = None # type: ignore [name-defined] diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index 547f4bb2..2396029d 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -113,7 +113,8 @@ def _extract_from_state_message( else: streams = { HashableStreamDescriptor( - name=per_stream_state.stream.stream_descriptor.name, namespace=per_stream_state.stream.stream_descriptor.namespace # type: ignore[union-attr] # stream has stream_descriptor + name=per_stream_state.stream.stream_descriptor.name, + namespace=per_stream_state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # stream has stream_descriptor ): per_stream_state.stream.stream_state # type: ignore[union-attr] # stream has stream_state for per_stream_state in state if per_stream_state.type == AirbyteStateType.STREAM and hasattr(per_stream_state, "stream") # type: ignore # state is always a list of AirbyteStateMessage if is_per_stream is True diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index d6bdf505..8c8239ba 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -31,7 +31,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): - # By default, we 
defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread. SINGLE_THREADED_CONCURRENCY_LEVEL = 1 @@ -99,7 +98,6 @@ def read( catalog: ConfiguredAirbyteCatalog, state: Optional[Union[List[AirbyteStateMessage]]] = None, ) -> Iterator[AirbyteMessage]: - # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent # streams must be saved so that they can be removed from the catalog before starting synchronous streams if self._concurrent_streams: diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index 6505260c..e58d2256 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -317,7 +317,9 @@ def _get_request_options(self, option_type: RequestOptionType, stream_slice: Opt self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(self._partition_field_end.eval(self.config)) # type: ignore # field_name is always casted to an interpolated string + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + self._partition_field_end.eval(self.config) + ) # type: ignore # field_name is always casted to an interpolated string return options def should_be_synced(self, record: Record) -> bool: diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py index eeb773b2..75f34878 100644 --- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py +++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py @@ -11,314 +11,314 @@ class AuthFlowType(Enum): - oauth2_0 = 'oauth2.0' - oauth1_0 = 'oauth1.0' + oauth2_0 = "oauth2.0" + oauth1_0 = "oauth1.0" class BasicHttpAuthenticator(BaseModel): - type: Literal['BasicHttpAuthenticator'] + type: Literal["BasicHttpAuthenticator"] username: str = Field( ..., - description='The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.', + description="The username that will be combined with the password, base64 encoded and used to make requests. Fill it in the user inputs.", examples=["{{ config['username'] }}", "{{ config['api_key'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='The password that will be combined with the username, base64 encoded and used to make requests. Fill it in the user inputs.', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="The password that will be combined with the username, base64 encoded and used to make requests. 
Fill it in the user inputs.", + examples=["{{ config['password'] }}", ""], + title="Password", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class BearerAuthenticator(BaseModel): - type: Literal['BearerAuthenticator'] + type: Literal["BearerAuthenticator"] api_token: str = Field( ..., - description='Token to inject as request header for authenticating with the API.', + description="Token to inject as request header for authenticating with the API.", examples=["{{ config['api_key'] }}", "{{ config['token'] }}"], - title='Bearer Token', + title="Bearer Token", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CheckStream(BaseModel): - type: Literal['CheckStream'] + type: Literal["CheckStream"] stream_names: List[str] = Field( ..., - description='Names of the streams to try reading from when running a check operation.', - examples=[['users'], ['users', 'contacts']], - title='Stream Names', + description="Names of the streams to try reading from when running a check operation.", + examples=[["users"], ["users", "contacts"]], + title="Stream Names", ) class ConcurrencyLevel(BaseModel): - type: Optional[Literal['ConcurrencyLevel']] = None + type: Optional[Literal["ConcurrencyLevel"]] = None default_concurrency: Union[int, str] = Field( ..., - description='The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.', + description="The amount of concurrency that will applied during a sync. This value can be hardcoded or user-defined in the config if different users have varying volume thresholds in the target API.", examples=[10, "{{ config['num_workers'] or 10 }}"], - title='Default Concurrency', + title="Default Concurrency", ) max_concurrency: Optional[int] = Field( None, - description='The maximum level of concurrency that will be used during a sync. This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.', + description="The maximum level of concurrency that will be used during a sync. 
This becomes a required field when the default_concurrency derives from the config, because it serves as a safeguard against a user-defined threshold that is too high.", examples=[20, 100], - title='Max Concurrency', + title="Max Concurrency", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ConstantBackoffStrategy(BaseModel): - type: Literal['ConstantBackoffStrategy'] + type: Literal["ConstantBackoffStrategy"] backoff_time_in_seconds: Union[float, str] = Field( ..., - description='Backoff time in seconds.', + description="Backoff time in seconds.", examples=[30, 30.5, "{{ config['backoff_time'] }}"], - title='Backoff Time', + title="Backoff Time", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CursorPagination(BaseModel): - type: Literal['CursorPagination'] + type: Literal["CursorPagination"] cursor_value: str = Field( ..., - description='Value of the cursor defining the next page to fetch.', + description="Value of the cursor defining the next page to fetch.", examples=[ - '{{ headers.link.next.cursor }}', + "{{ headers.link.next.cursor }}", "{{ last_record['key'] }}", "{{ response['nextPage'] }}", ], - title='Cursor Value', + title="Cursor Value", ) page_size: Optional[int] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100], - title='Page Size', + title="Page Size", ) stop_condition: Optional[str] = Field( None, - description='Template string evaluating when to stop paginating.', + description="Template string evaluating when to stop paginating.", examples=[ - '{{ response.data.has_more is false }}', + "{{ response.data.has_more is false }}", "{{ 'next' not in headers['link'] }}", ], - title='Stop Condition', + title="Stop Condition", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomAuthenticator'] + type: Literal["CustomAuthenticator"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.', - examples=['source_railz.components.ShortLivedTokenAuthenticator'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom authentication strategy. Has to be a sub class of DeclarativeAuthenticator. The format is `source_..`.", + examples=["source_railz.components.ShortLivedTokenAuthenticator"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomBackoffStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomBackoffStrategy'] + type: Literal["CustomBackoffStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom backoff strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomBackoffStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom backoff strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomBackoffStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomErrorHandler(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomErrorHandler'] + type: Literal["CustomErrorHandler"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.', - examples=['source_railz.components.MyCustomErrorHandler'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom error handler. The format is `source_..`.", + examples=["source_railz.components.MyCustomErrorHandler"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomIncrementalSync(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomIncrementalSync'] + type: Literal["CustomIncrementalSync"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.', - examples=['source_railz.components.MyCustomIncrementalSync'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_..`.", + examples=["source_railz.components.MyCustomIncrementalSync"], + title="Class Name", ) cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync.', + description="The location of the value on a record that will be used as a bookmark during sync.", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPaginationStrategy(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPaginationStrategy'] + type: Literal["CustomPaginationStrategy"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomPaginationStrategy'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom pagination strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomPaginationStrategy"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordExtractor(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordExtractor'] + type: Literal["CustomRecordExtractor"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record extraction strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record extraction strategy. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRecordFilter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRecordFilter'] + type: Literal["CustomRecordFilter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomCustomRecordFilter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom record filter strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomCustomRecordFilter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRequester(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRequester'] + type: Literal["CustomRequester"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRecordExtractor'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom requester strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRecordExtractor"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomRetriever(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomRetriever'] + type: Literal["CustomRetriever"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.', - examples=['source_railz.components.MyCustomRetriever'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom retriever strategy. The format is `source_..`.", + examples=["source_railz.components.MyCustomRetriever"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomPartitionRouter(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomPartitionRouter'] + type: Literal["CustomPartitionRouter"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom partition router. The format is `source_..`.', - examples=['source_railz.components.MyCustomPartitionRouter'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom partition router. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomPartitionRouter"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomSchemaLoader(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomSchemaLoader'] + type: Literal["CustomSchemaLoader"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.', - examples=['source_railz.components.MyCustomSchemaLoader'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom schema loader. The format is `source_..`.", + examples=["source_railz.components.MyCustomSchemaLoader"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomStateMigration(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomStateMigration'] + type: Literal["CustomStateMigration"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom state migration. The format is `source_..`.', - examples=['source_railz.components.MyCustomStateMigration'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom state migration. The format is `source_..`.", + examples=["source_railz.components.MyCustomStateMigration"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class CustomTransformation(BaseModel): class Config: extra = Extra.allow - type: Literal['CustomTransformation'] + type: Literal["CustomTransformation"] class_name: str = Field( ..., - description='Fully-qualified name of the class that will be implementing the custom transformation. The format is `source_..`.', - examples=['source_railz.components.MyCustomTransformation'], - title='Class Name', + description="Fully-qualified name of the class that will be implementing the custom transformation. 
The format is `source_..`.", + examples=["source_railz.components.MyCustomTransformation"], + title="Class Name", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class LegacyToPerPartitionStateMigration(BaseModel): class Config: extra = Extra.allow - type: Optional[Literal['LegacyToPerPartitionStateMigration']] = None + type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None class Algorithm(Enum): - HS256 = 'HS256' - HS384 = 'HS384' - HS512 = 'HS512' - ES256 = 'ES256' - ES256K = 'ES256K' - ES384 = 'ES384' - ES512 = 'ES512' - RS256 = 'RS256' - RS384 = 'RS384' - RS512 = 'RS512' - PS256 = 'PS256' - PS384 = 'PS384' - PS512 = 'PS512' - EdDSA = 'EdDSA' + HS256 = "HS256" + HS384 = "HS384" + HS512 = "HS512" + ES256 = "ES256" + ES256K = "ES256K" + ES384 = "ES384" + ES512 = "ES512" + RS256 = "RS256" + RS384 = "RS384" + RS512 = "RS512" + PS256 = "PS256" + PS384 = "PS384" + PS512 = "PS512" + EdDSA = "EdDSA" class JwtHeaders(BaseModel): @@ -327,21 +327,21 @@ class Config: kid: Optional[str] = Field( None, - description='Private key ID for user account.', + description="Private key ID for user account.", examples=["{{ config['kid'] }}"], - title='Key Identifier', + title="Key Identifier", ) typ: Optional[str] = Field( - 'JWT', - description='The media type of the complete JWT.', - examples=['JWT'], - title='Type', + "JWT", + description="The media type of the complete JWT.", + examples=["JWT"], + title="Type", ) cty: Optional[str] = Field( None, - description='Content type of JWT header.', - examples=['JWT'], - title='Content Type', + description="Content type of JWT header.", + examples=["JWT"], + title="Content Type", ) @@ -351,28 +351,28 @@ class Config: iss: Optional[str] = Field( None, - description='The user/principal that issued the JWT. Commonly a value unique to the user.', + description="The user/principal that issued the JWT. Commonly a value unique to the user.", examples=["{{ config['iss'] }}"], - title='Issuer', + title="Issuer", ) sub: Optional[str] = Field( None, - description='The subject of the JWT. Commonly defined by the API.', - title='Subject', + description="The subject of the JWT. Commonly defined by the API.", + title="Subject", ) aud: Optional[str] = Field( None, - description='The recipient that the JWT is intended for. Commonly defined by the API.', - examples=['appstoreconnect-v1'], - title='Audience', + description="The recipient that the JWT is intended for. 
Commonly defined by the API.", + examples=["appstoreconnect-v1"], + title="Audience", ) class JwtAuthenticator(BaseModel): - type: Literal['JwtAuthenticator'] + type: Literal["JwtAuthenticator"] secret_key: str = Field( ..., - description='Secret used to sign the JSON web token.', + description="Secret used to sign the JSON web token.", examples=["{{ config['secret_key'] }}"], ) base64_encode_secret_key: Optional[bool] = Field( @@ -381,521 +381,515 @@ class JwtAuthenticator(BaseModel): ) algorithm: Algorithm = Field( ..., - description='Algorithm used to sign the JSON web token.', - examples=['ES256', 'HS256', 'RS256', "{{ config['algorithm'] }}"], + description="Algorithm used to sign the JSON web token.", + examples=["ES256", "HS256", "RS256", "{{ config['algorithm'] }}"], ) token_duration: Optional[int] = Field( 1200, - description='The amount of time in seconds a JWT token can be valid after being issued.', + description="The amount of time in seconds a JWT token can be valid after being issued.", examples=[1200, 3600], - title='Token Duration', + title="Token Duration", ) header_prefix: Optional[str] = Field( None, - description='The prefix to be used within the Authentication header.', - examples=['Bearer', 'Basic'], - title='Header Prefix', + description="The prefix to be used within the Authentication header.", + examples=["Bearer", "Basic"], + title="Header Prefix", ) jwt_headers: Optional[JwtHeaders] = Field( None, - description='JWT headers used when signing JSON web token.', - title='JWT Headers', + description="JWT headers used when signing JSON web token.", + title="JWT Headers", ) additional_jwt_headers: Optional[Dict[str, Any]] = Field( None, - description='Additional headers to be included with the JWT headers object.', - title='Additional JWT Headers', + description="Additional headers to be included with the JWT headers object.", + title="Additional JWT Headers", ) jwt_payload: Optional[JwtPayload] = Field( None, - description='JWT Payload used when signing JSON web token.', - title='JWT Payload', + description="JWT Payload used when signing JSON web token.", + title="JWT Payload", ) additional_jwt_payload: Optional[Dict[str, Any]] = Field( None, - description='Additional properties to be added to the JWT payload.', - title='Additional JWT Payload Properties', + description="Additional properties to be added to the JWT payload.", + title="Additional JWT Payload Properties", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RefreshTokenUpdater(BaseModel): refresh_token_name: Optional[str] = Field( - 'refresh_token', - description='The name of the property which contains the updated refresh token in the response from the token refresh endpoint.', - examples=['refresh_token'], - title='Refresh Token Property Name', + "refresh_token", + description="The name of the property which contains the updated refresh token in the response from the token refresh endpoint.", + examples=["refresh_token"], + title="Refresh Token Property Name", ) access_token_config_path: Optional[List[str]] = Field( - ['credentials', 'access_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'access_token'], ['access_token']], - title='Config Path To Access Token', + ["credentials", "access_token"], + description="Config path to the access token. 
Make sure the field actually exists in the config.", + examples=[["credentials", "access_token"], ["access_token"]], + title="Config Path To Access Token", ) refresh_token_config_path: Optional[List[str]] = Field( - ['credentials', 'refresh_token'], - description='Config path to the access token. Make sure the field actually exists in the config.', - examples=[['credentials', 'refresh_token'], ['refresh_token']], - title='Config Path To Refresh Token', + ["credentials", "refresh_token"], + description="Config path to the access token. Make sure the field actually exists in the config.", + examples=[["credentials", "refresh_token"], ["refresh_token"]], + title="Config Path To Refresh Token", ) token_expiry_date_config_path: Optional[List[str]] = Field( - ['credentials', 'token_expiry_date'], - description='Config path to the expiry date. Make sure actually exists in the config.', - examples=[['credentials', 'token_expiry_date']], - title='Config Path To Expiry Date', + ["credentials", "token_expiry_date"], + description="Config path to the expiry date. Make sure actually exists in the config.", + examples=[["credentials", "token_expiry_date"]], + title="Config Path To Expiry Date", ) refresh_token_error_status_codes: Optional[List[int]] = Field( [], - description='Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). Responses with one of the error status code and containing an error value will be flagged as a config error', + description="Status Codes to Identify refresh token error in response (Refresh Token Error Key and Refresh Token Error Values should be also specified). Responses with one of the error status code and containing an error value will be flagged as a config error", examples=[[400, 500]], - title='Refresh Token Error Status Codes', + title="Refresh Token Error Status Codes", ) refresh_token_error_key: Optional[str] = Field( - '', - description='Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).', - examples=['error'], - title='Refresh Token Error Key', + "", + description="Key to Identify refresh token error in response (Refresh Token Error Status Codes and Refresh Token Error Values should be also specified).", + examples=["error"], + title="Refresh Token Error Key", ) refresh_token_error_values: Optional[List[str]] = Field( [], description='List of values to check for exception during token refresh process. Used to check if the error found in the response matches the key from the Refresh Token Error Key field (e.g. response={"error": "invalid_grant"}). Only responses with one of the error status code and containing an error value will be flagged as a config error', - examples=[['invalid_grant', 'invalid_permissions']], - title='Refresh Token Error Values', + examples=[["invalid_grant", "invalid_permissions"]], + title="Refresh Token Error Values", ) class OAuthAuthenticator(BaseModel): - type: Literal['OAuthAuthenticator'] + type: Literal["OAuthAuthenticator"] client_id: str = Field( ..., - description='The OAuth client ID. Fill it in the user inputs.', + description="The OAuth client ID. Fill it in the user inputs.", examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"], - title='Client ID', + title="Client ID", ) client_secret: str = Field( ..., - description='The OAuth client secret. Fill it in the user inputs.', + description="The OAuth client secret. 
Fill it in the user inputs.", examples=[ "{{ config['client_secret }}", "{{ config['credentials']['client_secret }}", ], - title='Client Secret', + title="Client Secret", ) refresh_token: Optional[str] = Field( None, - description='Credential artifact used to get a new access token.', + description="Credential artifact used to get a new access token.", examples=[ "{{ config['refresh_token'] }}", "{{ config['credentials]['refresh_token'] }}", ], - title='Refresh Token', + title="Refresh Token", ) token_refresh_endpoint: str = Field( ..., - description='The full URL to call to obtain a new access token.', - examples=['https://connect.squareup.com/oauth2/token'], - title='Token Refresh Endpoint', + description="The full URL to call to obtain a new access token.", + examples=["https://connect.squareup.com/oauth2/token"], + title="Token Refresh Endpoint", ) access_token_name: Optional[str] = Field( - 'access_token', - description='The name of the property which contains the access token in the response from the token refresh endpoint.', - examples=['access_token'], - title='Access Token Property Name', + "access_token", + description="The name of the property which contains the access token in the response from the token refresh endpoint.", + examples=["access_token"], + title="Access Token Property Name", ) expires_in_name: Optional[str] = Field( - 'expires_in', - description='The name of the property which contains the expiry date in the response from the token refresh endpoint.', - examples=['expires_in'], - title='Token Expiry Property Name', + "expires_in", + description="The name of the property which contains the expiry date in the response from the token refresh endpoint.", + examples=["expires_in"], + title="Token Expiry Property Name", ) grant_type: Optional[str] = Field( - 'refresh_token', - description='Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.', - examples=['refresh_token', 'client_credentials'], - title='Grant Type', + "refresh_token", + description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. 
Other grant types are not officially supported.", + examples=["refresh_token", "client_credentials"], + title="Grant Type", ) refresh_request_body: Optional[Dict[str, Any]] = Field( None, - description='Body of the request sent to get a new access token.', + description="Body of the request sent to get a new access token.", examples=[ { - 'applicationId': "{{ config['application_id'] }}", - 'applicationSecret': "{{ config['application_secret'] }}", - 'token': "{{ config['token'] }}", + "applicationId": "{{ config['application_id'] }}", + "applicationSecret": "{{ config['application_secret'] }}", + "token": "{{ config['token'] }}", } ], - title='Refresh Request Body', + title="Refresh Request Body", ) scopes: Optional[List[str]] = Field( None, - description='List of scopes that should be granted to the access token.', - examples=[ - ['crm.list.read', 'crm.objects.contacts.read', 'crm.schema.contacts.read'] - ], - title='Scopes', + description="List of scopes that should be granted to the access token.", + examples=[["crm.list.read", "crm.objects.contacts.read", "crm.schema.contacts.read"]], + title="Scopes", ) token_expiry_date: Optional[str] = Field( None, - description='The access token expiry date.', - examples=['2023-04-06T07:12:10.421833+00:00', 1680842386], - title='Token Expiry Date', + description="The access token expiry date.", + examples=["2023-04-06T07:12:10.421833+00:00", 1680842386], + title="Token Expiry Date", ) token_expiry_date_format: Optional[str] = Field( None, - description='The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.', - examples=['%Y-%m-%d %H:%M:%S.%f+00:00'], - title='Token Expiry Date Format', + description="The format of the time to expiration datetime. Provide it if the time is returned as a date-time string instead of seconds.", + examples=["%Y-%m-%d %H:%M:%S.%f+00:00"], + title="Token Expiry Date Format", ) refresh_token_updater: Optional[RefreshTokenUpdater] = Field( None, - description='When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.', - title='Token Updater', + description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.", + title="Token Updater", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DpathExtractor(BaseModel): - type: Literal['DpathExtractor'] + type: Literal["DpathExtractor"] field_path: List[str] = Field( ..., description='List of potentially nested fields describing the full path of the field to extract. Use "*" to extract all values from an array. 
See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/record-selector).', examples=[ - ['data'], - ['data', 'records'], - ['data', '{{ parameters.name }}'], - ['data', '*', 'record'], + ["data"], + ["data", "records"], + ["data", "{{ parameters.name }}"], + ["data", "*", "record"], ], - title='Field Path', + title="Field Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ExponentialBackoffStrategy(BaseModel): - type: Literal['ExponentialBackoffStrategy'] + type: Literal["ExponentialBackoffStrategy"] factor: Optional[Union[float, str]] = Field( 5, - description='Multiplicative constant applied on each retry.', - examples=[5, 5.5, '10'], - title='Factor', + description="Multiplicative constant applied on each retry.", + examples=[5, 5.5, "10"], + title="Factor", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestBearerAuthenticator(BaseModel): - type: Literal['Bearer'] + type: Literal["Bearer"] class HttpMethod(Enum): - GET = 'GET' - POST = 'POST' + GET = "GET" + POST = "POST" class Action(Enum): - SUCCESS = 'SUCCESS' - FAIL = 'FAIL' - RETRY = 'RETRY' - IGNORE = 'IGNORE' - RATE_LIMITED = 'RATE_LIMITED' + SUCCESS = "SUCCESS" + FAIL = "FAIL" + RETRY = "RETRY" + IGNORE = "IGNORE" + RATE_LIMITED = "RATE_LIMITED" class FailureType(Enum): - system_error = 'system_error' - config_error = 'config_error' - transient_error = 'transient_error' + system_error = "system_error" + config_error = "config_error" + transient_error = "transient_error" class HttpResponseFilter(BaseModel): - type: Literal['HttpResponseFilter'] + type: Literal["HttpResponseFilter"] action: Optional[Action] = Field( None, - description='Action to execute if a response matches the filter.', - examples=['SUCCESS', 'FAIL', 'RETRY', 'IGNORE', 'RATE_LIMITED'], - title='Action', + description="Action to execute if a response matches the filter.", + examples=["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"], + title="Action", ) failure_type: Optional[FailureType] = Field( None, - description='Failure type of traced exception if a response matches the filter.', - examples=['system_error', 'config_error', 'transient_error'], - title='Failure Type', + description="Failure type of traced exception if a response matches the filter.", + examples=["system_error", "config_error", "transient_error"], + title="Failure Type", ) error_message: Optional[str] = Field( None, - description='Error Message to display if the response matches the filter.', - title='Error Message', + description="Error Message to display if the response matches the filter.", + title="Error Message", ) error_message_contains: Optional[str] = Field( None, - description='Match the response if its error message contains the substring.', - example=['This API operation is not enabled for this site'], - title='Error Message Substring', + description="Match the response if its error message contains the substring.", + example=["This API operation is not enabled for this site"], + title="Error Message Substring", ) http_codes: Optional[List[int]] = Field( None, - description='Match the response if its HTTP code is included in this list.', + description="Match the response if its HTTP code is included in this list.", examples=[[420, 429], [500]], - title='HTTP Codes', + title="HTTP 
Codes", ) predicate: Optional[str] = Field( None, - description='Match the response if the predicate evaluates to true.', + description="Match the response if the predicate evaluates to true.", examples=[ "{{ 'Too much requests' in response }}", "{{ 'error_code' in response and response['error_code'] == 'ComplexityException' }}", ], - title='Predicate', + title="Predicate", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class InlineSchemaLoader(BaseModel): - type: Literal['InlineSchemaLoader'] + type: Literal["InlineSchemaLoader"] schema_: Optional[Dict[str, Any]] = Field( None, - alias='schema', + alias="schema", description='Describes a streams\' schema. Refer to the Data Types documentation for more details on which types are valid.', - title='Schema', + title="Schema", ) class JsonFileSchemaLoader(BaseModel): - type: Literal['JsonFileSchemaLoader'] + type: Literal["JsonFileSchemaLoader"] file_path: Optional[str] = Field( None, description="Path to the JSON file defining the schema. The path is relative to the connector module's root.", - example=['./schemas/users.json'], - title='File Path', + example=["./schemas/users.json"], + title="File Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class JsonDecoder(BaseModel): - type: Literal['JsonDecoder'] + type: Literal["JsonDecoder"] class JsonlDecoder(BaseModel): - type: Literal['JsonlDecoder'] + type: Literal["JsonlDecoder"] class KeysToLower(BaseModel): - type: Literal['KeysToLower'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["KeysToLower"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class IterableDecoder(BaseModel): - type: Literal['IterableDecoder'] + type: Literal["IterableDecoder"] class XmlDecoder(BaseModel): - type: Literal['XmlDecoder'] + type: Literal["XmlDecoder"] class MinMaxDatetime(BaseModel): - type: Literal['MinMaxDatetime'] + type: Literal["MinMaxDatetime"] datetime: str = Field( ..., - description='Datetime value.', - examples=['2021-01-01', '2021-01-01T00:00:00Z', "{{ config['start_time'] }}"], - title='Datetime', + description="Datetime value.", + examples=["2021-01-01", "2021-01-01T00:00:00Z", "{{ config['start_time'] }}"], + title="Datetime", ) datetime_format: Optional[str] = Field( - '', + "", description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s'], - title='Datetime Format', + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"], + title="Datetime Format", ) max_datetime: Optional[str] = Field( None, - description='Ceiling applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2021-01-01T00:00:00Z', '2021-01-01'], - title='Max Datetime', + description="Ceiling applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2021-01-01T00:00:00Z", "2021-01-01"], + title="Max Datetime", ) min_datetime: Optional[str] = Field( None, - description='Floor applied on the datetime value. Must be formatted with the datetime_format field.', - examples=['2010-01-01T00:00:00Z', '2010-01-01'], - title='Min Datetime', + description="Floor applied on the datetime value. Must be formatted with the datetime_format field.", + examples=["2010-01-01T00:00:00Z", "2010-01-01"], + title="Min Datetime", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoAuth(BaseModel): - type: Literal['NoAuth'] - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + type: Literal["NoAuth"] + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class NoPagination(BaseModel): - type: Literal['NoPagination'] + type: Literal["NoPagination"] class OAuthConfigSpecification(BaseModel): class Config: extra = Extra.allow - oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = ( - Field( - None, - description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", - examples=[ - {'app_id': {'type': 'string', 'path_in_connector_config': ['app_id']}}, - { - 'app_id': { - 'type': 'string', - 'path_in_connector_config': ['info', 'app_id'], - } - }, - ], - title='OAuth user input', - ) + oauth_user_input_from_connector_config_specification: Optional[Dict[str, Any]] = Field( + None, + description="OAuth specific blob. This is a Json Schema used to validate Json configurations used as input to OAuth.\nMust be a valid non-nested JSON that refers to properties from ConnectorSpecification.connectionSpecification\nusing special annotation 'path_in_connector_config'.\nThese are input values the user is entering through the UI to authenticate to the connector, that might also shared\nas inputs for syncing data via the connector.\nExamples:\nif no connector values is shared during oauth flow, oauth_user_input_from_connector_config_specification=[]\nif connector values such as 'app_id' inside the top level are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['app_id']\n }\n }\nif connector values such as 'info.app_id' nested inside another object are used to generate the API url for the oauth flow,\n oauth_user_input_from_connector_config_specification={\n app_id: {\n type: string\n path_in_connector_config: ['info', 'app_id']\n }\n }", + examples=[ + {"app_id": {"type": "string", "path_in_connector_config": ["app_id"]}}, + { + "app_id": { + "type": "string", + "path_in_connector_config": ["info", "app_id"], + } + }, + ], + title="OAuth user input", ) complete_oauth_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. 
This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }", examples=[ { - 'refresh_token': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'refresh_token'], + "refresh_token": { + "type": "string,", + "path_in_connector_config": ["credentials", "refresh_token"], } } ], - title='OAuth output specification', + title="OAuth output specification", ) complete_oauth_server_input_specification: Optional[Dict[str, Any]] = Field( None, - description='OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }', - examples=[ - {'client_id': {'type': 'string'}, 'client_secret': {'type': 'string'}} - ], - title='OAuth input specification', + description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations.\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nserver when completing an OAuth flow (typically exchanging an auth code for refresh token).\nExamples:\n complete_oauth_server_input_specification={\n client_id: {\n type: string\n },\n client_secret: {\n type: string\n }\n }", + examples=[{"client_id": {"type": "string"}, "client_secret": {"type": "string"}}], + title="OAuth input specification", ) complete_oauth_server_output_specification: Optional[Dict[str, Any]] = Field( None, description="OAuth specific blob. This is a Json Schema used to validate Json configurations persisted as Airbyte Server configurations that\nalso need to be merged back into the connector configuration at runtime.\nThis is a subset configuration of `complete_oauth_server_input_specification` that filters fields out to retain only the ones that\nare necessary for the connector to function with OAuth. 
(some fields could be used during oauth flows but not needed afterwards, therefore\nthey would be listed in the `complete_oauth_server_input_specification` but not `complete_oauth_server_output_specification`)\nMust be a valid non-nested JSON describing additional fields configured by the Airbyte Instance or Workspace Admins to be used by the\nconnector when using OAuth flow APIs.\nThese fields are to be merged back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_server_output_specification={\n client_id: {\n type: string,\n path_in_connector_config: ['credentials', 'client_id']\n },\n client_secret: {\n type: string,\n path_in_connector_config: ['credentials', 'client_secret']\n }\n }", examples=[ { - 'client_id': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_id'], + "client_id": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_id"], }, - 'client_secret': { - 'type': 'string,', - 'path_in_connector_config': ['credentials', 'client_secret'], + "client_secret": { + "type": "string,", + "path_in_connector_config": ["credentials", "client_secret"], }, } ], - title='OAuth server output specification', + title="OAuth server output specification", ) class OffsetIncrement(BaseModel): - type: Literal['OffsetIncrement'] + type: Literal["OffsetIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', + description="The number of records to include in each pages.", examples=[100, "{{ config['page_size'] }}"], - title='Limit', + title="Limit", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `offset` with value `0` during the first request', - title='Inject Offset', + description="Using the `offset` with value `0` during the first request", + title="Inject Offset", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PageIncrement(BaseModel): - type: Literal['PageIncrement'] + type: Literal["PageIncrement"] page_size: Optional[Union[int, str]] = Field( None, - description='The number of records to include in each pages.', - examples=[100, '100', "{{ config['page_size'] }}"], - title='Page Size', + description="The number of records to include in each pages.", + examples=[100, "100", "{{ config['page_size'] }}"], + title="Page Size", ) start_from_page: Optional[int] = Field( 0, - description='Index of the first page to request.', + description="Index of the first page to request.", examples=[0, 1], - title='Start From Page', + title="Start From Page", ) inject_on_first_request: Optional[bool] = Field( False, - description='Using the `page number` with value defined by `start_from_page` during the first request', - title='Inject Page Number', + description="Using the `page number` with value defined by `start_from_page` during the first request", + title="Inject Page Number", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class PrimaryKey(BaseModel): __root__: Union[str, List[str], List[List[str]]] = Field( ..., - description='The stream field to be used to distinguish unique records. 
Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.', - examples=['id', ['code', 'type']], - title='Primary Key', + description="The stream field to be used to distinguish unique records. Can either be a single field, an array of fields representing a composite key, or an array of arrays representing a composite key where the fields are nested fields.", + examples=["id", ["code", "type"]], + title="Primary Key", ) class RecordFilter(BaseModel): - type: Literal['RecordFilter'] + type: Literal["RecordFilter"] condition: Optional[str] = Field( - '', - description='The predicate to filter a record. Records will be removed if evaluated to False.', + "", + description="The predicate to filter a record. Records will be removed if evaluated to False.", examples=[ "{{ record['created_at'] >= stream_interval['start_time'] }}", "{{ record.status in ['active', 'expired'] }}", ], ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SchemaNormalization(Enum): - None_ = 'None' - Default = 'Default' + None_ = "None" + Default = "Default" class RemoveFields(BaseModel): - type: Literal['RemoveFields'] + type: Literal["RemoveFields"] condition: Optional[str] = Field( - '', - description='The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,', + "", + description="The predicate to filter a property by a property value. Property will be removed if it is empty OR expression is evaluated to True.,", examples=[ "{{ property|string == '' }}", - '{{ property is integer }}', - '{{ property|length > 5 }}', + "{{ property is integer }}", + "{{ property|length > 5 }}", "{{ property == 'some_string_to_match' }}", ], ) field_pointers: List[List[str]] = Field( ..., - description='Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.', - examples=[['tags'], [['content', 'html'], ['content', 'plain_text']]], - title='Field Paths', + description="Array of paths defining the field to remove. Each item is an array whose field describe the path of a field to remove.", + examples=[["tags"], [["content", "html"], ["content", "plain_text"]]], + title="Field Paths", ) class RequestPath(BaseModel): - type: Literal['RequestPath'] + type: Literal["RequestPath"] class InjectInto(Enum): - request_parameter = 'request_parameter' - header = 'header' - body_data = 'body_data' - body_json = 'body_json' + request_parameter = "request_parameter" + header = "header" + body_data = "body_data" + body_json = "body_json" class RequestOption(BaseModel): - type: Literal['RequestOption'] + type: Literal["RequestOption"] field_name: str = Field( ..., - description='Configures which key should be used in the location that the descriptor is being injected into', - examples=['segment_id'], - title='Request Option', + description="Configures which key should be used in the location that the descriptor is being injected into", + examples=["segment_id"], + title="Request Option", ) inject_into: InjectInto = Field( ..., - description='Configures where the descriptor should be set on the HTTP requests. 
Note that request parameters that are already encoded in the URL path will not be duplicated.', - examples=['request_parameter', 'header', 'body_data', 'body_json'], - title='Inject Into', + description="Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.", + examples=["request_parameter", "header", "body_data", "body_json"], + title="Inject Into", ) @@ -907,54 +901,54 @@ class Config: class LegacySessionTokenAuthenticator(BaseModel): - type: Literal['LegacySessionTokenAuthenticator'] + type: Literal["LegacySessionTokenAuthenticator"] header: str = Field( ..., - description='The name of the session token header that will be injected in the request', - examples=['X-Session'], - title='Session Request Header', + description="The name of the session token header that will be injected in the request", + examples=["X-Session"], + title="Session Request Header", ) login_url: str = Field( ..., - description='Path of the login URL (do not include the base URL)', - examples=['session'], - title='Login Path', + description="Path of the login URL (do not include the base URL)", + examples=["session"], + title="Login Path", ) session_token: Optional[str] = Field( None, - description='Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair', + description="Session token to use if using a pre-defined token. Not needed if authenticating with username + password pair", example=["{{ config['session_token'] }}"], - title='Session Token', + title="Session Token", ) session_token_response_key: str = Field( ..., - description='Name of the key of the session token to be extracted from the response', - examples=['id'], - title='Response Token Response Key', + description="Name of the key of the session token to be extracted from the response", + examples=["id"], + title="Response Token Response Key", ) username: Optional[str] = Field( None, - description='Username used to authenticate and obtain a session token', + description="Username used to authenticate and obtain a session token", examples=[" {{ config['username'] }}"], - title='Username', + title="Username", ) password: Optional[str] = Field( - '', - description='Password used to authenticate and obtain a session token', - examples=["{{ config['password'] }}", ''], - title='Password', + "", + description="Password used to authenticate and obtain a session token", + examples=["{{ config['password'] }}", ""], + title="Password", ) validate_session_url: str = Field( ..., - description='Path of the URL to use to validate that the session token is valid (do not include the base URL)', - examples=['user/current'], - title='Validate Session Path', + description="Path of the URL to use to validate that the session token is valid (do not include the base URL)", + examples=["user/current"], + title="Validate Session Path", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AsyncJobStatusMap(BaseModel): - type: Optional[Literal['AsyncJobStatusMap']] = None + type: Optional[Literal["AsyncJobStatusMap"]] = None running: List[str] completed: List[str] failed: List[str] @@ -962,65 +956,65 @@ class AsyncJobStatusMap(BaseModel): class ValueType(Enum): - string = 'string' - number = 'number' - integer = 'integer' - boolean = 'boolean' + string = "string" + number = "number" + integer = "integer" + boolean = 
"boolean" class WaitTimeFromHeader(BaseModel): - type: Literal['WaitTimeFromHeader'] + type: Literal["WaitTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['Retry-After'], - title='Response Header Name', + description="The name of the response header defining how long to wait before retrying.", + examples=["Retry-After"], + title="Response Header Name", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) max_waiting_time_in_seconds: Optional[float] = Field( None, - description='Given the value extracted from the header is greater than this value, stop the stream.', + description="Given the value extracted from the header is greater than this value, stop the stream.", examples=[3600], - title='Max Waiting Time in Seconds', + title="Max Waiting Time in Seconds", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class WaitUntilTimeFromHeader(BaseModel): - type: Literal['WaitUntilTimeFromHeader'] + type: Literal["WaitUntilTimeFromHeader"] header: str = Field( ..., - description='The name of the response header defining how long to wait before retrying.', - examples=['wait_time'], - title='Response Header', + description="The name of the response header defining how long to wait before retrying.", + examples=["wait_time"], + title="Response Header", ) min_wait: Optional[Union[float, str]] = Field( None, - description='Minimum time to wait before retrying.', - examples=[10, '60'], - title='Minimum Wait Time', + description="Minimum time to wait before retrying.", + examples=[10, "60"], + title="Minimum Wait Time", ) regex: Optional[str] = Field( None, - description='Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.', - examples=['([-+]?\\d+)'], - title='Extraction Regex', + description="Optional regex to apply on the header to extract its value. The regex should define a capture group defining the wait time.", + examples=["([-+]?\\d+)"], + title="Extraction Regex", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddedFieldDefinition(BaseModel): - type: Literal['AddedFieldDefinition'] + type: Literal["AddedFieldDefinition"] path: List[str] = Field( ..., - description='List of strings defining the path where to add the value on the record.', - examples=[['segment_id'], ['metadata', 'segment_id']], - title='Path', + description="List of strings defining the path where to add the value on the record.", + examples=[["segment_id"], ["metadata", "segment_id"]], + title="Path", ) value: str = Field( ..., @@ -1030,167 +1024,165 @@ class AddedFieldDefinition(BaseModel): "{{ record['MetaData']['LastUpdatedTime'] }}", "{{ stream_partition['segment_id'] }}", ], - title='Value', + title="Value", ) value_type: Optional[ValueType] = Field( None, - description='Type of the value. 
If not specified, the type will be inferred from the value.', - title='Value Type', + description="Type of the value. If not specified, the type will be inferred from the value.", + title="Value Type", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AddFields(BaseModel): - type: Literal['AddFields'] + type: Literal["AddFields"] fields: List[AddedFieldDefinition] = Field( ..., - description='List of transformations (path and corresponding value) that will be added to the record.', - title='Fields', + description="List of transformations (path and corresponding value) that will be added to the record.", + title="Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ApiKeyAuthenticator(BaseModel): - type: Literal['ApiKeyAuthenticator'] + type: Literal["ApiKeyAuthenticator"] api_token: Optional[str] = Field( None, - description='The API key to inject in the request. Fill it in the user inputs.', + description="The API key to inject in the request. Fill it in the user inputs.", examples=["{{ config['api_key'] }}", "Token token={{ config['api_key'] }}"], - title='API Key', + title="API Key", ) header: Optional[str] = Field( None, - description='The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.', - examples=['Authorization', 'Api-Token', 'X-Auth-Token'], - title='Header Name', + description="The name of the HTTP header that will be set to the API key. This setting is deprecated, use inject_into instead. Header and inject_into can not be defined at the same time.", + examples=["Authorization", "Api-Token", "X-Auth-Token"], + title="Header Name", ) inject_into: Optional[RequestOption] = Field( None, - description='Configure how the API Key will be sent in requests to the source API. Either inject_into or header has to be defined.', + description="Configure how the API Key will be sent in requests to the source API. 
Either inject_into or header has to be defined.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AuthFlow(BaseModel): - auth_flow_type: Optional[AuthFlowType] = Field( - None, description='The type of auth to use', title='Auth flow type' - ) + auth_flow_type: Optional[AuthFlowType] = Field(None, description="The type of auth to use", title="Auth flow type") predicate_key: Optional[List[str]] = Field( None, - description='JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.', - examples=[['credentials', 'auth_type']], - title='Predicate key', + description="JSON path to a field in the connectorSpecification that should exist for the advanced auth to be applicable.", + examples=[["credentials", "auth_type"]], + title="Predicate key", ) predicate_value: Optional[str] = Field( None, - description='Value of the predicate_key fields for the advanced auth to be applicable.', - examples=['Oauth'], - title='Predicate value', + description="Value of the predicate_key fields for the advanced auth to be applicable.", + examples=["Oauth"], + title="Predicate value", ) oauth_config_specification: Optional[OAuthConfigSpecification] = None class DatetimeBasedCursor(BaseModel): - type: Literal['DatetimeBasedCursor'] + type: Literal["DatetimeBasedCursor"] cursor_field: str = Field( ..., - description='The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.', - examples=['created_at', "{{ config['record_cursor'] }}"], - title='Cursor Field', + description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.", + examples=["created_at", "{{ config['record_cursor'] }}"], + title="Cursor Field", ) datetime_format: str = Field( ..., - description='The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with "%" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n', - examples=['%Y-%m-%dT%H:%M:%S.%f%z', '%Y-%m-%d', '%s', '%ms', '%s_as_float'], - title='Outgoing Datetime Format', + description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. 
The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n", + examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"], + title="Outgoing Datetime Format", ) start_datetime: Union[str, MinMaxDatetime] = Field( ..., - description='The datetime that determines the earliest record that should be synced.', - examples=['2020-01-1T00:00:00Z', "{{ config['start_time'] }}"], - title='Start Datetime', + description="The datetime that determines the earliest record that should be synced.", + examples=["2020-01-1T00:00:00Z", "{{ config['start_time'] }}"], + title="Start Datetime", ) cursor_datetime_formats: Optional[List[str]] = Field( None, - description='The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.', - title='Cursor Datetime Formats', + description="The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the `datetime_format` will be used.", + title="Cursor Datetime Formats", ) cursor_granularity: Optional[str] = Field( None, - description='Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. 
Given this field is provided, `step` needs to be provided as well.', - examples=['PT1S'], - title='Cursor Granularity', + description="Smallest increment the datetime_format has (ISO 8601 duration) that is used to ensure the start of a slice does not overlap with the end of the previous one, e.g. for %Y-%m-%d the granularity should be P1D, for %Y-%m-%dT%H:%M:%SZ the granularity should be PT1S. Given this field is provided, `step` needs to be provided as well.", + examples=["PT1S"], + title="Cursor Granularity", ) end_datetime: Optional[Union[str, MinMaxDatetime]] = Field( None, - description='The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.', - examples=['2021-01-1T00:00:00Z', '{{ now_utc() }}', '{{ day_delta(-1) }}'], - title='End Datetime', + description="The datetime that determines the last record that should be synced. If not provided, `{{ now_utc() }}` will be used.", + examples=["2021-01-1T00:00:00Z", "{{ now_utc() }}", "{{ day_delta(-1) }}"], + title="End Datetime", ) end_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the end datetime will be sent in requests to the source API.', - title='Inject End Time Into Outgoing HTTP Request', + description="Optionally configures how the end datetime will be sent in requests to the source API.", + title="Inject End Time Into Outgoing HTTP Request", ) is_data_feed: Optional[bool] = Field( None, - description='A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.', - title='Whether the target API is formatted as a data feed', + description="A data feed API is an API that does not allow filtering and paginates the content from the most recent to the least recent. Given this, the CDK needs to know when to stop paginating and this field will generate a stop condition for pagination.", + title="Whether the target API is formatted as a data feed", ) is_client_side_incremental: Optional[bool] = Field( None, - description='If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. This means that all records would be read from the API, but only new records will be emitted to the destination.', - title='Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)', + description="If the target API endpoint does not take cursor values to filter records and returns all records anyway, the connector with this cursor will filter out records locally, and only emit new records from the last sync, hence incremental. 
This means that all records would be read from the API, but only new records will be emitted to the destination.", + title="Whether the target API does not support filtering and returns all data (the cursor filters records in the client instead of the API side)", ) is_compare_strictly: Optional[bool] = Field( False, - description='Set to True if the target API does not accept queries where the start time equal the end time.', - title='Whether to skip requests if the start time equals the end time', + description="Set to True if the target API does not accept queries where the start time equal the end time.", + title="Whether to skip requests if the start time equals the end time", ) global_substream_cursor: Optional[bool] = Field( False, - description='This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).', - title='Whether to store cursor as one value instead of per partition', + description="This setting optimizes performance when the parent stream has thousands of partitions by storing the cursor as a single value rather than per partition. Notably, the substream state is updated only at the end of the sync, which helps prevent data loss in case of a sync failure. See more info in the [docs](https://docs.airbyte.com/connector-development/config-based/understanding-the-yaml-file/incremental-syncs).", + title="Whether to store cursor as one value instead of per partition", ) lookback_window: Optional[str] = Field( None, - description='Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.', - examples=['P1D', "P{{ config['lookback_days'] }}D"], - title='Lookback Window', + description="Time interval before the start_datetime to read data for, e.g. P1M for looking back one month.", + examples=["P1D", "P{{ config['lookback_days'] }}D"], + title="Lookback Window", ) partition_field_end: Optional[str] = Field( None, - description='Name of the partition start time field.', - examples=['ending_time'], - title='Partition Field End', + description="Name of the partition start time field.", + examples=["ending_time"], + title="Partition Field End", ) partition_field_start: Optional[str] = Field( None, - description='Name of the partition end time field.', - examples=['starting_time'], - title='Partition Field Start', + description="Name of the partition end time field.", + examples=["starting_time"], + title="Partition Field Start", ) start_time_option: Optional[RequestOption] = Field( None, - description='Optionally configures how the start datetime will be sent in requests to the source API.', - title='Inject Start Time Into Outgoing HTTP Request', + description="Optionally configures how the start datetime will be sent in requests to the source API.", + title="Inject Start Time Into Outgoing HTTP Request", ) step: Optional[str] = Field( None, - description='The size of the time window (ISO8601 duration). Given this field is provided, `cursor_granularity` needs to be provided as well.', - examples=['P1W', "{{ config['step_increment'] }}"], - title='Step', + description="The size of the time window (ISO8601 duration). 
Given this field is provided, `cursor_granularity` needs to be provided as well.", + examples=["P1W", "{{ config['step_increment'] }}"], + title="Step", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultErrorHandler(BaseModel): - type: Literal['DefaultErrorHandler'] + type: Literal["DefaultErrorHandler"] backoff_strategies: Optional[ List[ Union[ @@ -1203,124 +1195,122 @@ class DefaultErrorHandler(BaseModel): ] ] = Field( None, - description='List of backoff strategies to use to determine how long to wait before retrying a retryable request.', - title='Backoff Strategies', + description="List of backoff strategies to use to determine how long to wait before retrying a retryable request.", + title="Backoff Strategies", ) max_retries: Optional[int] = Field( 5, - description='The maximum number of time to retry a retryable request before giving up and failing.', + description="The maximum number of time to retry a retryable request before giving up and failing.", examples=[5, 0, 10], - title='Max Retry Count', + title="Max Retry Count", ) response_filters: Optional[List[HttpResponseFilter]] = Field( None, description="List of response filters to iterate on when deciding how to handle an error. When using an array of multiple filters, the filters will be applied sequentially and the response will be selected if it matches any of the filter's predicate.", - title='Response Filters', + title="Response Filters", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DefaultPaginator(BaseModel): - type: Literal['DefaultPaginator'] - pagination_strategy: Union[ - CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement - ] = Field( + type: Literal["DefaultPaginator"] + pagination_strategy: Union[CursorPagination, CustomPaginationStrategy, OffsetIncrement, PageIncrement] = Field( ..., - description='Strategy defining how records are paginated.', - title='Pagination Strategy', + description="Strategy defining how records are paginated.", + title="Pagination Strategy", ) page_size_option: Optional[RequestOption] = None page_token_option: Optional[Union[RequestOption, RequestPath]] = None - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenRequestApiKeyAuthenticator(BaseModel): - type: Literal['ApiKey'] + type: Literal["ApiKey"] inject_into: RequestOption = Field( ..., - description='Configure how the API Key will be sent in requests to the source API.', + description="Configure how the API Key will be sent in requests to the source API.", examples=[ - {'inject_into': 'header', 'field_name': 'Authorization'}, - {'inject_into': 'request_parameter', 'field_name': 'authKey'}, + {"inject_into": "header", "field_name": "Authorization"}, + {"inject_into": "request_parameter", "field_name": "authKey"}, ], - title='Inject API Key Into Outgoing HTTP Request', + title="Inject API Key Into Outgoing HTTP Request", ) class ListPartitionRouter(BaseModel): - type: Literal['ListPartitionRouter'] + type: Literal["ListPartitionRouter"] cursor_field: str = Field( ..., description='While iterating over list values, the name of field used to reference a list value. The partition value can be accessed with string interpolation. e.g. 
"{{ stream_partition[\'my_key\'] }}" where "my_key" is the value of the cursor_field.', - examples=['section', "{{ config['section_key'] }}"], - title='Current Partition Value Identifier', + examples=["section", "{{ config['section_key'] }}"], + title="Current Partition Value Identifier", ) values: Union[str, List[str]] = Field( ..., - description='The list of attributes being iterated over and used as input for the requests made to the source API.', - examples=[['section_a', 'section_b', 'section_c'], "{{ config['sections'] }}"], - title='Partition Values', + description="The list of attributes being iterated over and used as input for the requests made to the source API.", + examples=[["section_a", "section_b", "section_c"], "{{ config['sections'] }}"], + title="Partition Values", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the list value should be injected into and under what field name if applicable.', - title='Inject Partition Value Into Outgoing HTTP Request', + description="A request option describing where the list value should be injected into and under what field name if applicable.", + title="Inject Partition Value Into Outgoing HTTP Request", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class RecordSelector(BaseModel): - type: Literal['RecordSelector'] + type: Literal["RecordSelector"] extractor: Union[CustomRecordExtractor, DpathExtractor] record_filter: Optional[Union[CustomRecordFilter, RecordFilter]] = Field( None, - description='Responsible for filtering records to be emitted by the Source.', - title='Record Filter', + description="Responsible for filtering records to be emitted by the Source.", + title="Record Filter", ) schema_normalization: Optional[SchemaNormalization] = SchemaNormalization.None_ - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class Spec(BaseModel): - type: Literal['Spec'] + type: Literal["Spec"] connection_specification: Dict[str, Any] = Field( ..., - description='A connection specification describing how a the connector can be configured.', - title='Connection Specification', + description="A connection specification describing how a the connector can be configured.", + title="Connection Specification", ) documentation_url: Optional[str] = Field( None, description="URL of the connector's documentation page.", - examples=['https://docs.airbyte.com/integrations/sources/dremio'], - title='Documentation URL', + examples=["https://docs.airbyte.com/integrations/sources/dremio"], + title="Documentation URL", ) advanced_auth: Optional[AuthFlow] = Field( None, - description='Advanced specification for configuring the authentication flow.', - title='Advanced Auth', + description="Advanced specification for configuring the authentication flow.", + title="Advanced Auth", ) class CompositeErrorHandler(BaseModel): - type: Literal['CompositeErrorHandler'] + type: Literal["CompositeErrorHandler"] error_handlers: List[Union[CompositeErrorHandler, DefaultErrorHandler]] = Field( ..., - description='List of error handlers to iterate on to determine how to handle a failed response.', - title='Error Handlers', + description="List of error handlers to iterate on to determine how to handle a failed response.", + title="Error Handlers", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + 
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeSource(BaseModel): class Config: extra = Extra.forbid - type: Literal['DeclarativeSource'] + type: Literal["DeclarativeSource"] check: CheckStream streams: List[DeclarativeStream] version: str = Field( ..., - description='The version of the Airbyte CDK used to build and test the source.', + description="The version of the Airbyte CDK used to build and test the source.", ) schemas: Optional[Schemas] = None definitions: Optional[Dict[str, Any]] = None @@ -1328,11 +1318,11 @@ class Config: concurrency_level: Optional[ConcurrencyLevel] = None metadata: Optional[Dict[str, Any]] = Field( None, - description='For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.', + description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.", ) description: Optional[str] = Field( None, - description='A description of the connector. It will be presented on the Source documentation page.', + description="A description of the connector. It will be presented on the Source documentation page.", ) @@ -1340,12 +1330,12 @@ class SelectiveAuthenticator(BaseModel): class Config: extra = Extra.allow - type: Literal['SelectiveAuthenticator'] + type: Literal["SelectiveAuthenticator"] authenticator_selection_path: List[str] = Field( ..., - description='Path of the field in config with selected authenticator name', - examples=[['auth'], ['auth', 'type']], - title='Authenticator Selection Path', + description="Path of the field in config with selected authenticator name", + examples=[["auth"], ["auth", "type"]], + title="Authenticator Selection Path", ) authenticators: Dict[ str, @@ -1362,132 +1352,116 @@ class Config: ], ] = Field( ..., - description='Authenticators to select from.', + description="Authenticators to select from.", examples=[ { - 'authenticators': { - 'token': '#/definitions/ApiKeyAuthenticator', - 'oauth': '#/definitions/OAuthAuthenticator', - 'jwt': '#/definitions/JwtAuthenticator', + "authenticators": { + "token": "#/definitions/ApiKeyAuthenticator", + "oauth": "#/definitions/OAuthAuthenticator", + "jwt": "#/definitions/JwtAuthenticator", } } ], - title='Authenticators', + title="Authenticators", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class DeclarativeStream(BaseModel): class Config: extra = Extra.allow - type: Literal['DeclarativeStream'] + type: Literal["DeclarativeStream"] retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field( ..., - description='Component used to coordinate how records are extracted across stream slices and request pages.', - title='Retriever', + description="Component used to coordinate how records are extracted across stream slices and request pages.", + title="Retriever", ) - incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = ( - Field( - None, - description='Component used to fetch data incrementally based on a time field in the data.', - title='Incremental Sync', - ) - ) - name: Optional[str] = Field( - '', description='The stream name.', example=['Users'], title='Name' - ) - primary_key: Optional[PrimaryKey] = Field( - '', description='The primary key of the stream.', title='Primary Key' + incremental_sync: Optional[Union[CustomIncrementalSync, DatetimeBasedCursor]] = Field( + None, 
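# A minimal, hedged sketch: the generated classes above are ordinary Pydantic
# models, so manifest-style dicts can be validated directly with parse_obj().
# The import path and the field values ("since", "updated_at") are illustrative
# assumptions and may differ between CDK versions.
from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, RequestOption

start_option = RequestOption.parse_obj(
    {"type": "RequestOption", "inject_into": "request_parameter", "field_name": "since"}
)
cursor = DatetimeBasedCursor.parse_obj(
    {
        "type": "DatetimeBasedCursor",
        "cursor_field": "updated_at",
        "datetime_format": "%Y-%m-%dT%H:%M:%SZ",
        "start_datetime": "2021-01-01T00:00:00Z",
        "start_time_option": start_option.dict(exclude_none=True),
    }
)
# inject_into is backed by the InjectInto enum defined earlier in this module,
# so the raw string round-trips to InjectInto.request_parameter.
assert cursor.start_time_option.inject_into.value == "request_parameter"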
+ description="Component used to fetch data incrementally based on a time field in the data.", + title="Incremental Sync", ) - schema_loader: Optional[ - Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader] - ] = Field( + name: Optional[str] = Field("", description="The stream name.", example=["Users"], title="Name") + primary_key: Optional[PrimaryKey] = Field("", description="The primary key of the stream.", title="Primary Key") + schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = Field( None, - description='Component used to retrieve the schema for the current stream.', - title='Schema Loader', + description="Component used to retrieve the schema for the current stream.", + title="Schema Loader", ) - transformations: Optional[ - List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]] - ] = Field( + transformations: Optional[List[Union[AddFields, CustomTransformation, RemoveFields, KeysToLower]]] = Field( None, - description='A list of transformations to be applied to each output record.', - title='Transformations', + description="A list of transformations to be applied to each output record.", + title="Transformations", ) - state_migrations: Optional[ - List[Union[LegacyToPerPartitionStateMigration, CustomStateMigration]] - ] = Field( + state_migrations: Optional[List[Union[LegacyToPerPartitionStateMigration, CustomStateMigration]]] = Field( [], - description='Array of state migrations to be applied on the input state', - title='State Migrations', + description="Array of state migrations to be applied on the input state", + title="State Migrations", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SessionTokenAuthenticator(BaseModel): - type: Literal['SessionTokenAuthenticator'] + type: Literal["SessionTokenAuthenticator"] login_requester: HttpRequester = Field( ..., - description='Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.', + description="Description of the request to perform to obtain a session token to perform data requests. The response body is expected to be a JSON object with a session token property.", examples=[ { - 'type': 'HttpRequester', - 'url_base': 'https://my_api.com', - 'path': '/login', - 'authenticator': { - 'type': 'BasicHttpAuthenticator', - 'username': '{{ config.username }}', - 'password': '{{ config.password }}', + "type": "HttpRequester", + "url_base": "https://my_api.com", + "path": "/login", + "authenticator": { + "type": "BasicHttpAuthenticator", + "username": "{{ config.username }}", + "password": "{{ config.password }}", }, } ], - title='Login Requester', + title="Login Requester", ) session_token_path: List[str] = Field( ..., - description='The path in the response body returned from the login requester to the session token.', - examples=[['access_token'], ['result', 'token']], - title='Session Token Path', + description="The path in the response body returned from the login requester to the session token.", + examples=[["access_token"], ["result", "token"]], + title="Session Token Path", ) expiration_duration: Optional[str] = Field( None, - description='The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. 
Omitting it will result in the session token being refreshed for every request.', - examples=['PT1H', 'P1D'], - title='Expiration Duration', + description="The duration in ISO 8601 duration notation after which the session token expires, starting from the time it was obtained. Omitting it will result in the session token being refreshed for every request.", + examples=["PT1H", "P1D"], + title="Expiration Duration", ) - request_authentication: Union[ - SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator - ] = Field( + request_authentication: Union[SessionTokenRequestApiKeyAuthenticator, SessionTokenRequestBearerAuthenticator] = Field( ..., - description='Authentication method to use for requests sent to the API, specifying how to inject the session token.', - title='Data Request Authentication', + description="Authentication method to use for requests sent to the API, specifying how to inject the session token.", + title="Data Request Authentication", ) - decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field( - None, description='Component used to decode the response.', title='Decoder' - ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(None, description="Component used to decode the response.", title="Decoder") + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class HttpRequester(BaseModel): - type: Literal['HttpRequester'] + type: Literal["HttpRequester"] url_base: str = Field( ..., - description='Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.", examples=[ - 'https://connect.squareup.com/v2', + "https://connect.squareup.com/v2", "{{ config['base_url'] or 'https://app.posthog.com'}}/api/", ], - title='API Base URL', + title="API Base URL", ) path: str = Field( ..., - description='Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.', + description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. 
API tokens) into this field - Use the Authentication component for this.", examples=[ - '/products', + "/products", "/quotes/{{ stream_partition['id'] }}/quote_line_groups", "/trades/{{ config['symbol_id'] }}/history", ], - title='URL Path', + title="URL Path", ) authenticator: Optional[ Union[ @@ -1504,111 +1478,107 @@ class HttpRequester(BaseModel): ] ] = Field( None, - description='Authentication method to use for requests sent to the API.', - title='Authenticator', + description="Authentication method to use for requests sent to the API.", + title="Authenticator", ) - error_handler: Optional[ - Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler] - ] = Field( + error_handler: Optional[Union[DefaultErrorHandler, CustomErrorHandler, CompositeErrorHandler]] = Field( None, - description='Error handler component that defines how to handle errors.', - title='Error Handler', + description="Error handler component that defines how to handle errors.", + title="Error Handler", ) http_method: Optional[HttpMethod] = Field( HttpMethod.GET, - description='The HTTP method used to fetch data from the source (can be GET or POST).', - examples=['GET', 'POST'], - title='HTTP Method', + description="The HTTP method used to fetch data from the source (can be GET or POST).", + examples=["GET", "POST"], + title="HTTP Method", ) request_body_data: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.', + description="Specifies how to populate the body of the request with a non-JSON payload. Plain text will be sent as is, whereas objects will be converted to a urlencoded form.", examples=[ '[{"clause": {"type": "timestamp", "operator": 10, "parameters":\n [{"value": {{ stream_interval[\'start_time\'] | int * 1000 }} }]\n }, "orderBy": 1, "columnName": "Timestamp"}]/\n' ], - title='Request Body Payload (Non-JSON)', + title="Request Body Payload (Non-JSON)", ) request_body_json: Optional[Union[str, Dict[str, Any]]] = Field( None, - description='Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.', + description="Specifies how to populate the body of the request with a JSON payload. Can contain nested objects.", examples=[ - {'sort_order': 'ASC', 'sort_field': 'CREATED_AT'}, - {'key': "{{ config['value'] }}"}, - {'sort': {'field': 'updated_at', 'order': 'ascending'}}, + {"sort_order": "ASC", "sort_field": "CREATED_AT"}, + {"key": "{{ config['value'] }}"}, + {"sort": {"field": "updated_at", "order": "ascending"}}, ], - title='Request Body JSON Payload', + title="Request Body JSON Payload", ) request_headers: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.', - examples=[{'Output-Format': 'JSON'}, {'Version': "{{ config['version'] }}"}], - title='Request Headers', + description="Return any non-auth headers. 
Authentication headers will overwrite any overlapping headers returned from this method.", + examples=[{"Output-Format": "JSON"}, {"Version": "{{ config['version'] }}"}], + title="Request Headers", ) request_parameters: Optional[Union[str, Dict[str, str]]] = Field( None, - description='Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.', + description="Specifies the query parameters that should be set on an outgoing HTTP request given the inputs.", examples=[ - {'unit': 'day'}, + {"unit": "day"}, { - 'query': 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' + "query": 'last_event_time BETWEEN TIMESTAMP "{{ stream_interval.start_time }}" AND TIMESTAMP "{{ stream_interval.end_time }}"' }, - {'searchIn': "{{ ','.join(config.get('search_in', [])) }}"}, - {'sort_by[asc]': 'updated_at'}, + {"searchIn": "{{ ','.join(config.get('search_in', [])) }}"}, + {"sort_by[asc]": "updated_at"}, ], - title='Query Parameters', + title="Query Parameters", ) use_cache: Optional[bool] = Field( False, - description='Enables stream requests caching. This field is automatically set by the CDK.', - title='Use Cache', + description="Enables stream requests caching. This field is automatically set by the CDK.", + title="Use Cache", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class ParentStreamConfig(BaseModel): - type: Literal['ParentStreamConfig'] + type: Literal["ParentStreamConfig"] parent_key: str = Field( ..., - description='The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.', - examples=['id', "{{ config['parent_record_id'] }}"], - title='Parent Key', - ) - stream: DeclarativeStream = Field( - ..., description='Reference to the parent stream.', title='Parent Stream' + description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. 
This parent identifier field is typically a characteristic of the child records being extracted from the source API.", + examples=["id", "{{ config['parent_record_id'] }}"], + title="Parent Key", ) + stream: DeclarativeStream = Field(..., description="Reference to the parent stream.", title="Parent Stream") partition_field: str = Field( ..., - description='While iterating over parent records during a sync, the parent_key value can be referenced by using this field.', - examples=['parent_id', "{{ config['parent_partition_field'] }}"], - title='Current Parent Key Value Identifier', + description="While iterating over parent records during a sync, the parent_key value can be referenced by using this field.", + examples=["parent_id", "{{ config['parent_partition_field'] }}"], + title="Current Parent Key Value Identifier", ) request_option: Optional[RequestOption] = Field( None, - description='A request option describing where the parent key value should be injected into and under what field name if applicable.', - title='Request Option', + description="A request option describing where the parent key value should be injected into and under what field name if applicable.", + title="Request Option", ) incremental_dependency: Optional[bool] = Field( False, - description='Indicates whether the parent stream should be read incrementally based on updates in the child stream.', - title='Incremental Dependency', + description="Indicates whether the parent stream should be read incrementally based on updates in the child stream.", + title="Incremental Dependency", ) extra_fields: Optional[List[List[str]]] = Field( None, - description='Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.', - title='Extra Fields', + description="Array of field paths to include as additional fields in the stream slice. Each path is an array of strings representing keys to access fields in the respective parent record. Accessible via `stream_slice.extra_fields`. Missing fields are set to `None`.", + title="Extra Fields", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SimpleRetriever(BaseModel): - type: Literal['SimpleRetriever'] + type: Literal["SimpleRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', + description="Component that describes how to extract records from a HTTP response.", ) requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API.', + description="Requester component that describes how to prepare HTTP requests to send to the source API.", ) paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1616,61 +1586,53 @@ class SimpleRetriever(BaseModel): ) ignore_stream_slicer_parameters_on_paginated_requests: Optional[bool] = Field( False, - description='If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.', + description="If true, the partition router and incremental request options will be ignored when paginating requests. 
Request options set directly on the requester will not be ignored.", ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) - decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = ( - Field( - None, - description='Component decoding the response so records can be extracted.', - title='Decoder', - ) + decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field( + None, + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class AsyncRetriever(BaseModel): - type: Literal['AsyncRetriever'] + type: Literal["AsyncRetriever"] record_selector: RecordSelector = Field( ..., - description='Component that describes how to extract records from a HTTP response.', - ) - status_mapping: AsyncJobStatusMap = Field( - ..., description='Async Job Status to Airbyte CDK Async Job Status mapping.' + description="Component that describes how to extract records from a HTTP response.", ) + status_mapping: AsyncJobStatusMap = Field(..., description="Async Job Status to Airbyte CDK Async Job Status mapping.") status_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( - ..., description='Responsible for fetching the actual status of the async job.' + ..., description="Responsible for fetching the actual status of the async job." 
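# A short, hedged sketch of the AsyncJobStatusMap model referenced by
# status_mapping above: it maps the source API's own job-status strings onto
# the CDK's running/completed/failed buckets. The status strings and the import
# path are illustrative assumptions; some schema versions also declare a
# "timeout" list, which is included defensively here.
from airbyte_cdk.sources.declarative.models import AsyncJobStatusMap

status_mapping = AsyncJobStatusMap.parse_obj(
    {
        "type": "AsyncJobStatusMap",
        "running": ["PENDING", "IN_PROGRESS"],
        "completed": ["DONE"],
        "failed": ["ERROR", "CANCELLED"],
        "timeout": ["TIMED_OUT"],
    }
)
assert "DONE" in status_mapping.completed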
) urls_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field( ..., - description='Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.', + description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.", ) creation_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.", ) polling_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.", ) download_requester: Union[CustomRequester, HttpRequester] = Field( ..., - description='Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.", ) download_paginator: Optional[Union[DefaultPaginator, NoPagination]] = Field( None, @@ -1682,42 +1644,36 @@ class AsyncRetriever(BaseModel): ) delete_requester: Optional[Union[CustomRequester, HttpRequester]] = Field( None, - description='Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.', + description="Requester component that describes how to prepare HTTP requests to send to the source API to delete a job once the records are extracted.", ) partition_router: Optional[ Union[ CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter, - List[ - Union[ - CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter - ] - ], + List[Union[CustomPartitionRouter, ListPartitionRouter, SubstreamPartitionRouter]], ] ] = Field( [], - description='PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.', - title='Partition Router', + description="PartitionRouter component that describes how to partition the stream, enabling incremental syncs and checkpointing.", + title="Partition Router", ) - decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = ( - Field( - None, - description='Component decoding the response so records can be extracted.', - title='Decoder', - ) + decoder: Optional[Union[JsonDecoder, JsonlDecoder, IterableDecoder, XmlDecoder]] = Field( + None, + description="Component decoding the response so records can be extracted.", + title="Decoder", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") class SubstreamPartitionRouter(BaseModel): - type: Literal['SubstreamPartitionRouter'] + type: Literal["SubstreamPartitionRouter"] parent_stream_configs: List[ParentStreamConfig] = Field( ..., - description='Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.', - title='Parent Stream 
Configs', + description="Specifies which parent streams are being iterated over and how parent records should be used to partition the child stream data set.", + title="Parent Stream Configs", ) - parameters: Optional[Dict[str, Any]] = Field(None, alias='$parameters') + parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters") CompositeErrorHandler.update_forward_refs() diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index f0eb4f38..e9420dfb 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -185,7 +185,6 @@ class ModelToComponentFactory: - EPOCH_DATETIME_FORMAT = "%s" def __init__( @@ -401,7 +400,9 @@ def create_legacy_to_per_partition_state_migration( if not hasattr(partition_router, "parent_stream_configs"): raise ValueError("LegacyToPerPartitionStateMigrations can only be applied with a parent stream configuration.") - return LegacyToPerPartitionStateMigration(declarative_stream.retriever.partition_router, declarative_stream.incremental_sync, config, declarative_stream.parameters) # type: ignore # The retriever type was already checked + return LegacyToPerPartitionStateMigration( + declarative_stream.retriever.partition_router, declarative_stream.incremental_sync, config, declarative_stream.parameters + ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any @@ -483,7 +484,6 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_state: MutableMapping[str, Any], **kwargs: Any, ) -> Tuple[ConcurrentCursor, DateTimeStreamStateConverter]: - component_type = component_definition.get("type") if component_definition.get("type") != model_type.__name__: raise ValueError(f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead") @@ -1474,7 +1474,6 @@ def create_async_retriever( transformations: List[RecordTransformation], **kwargs: Any, ) -> AsyncRetriever: - decoder = self._create_component_from_model(model=model.decoder, config=config) if model.decoder else JsonDecoder(parameters={}) record_selector = self._create_component_from_model( model=model.record_selector, diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index 80bf6034..667b673d 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -141,7 +141,10 @@ def stream_slices(self) -> Iterable[StreamSlice]: partition_field = parent_stream_config.partition_field.eval(self.config) # type: ignore # partition_field is always casted to an interpolated string extra_fields = None if parent_stream_config.extra_fields: - extra_fields = [[field_path_part.eval(self.config) for field_path_part in field_path] for field_path in parent_stream_config.extra_fields] # type: ignore # extra_fields is always casted to an interpolated string + extra_fields = [ + [field_path_part.eval(self.config) for field_path_part in field_path] + for field_path in parent_stream_config.extra_fields + ] # type: ignore # extra_fields is always casted to an interpolated string # read_stateless() assumes the parent is not 
concurrent. This is currently okay since the concurrent CDK does # not support either substreams or RFR, but something that needs to be considered once we do diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index 68ff5ecf..c255360f 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -98,14 +98,12 @@ class DefaultErrorHandler(ErrorHandler): backoff_strategies: Optional[List[BackoffStrategy]] = None def __post_init__(self, parameters: Mapping[str, Any]) -> None: - if not self.response_filters: self.response_filters = [HttpResponseFilter(config=self.config, parameters={})] self._last_request_to_attempt_count: MutableMapping[requests.PreparedRequest, int] = {} def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]]) -> ErrorResolution: - if self.response_filters: for response_filter in self.response_filters: matched_error_resolution = response_filter.matches(response_or_exception=response_or_exception) diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py index 4e3f5416..2a8eae72 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py @@ -12,11 +12,9 @@ class DefaultHttpResponseFilter(HttpResponseFilter): def matches(self, response_or_exception: Optional[Union[requests.Response, Exception]]) -> Optional[ErrorResolution]: - default_mapped_error_resolution = None if isinstance(response_or_exception, (requests.Response, Exception)): - mapped_key: Union[int, type] = ( response_or_exception.status_code if isinstance(response_or_exception, requests.Response) diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index c452dcac..172e1521 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -42,7 +42,6 @@ class HttpResponseFilter: error_message: Union[InterpolatedString, str] = "" def __post_init__(self, parameters: Mapping[str, Any]) -> None: - if self.action is not None: if self.http_codes is None and self.predicate is None and self.error_message_contains is None: raise ValueError("HttpResponseFilter requires a filter condition if an action is specified") @@ -129,7 +128,14 @@ def _create_error_message(self, response: requests.Response) -> Optional[str]: return self.error_message.eval(self.config, response=self._safe_response_json(response), headers=response.headers) # type: ignore # error_message is always cast to an interpolated string def _response_matches_predicate(self, response: requests.Response) -> bool: - return bool(self.predicate.condition and self.predicate.eval(None, response=self._safe_response_json(response), headers=response.headers)) if self.predicate else False # type: ignore # predicate is always cast to an interpolated string + return ( + bool( + self.predicate.condition + and self.predicate.eval(None, response=self._safe_response_json(response), 
headers=response.headers) + ) + if self.predicate + else False + ) # type: ignore # predicate is always cast to an interpolated string def _response_contains_error_message(self, response: requests.Response) -> bool: if not self.error_message_contains: diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index 05d8bfa1..15d20981 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -301,7 +301,6 @@ def send_request( request_body_json: Optional[Mapping[str, Any]] = None, log_formatter: Optional[Callable[[requests.Response], Any]] = None, ) -> Optional[requests.Response]: - request, response = self._http_client.send_request( http_method=self.get_method().value, url=self._join_url( diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index d9e86afc..453940e7 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -74,5 +74,7 @@ def _get_request_options(self, option_type: RequestOptionType, stream_slice: Opt self._partition_field_start.eval(self.config) ) if self.end_time_option and self.end_time_option.inject_into == option_type: - options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(self._partition_field_end.eval(self.config)) # type: ignore # field_name is always casted to an interpolated string + options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get( + self._partition_field_end.eval(self.config) + ) # type: ignore # field_name is always casted to an interpolated string return options diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py index 4a6c7a86..1880ce82 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py @@ -23,7 +23,6 @@ class InterpolatedNestedRequestInputProvider: _request_inputs: Optional[Union[str, NestedMapping]] = field(init=False, repr=False, default=None) def __post_init__(self, parameters: Mapping[str, Any]) -> None: - self._request_inputs = self.request_inputs or {} if isinstance(self._request_inputs, str): self._interpolator = InterpolatedString(self._request_inputs, default="", parameters=parameters) diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py index 868cd842..8a3ed7d8 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py @@ -23,7 +23,6 @@ class InterpolatedRequestInputProvider: _request_inputs: Optional[Union[str, Mapping[str, str]]] = field(init=False, repr=False, default=None) def __post_init__(self, parameters: Mapping[str, Any]) -> None: - 
self._request_inputs = self.request_inputs or {} if isinstance(self._request_inputs, str): self._interpolator = InterpolatedString(self._request_inputs, default="", parameters=parameters) diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index a9f9686e..886ef0f9 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -102,7 +102,6 @@ def read_records( records_schema: Mapping[str, Any], stream_slice: Optional[StreamSlice] = None, ) -> Iterable[StreamData]: - stream_state: StreamState = self._get_stream_state() partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice) records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition) diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 079b1e2a..20ddd114 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -22,7 +22,9 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy def __init__(self, stream_reader: AbstractFileBasedStreamReader): self.stream_reader = stream_reader - def check_availability(self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]) -> Tuple[bool, Optional[str]]: # type: ignore[override] + def check_availability( + self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source] + ) -> Tuple[bool, Optional[str]]: # type: ignore[override] """ Perform a connection check for the stream (verify that we can list files from the stream). diff --git a/airbyte_cdk/sources/file_based/config/unstructured_format.py b/airbyte_cdk/sources/file_based/config/unstructured_format.py index 7858ae61..b799d1fe 100644 --- a/airbyte_cdk/sources/file_based/config/unstructured_format.py +++ b/airbyte_cdk/sources/file_based/config/unstructured_format.py @@ -85,7 +85,10 @@ class Config(OneOfOptionConfig): description="The strategy used to parse documents. `fast` extracts text directly from the document which doesn't work for all files. `ocr_only` is more reliable, but slower. `hi_res` is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf", ) - processing: Union[LocalProcessingConfigModel, APIProcessingConfigModel,] = Field( + processing: Union[ + LocalProcessingConfigModel, + APIProcessingConfigModel, + ] = Field( default=LocalProcessingConfigModel(mode="local"), title="Processing", description="Processing configuration", diff --git a/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py b/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py index ca264578..115382cf 100644 --- a/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +++ b/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py @@ -15,9 +15,7 @@ class AbstractDiscoveryPolicy(ABC): @property @abstractmethod - def n_concurrent_requests(self) -> int: - ... 
+ def n_concurrent_requests(self) -> int: ... @abstractmethod - def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: - ... + def get_max_n_files_for_schema_inference(self, parser: FileTypeParser) -> int: ... diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index f0603f4e..4772173f 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -16,7 +16,6 @@ class JsonlParser(FileTypeParser): - MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 ENCODING = "utf8" diff --git a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py index 7e3d3013..ed25ceb4 100644 --- a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py @@ -20,7 +20,6 @@ class ParquetParser(FileTypeParser): - ENCODING = None def check_config(self, config: FileBasedStreamConfig) -> Tuple[bool, Optional[str]]: diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index 850c4c93..5c2393e9 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -64,8 +64,7 @@ def __init__( @property @abstractmethod - def primary_key(self) -> PrimaryKeyType: - ... + def primary_key(self) -> PrimaryKeyType: ... @cache def list_files(self) -> List[RemoteFile]: diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py index d21a6a01..9cb3541c 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py @@ -24,45 +24,34 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: @property @abstractmethod - def state(self) -> MutableMapping[str, Any]: - ... + def state(self) -> MutableMapping[str, Any]: ... @abstractmethod - def observe(self, record: Record) -> None: - ... + def observe(self, record: Record) -> None: ... @abstractmethod - def close_partition(self, partition: Partition) -> None: - ... + def close_partition(self, partition: Partition) -> None: ... @abstractmethod - def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: - ... + def set_pending_partitions(self, partitions: List["FileBasedStreamPartition"]) -> None: ... @abstractmethod - def add_file(self, file: RemoteFile) -> None: - ... + def add_file(self, file: RemoteFile) -> None: ... @abstractmethod - def get_files_to_sync(self, all_files: Iterable[RemoteFile], logger: logging.Logger) -> Iterable[RemoteFile]: - ... + def get_files_to_sync(self, all_files: Iterable[RemoteFile], logger: logging.Logger) -> Iterable[RemoteFile]: ... @abstractmethod - def get_state(self) -> MutableMapping[str, Any]: - ... + def get_state(self) -> MutableMapping[str, Any]: ... @abstractmethod - def set_initial_state(self, value: StreamState) -> None: - ... + def set_initial_state(self, value: StreamState) -> None: ... @abstractmethod - def get_start_time(self) -> datetime: - ... + def get_start_time(self) -> datetime: ... 
@abstractmethod - def emit_state_message(self) -> None: - ... + def emit_state_message(self) -> None: ... @abstractmethod - def ensure_at_least_one_state_emitted(self) -> None: - ... + def ensure_at_least_one_state_emitted(self) -> None: ... diff --git a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 2b92f103..a69712ef 100644 --- a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -33,7 +33,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin): - """ The default file-based stream. """ diff --git a/airbyte_cdk/sources/source.py b/airbyte_cdk/sources/source.py index 975770c8..c1d8ec66 100644 --- a/airbyte_cdk/sources/source.py +++ b/airbyte_cdk/sources/source.py @@ -27,12 +27,10 @@ class ExperimentalClassWarning(DeprecationWarning): class BaseSource(BaseConnector[TConfig], ABC, Generic[TConfig, TState, TCatalog]): @abstractmethod - def read_state(self, state_path: str) -> TState: - ... + def read_state(self, state_path: str) -> TState: ... @abstractmethod - def read_catalog(self, catalog_path: str) -> TCatalog: - ... + def read_catalog(self, catalog_path: str) -> TCatalog: ... @abstractmethod def read(self, logger: logging.Logger, config: TConfig, catalog: TCatalog, state: Optional[TState] = None) -> Iterable[AirbyteMessage]: diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index e212693b..d5b8fbca 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -62,8 +62,7 @@ def extract_value(self, record: Record) -> CursorValueType: class Cursor(ABC): @property @abstractmethod - def state(self) -> MutableMapping[str, Any]: - ... + def state(self) -> MutableMapping[str, Any]: ... @abstractmethod def observe(self, record: Record) -> None: diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py index e80def36..60d8f17f 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py @@ -155,5 +155,4 @@ def parse_value(self, value: Any) -> Any: @property @abstractmethod - def zero_value(self) -> Any: - ... + def zero_value(self) -> Any: ... diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index f6f181e6..a6a33fac 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -28,8 +28,7 @@ def _to_state_message(self, value: Any) -> Any: @property @abstractmethod - def _zero_value(self) -> Any: - ... + def _zero_value(self) -> Any: ... @property def zero_value(self) -> datetime: @@ -40,16 +39,13 @@ def get_end_provider(cls) -> Callable[[], datetime]: return lambda: datetime.now(timezone.utc) @abstractmethod - def increment(self, timestamp: datetime) -> datetime: - ... + def increment(self, timestamp: datetime) -> datetime: ... @abstractmethod - def parse_timestamp(self, timestamp: Any) -> datetime: - ... 
+ def parse_timestamp(self, timestamp: Any) -> datetime: ... @abstractmethod - def output_format(self, timestamp: datetime) -> Any: - ... + def output_format(self, timestamp: datetime) -> Any: ... def parse_value(self, value: Any) -> Any: """ diff --git a/airbyte_cdk/sources/streams/http/exceptions.py b/airbyte_cdk/sources/streams/http/exceptions.py index efa44165..3db57ffe 100644 --- a/airbyte_cdk/sources/streams/http/exceptions.py +++ b/airbyte_cdk/sources/streams/http/exceptions.py @@ -15,7 +15,6 @@ def __init__( response: Optional[Union[requests.Response, Exception]], error_message: str = "", ): - if isinstance(response, requests.Response): error_message = ( error_message or f"Request URL: {request.url}, Response Code: {response.status_code}, Response Text: {response.text}" diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index 6c552dda..a132702e 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -448,7 +448,6 @@ def _fetch_next_page( stream_state: Optional[Mapping[str, Any]] = None, next_page_token: Optional[Mapping[str, Any]] = None, ) -> Tuple[requests.PreparedRequest, requests.Response]: - request, response = self._http_client.send_request( http_method=self.http_method, url=self._join_url( diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index cccbc4b8..0b57b4ed 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -69,7 +69,6 @@ def __str__(self) -> str: class HttpClient: - _DEFAULT_MAX_RETRY: int = 5 _DEFAULT_MAX_TIME: int = 60 * 10 @@ -233,7 +232,9 @@ def _send_with_retry( rate_limit_backoff_handler = rate_limit_default_backoff_handler() backoff_handler = http_client_default_backoff_handler(max_tries=max_tries, max_time=max_time) # backoff handlers wrap _send, so it will always return a response - response = backoff_handler(rate_limit_backoff_handler(user_backoff_handler))(request, request_kwargs, log_formatter=log_formatter, exit_on_rate_limit=exit_on_rate_limit) # type: ignore # mypy can't infer that backoff_handler wraps _send + response = backoff_handler(rate_limit_backoff_handler(user_backoff_handler))( + request, request_kwargs, log_formatter=log_formatter, exit_on_rate_limit=exit_on_rate_limit + ) # type: ignore # mypy can't infer that backoff_handler wraps _send return response @@ -244,7 +245,6 @@ def _send( log_formatter: Optional[Callable[[requests.Response], Any]] = None, exit_on_rate_limit: Optional[bool] = False, ) -> requests.Response: - if request not in self._request_attempt_count: self._request_attempt_count[request] = 1 else: diff --git a/airbyte_cdk/test/catalog_builder.py b/airbyte_cdk/test/catalog_builder.py index 235be7c5..ac02a561 100644 --- a/airbyte_cdk/test/catalog_builder.py +++ b/airbyte_cdk/test/catalog_builder.py @@ -45,12 +45,10 @@ def __init__(self) -> None: self._streams: List[ConfiguredAirbyteStreamBuilder] = [] @overload - def with_stream(self, name: ConfiguredAirbyteStreamBuilder) -> "CatalogBuilder": - ... + def with_stream(self, name: ConfiguredAirbyteStreamBuilder) -> "CatalogBuilder": ... @overload - def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder": - ... + def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder": ... 
def with_stream(self, name: Union[str, ConfiguredAirbyteStreamBuilder], sync_mode: Union[SyncMode, None] = None) -> "CatalogBuilder": # As we are introducing a fully fledge ConfiguredAirbyteStreamBuilder, we would like to deprecate the previous interface diff --git a/airbyte_cdk/utils/message_utils.py b/airbyte_cdk/utils/message_utils.py index a862d469..f9c7b65d 100644 --- a/airbyte_cdk/utils/message_utils.py +++ b/airbyte_cdk/utils/message_utils.py @@ -12,7 +12,8 @@ def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor: if not message.state.stream or not message.state.stream.stream_descriptor: # type: ignore[union-attr] # state has `stream` raise ValueError("State message was not in per-stream state format, which is required for record counts.") return HashableStreamDescriptor( - name=message.state.stream.stream_descriptor.name, namespace=message.state.stream.stream_descriptor.namespace # type: ignore[union-attr] # state has `stream` + name=message.state.stream.stream_descriptor.name, + namespace=message.state.stream.stream_descriptor.namespace, # type: ignore[union-attr] # state has `stream` ) case _: raise NotImplementedError(f"get_stream_descriptor is not implemented for message type '{message.type}'.") diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index bd96ea39..bdc975e9 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -91,7 +91,9 @@ def emit_message(self) -> None: print(filtered_message) @classmethod - def from_exception(cls, exc: BaseException, stream_descriptor: Optional[StreamDescriptor] = None, *args, **kwargs) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs + def from_exception( + cls, exc: BaseException, stream_descriptor: Optional[StreamDescriptor] = None, *args, **kwargs + ) -> "AirbyteTracedException": # type: ignore # ignoring because of args and kwargs """ Helper to create an AirbyteTracedException from an existing exception :param exc: the exception that caused the error diff --git a/bin/generate_component_manifest_files.py b/bin/generate_component_manifest_files.py index 152486f9..6a595934 100755 --- a/bin/generate_component_manifest_files.py +++ b/bin/generate_component_manifest_files.py @@ -45,7 +45,6 @@ async def main(): init_module_content = generate_init_module_content() async with dagger.Connection(dagger.Config(log_output=sys.stderr)) as dagger_client: - codegen_container = ( dagger_client.container() .from_(PYTHON_IMAGE) @@ -70,7 +69,7 @@ async def main(): use_entrypoint=True, ) - await ((await post_process_codegen(codegen_container)).directory("/generated_post_processed").export(LOCAL_OUTPUT_DIR_PATH)) + await (await post_process_codegen(codegen_container)).directory("/generated_post_processed").export(LOCAL_OUTPUT_DIR_PATH) anyio.run(main) diff --git a/unit_tests/conftest.py b/unit_tests/conftest.py index ab0c9bb8..e40ddd21 100644 --- a/unit_tests/conftest.py +++ b/unit_tests/conftest.py @@ -16,9 +16,7 @@ def mock_sleep(monkeypatch): def pytest_addoption(parser): - parser.addoption( - "--skipslow", action="store_true", default=False, help="skip slow tests" - ) + parser.addoption("--skipslow", action="store_true", default=False, help="skip slow tests") def pytest_configure(config): diff --git a/unit_tests/connector_builder/test_connector_builder_handler.py b/unit_tests/connector_builder/test_connector_builder_handler.py index 7189c802..0212466d 100644 --- 
a/unit_tests/connector_builder/test_connector_builder_handler.py +++ b/unit_tests/connector_builder/test_connector_builder_handler.py @@ -101,8 +101,7 @@ "values": ["0", "1", "2", "3", "4", "5", "6", "7"], "cursor_field": "item_id", }, - "" - "requester": { + "" "requester": { "path": "/v3/marketing/lists", "authenticator": {"type": "BearerAuthenticator", "api_token": "{{ config.apikey }}"}, "request_parameters": {"a_param": "10"}, @@ -150,8 +149,7 @@ "values": ["0", "1", "2", "3", "4", "5", "6", "7"], "cursor_field": "item_id", }, - "" - "requester": { + "" "requester": { "path": "/v3/marketing/lists", "authenticator": {"type": "OAuthAuthenticator", "api_token": "{{ config.apikey }}"}, "request_parameters": {"a_param": "10"}, @@ -280,13 +278,17 @@ def _mocked_send(self, request, **kwargs) -> requests.Response: def test_handle_resolve_manifest(valid_resolve_manifest_config_file, dummy_catalog): - with mock.patch.object(connector_builder.main, "handle_connector_builder_request", return_value=AirbyteMessage(type=MessageType.RECORD)) as patched_handle: + with mock.patch.object( + connector_builder.main, "handle_connector_builder_request", return_value=AirbyteMessage(type=MessageType.RECORD) + ) as patched_handle: handle_request(["read", "--config", str(valid_resolve_manifest_config_file), "--catalog", str(dummy_catalog)]) assert patched_handle.call_count == 1 def test_handle_test_read(valid_read_config_file, configured_catalog): - with mock.patch.object(connector_builder.main, "handle_connector_builder_request", return_value=AirbyteMessage(type=MessageType.RECORD)) as patch: + with mock.patch.object( + connector_builder.main, "handle_connector_builder_request", return_value=AirbyteMessage(type=MessageType.RECORD) + ) as patch: handle_request(["read", "--config", str(valid_read_config_file), "--catalog", str(configured_catalog)]) assert patch.call_count == 1 diff --git a/unit_tests/destinations/test_destination.py b/unit_tests/destinations/test_destination.py index a03d7ffc..3620b671 100644 --- a/unit_tests/destinations/test_destination.py +++ b/unit_tests/destinations/test_destination.py @@ -98,7 +98,7 @@ def write_file(path: PathLike, content: Union[str, Mapping]): def _wrapped( - msg: Union[AirbyteRecordMessage, AirbyteStateMessage, AirbyteCatalog, ConnectorSpecification, AirbyteConnectionStatus] + msg: Union[AirbyteRecordMessage, AirbyteStateMessage, AirbyteCatalog, ConnectorSpecification, AirbyteConnectionStatus], ) -> AirbyteMessage: if isinstance(msg, AirbyteRecordMessage): return AirbyteMessage(type=Type.RECORD, record=msg) @@ -240,7 +240,10 @@ def test_run_write(self, mocker, destination: Destination, tmp_path, monkeypatch expected_write_result = [_wrapped(_state({"k1": "v1"})), _wrapped(_state({"k2": "v2"}))] mocker.patch.object( - destination, "write", return_value=iter(expected_write_result), autospec=True # convert to iterator to mimic real usage + destination, + "write", + return_value=iter(expected_write_result), + autospec=True, # convert to iterator to mimic real usage ) spec_msg = ConnectorSpecification(connectionSpecification={}) mocker.patch.object(destination, "spec", return_value=spec_msg) diff --git a/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py b/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py index 22c5d34a..38e87d67 100644 --- a/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +++ b/unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py @@ -41,9 +41,7 @@ def check_connection(self, 
logger: logging.Logger, config: Mapping[str, Any]) -> def streams(self, config: Mapping[str, Any]) -> List[Stream]: return [ - self.convert_to_concurrent_stream(self._logger, s, Mock()) - if is_concurrent - else s + self.convert_to_concurrent_stream(self._logger, s, Mock()) if is_concurrent else s for s, is_concurrent in self._streams_to_is_concurrent.items() ] diff --git a/unit_tests/sources/declarative/async_job/test_integration.py b/unit_tests/sources/declarative/async_job/test_integration.py index 7fbd04b9..b3d4f095 100644 --- a/unit_tests/sources/declarative/async_job/test_integration.py +++ b/unit_tests/sources/declarative/async_job/test_integration.py @@ -28,7 +28,6 @@ class MockAsyncJobRepository(AsyncJobRepository): - def start(self, stream_slice: StreamSlice) -> AsyncJob: return AsyncJob("a_job_id", StreamSlice(partition={}, cursor_slice={})) @@ -47,7 +46,6 @@ def delete(self, job: AsyncJob) -> None: class MockSource(AbstractSource): - def __init__(self, stream_slicer: Optional[StreamSlicer] = None) -> None: self._stream_slicer = SinglePartitionRouter({}) if stream_slicer is None else stream_slicer self._message_repository = NoopMessageRepository() @@ -65,7 +63,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: parameters={}, schema_normalization=TypeTransformer(TransformConfig.NoTransform), record_filter=None, - transformations=[] + transformations=[], ) return [ DeclarativeStream( @@ -75,7 +73,10 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: record_selector=noop_record_selector, stream_slicer=self._stream_slicer, job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator( - MockAsyncJobRepository(), stream_slices, JobTracker(_NO_LIMIT), self._message_repository, + MockAsyncJobRepository(), + stream_slices, + JobTracker(_NO_LIMIT), + self._message_repository, ), ), config={}, @@ -100,9 +101,7 @@ def setUp(self) -> None: def test_when_read_then_return_records_from_repository(self) -> None: output = read( - self._source, - self._CONFIG, - CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() + self._source, self._CONFIG, CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() ) assert len(output.records) == 1 @@ -112,9 +111,7 @@ def test_when_read_then_call_stream_slices_only_once(self) -> None: As generating stream slices is very expensive, we want to ensure that during a read, it is only called once. 
""" output = read( - self._source, - self._CONFIG, - CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() + self._source, self._CONFIG, CatalogBuilder().with_stream(ConfiguredAirbyteStreamBuilder().with_name(_A_STREAM_NAME)).build() ) assert not output.errors diff --git a/unit_tests/sources/declarative/async_job/test_job_orchestrator.py b/unit_tests/sources/declarative/async_job/test_job_orchestrator.py index 7f10bb3a..5eb8d569 100644 --- a/unit_tests/sources/declarative/async_job/test_job_orchestrator.py +++ b/unit_tests/sources/declarative/async_job/test_job_orchestrator.py @@ -77,14 +77,12 @@ def setUp(self) -> None: self._job_for_another_slice = self._an_async_job("another api job id", _ANOTHER_STREAM_SLICE) @mock.patch(sleep_mock_target) - def test_when_create_and_get_completed_partitions_then_create_job_and_update_status_until_completed(self, mock_sleep: MagicMock) -> None: + def test_when_create_and_get_completed_partitions_then_create_job_and_update_status_until_completed( + self, mock_sleep: MagicMock + ) -> None: self._job_repository.start.return_value = self._job_for_a_slice status_updates = [AsyncJobStatus.RUNNING, AsyncJobStatus.RUNNING, AsyncJobStatus.COMPLETED] - self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( - { - self._job_for_a_slice: status_updates - } - ) + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs({self._job_for_a_slice: status_updates}) orchestrator = self._orchestrator([_A_STREAM_SLICE]) partitions = list(orchestrator.create_and_get_completed_partitions()) @@ -94,7 +92,9 @@ def test_when_create_and_get_completed_partitions_then_create_job_and_update_sta assert self._job_for_a_slice.update_status.mock_calls == [call(status) for status in status_updates] @mock.patch(sleep_mock_target) - def test_given_one_job_still_running_when_create_and_get_completed_partitions_then_only_update_running_job_status(self, mock_sleep: MagicMock) -> None: + def test_given_one_job_still_running_when_create_and_get_completed_partitions_then_only_update_running_job_status( + self, mock_sleep: MagicMock + ) -> None: self._job_repository.start.side_effect = [self._job_for_a_slice, self._job_for_another_slice] self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( { @@ -112,14 +112,12 @@ def test_given_one_job_still_running_when_create_and_get_completed_partitions_th ] @mock.patch(sleep_mock_target) - def test_given_timeout_when_create_and_get_completed_partitions_then_free_budget_and_raise_exception(self, mock_sleep: MagicMock) -> None: + def test_given_timeout_when_create_and_get_completed_partitions_then_free_budget_and_raise_exception( + self, mock_sleep: MagicMock + ) -> None: job_tracker = JobTracker(1) self._job_repository.start.return_value = self._job_for_a_slice - self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( - { - self._job_for_a_slice: [AsyncJobStatus.TIMED_OUT] - } - ) + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs({self._job_for_a_slice: [AsyncJobStatus.TIMED_OUT]}) orchestrator = self._orchestrator([_A_STREAM_SLICE], job_tracker) with pytest.raises(AirbyteTracedException): @@ -130,11 +128,7 @@ def test_given_timeout_when_create_and_get_completed_partitions_then_free_budget @mock.patch(sleep_mock_target) def test_given_failure_when_create_and_get_completed_partitions_then_raise_exception(self, mock_sleep: MagicMock) -> None: self._job_repository.start.return_value = 
self._job_for_a_slice - self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( - { - self._job_for_a_slice: [AsyncJobStatus.FAILED] - } - ) + self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs({self._job_for_a_slice: [AsyncJobStatus.FAILED]}) orchestrator = self._orchestrator([_A_STREAM_SLICE]) with pytest.raises(AirbyteTracedException): @@ -158,7 +152,9 @@ def _orchestrator(self, slices: List[StreamSlice], job_tracker: Optional[JobTrac job_tracker = job_tracker if job_tracker else JobTracker(_NO_JOB_LIMIT) return AsyncJobOrchestrator(self._job_repository, slices, job_tracker, self._message_repository) - def test_given_more_jobs_than_limit_when_create_and_get_completed_partitions_then_still_return_all_slices_and_free_job_budget(self) -> None: + def test_given_more_jobs_than_limit_when_create_and_get_completed_partitions_then_still_return_all_slices_and_free_job_budget( + self, + ) -> None: job_tracker = JobTracker(1) self._job_repository.start.side_effect = [self._job_for_a_slice, self._job_for_another_slice] self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( @@ -167,7 +163,9 @@ def test_given_more_jobs_than_limit_when_create_and_get_completed_partitions_the self._job_for_another_slice: [AsyncJobStatus.COMPLETED], } ) - orchestrator = self._orchestrator([self._job_for_a_slice.job_parameters(), self._job_for_another_slice.job_parameters()], job_tracker) + orchestrator = self._orchestrator( + [self._job_for_a_slice.job_parameters(), self._job_for_another_slice.job_parameters()], job_tracker + ) partitions = list(orchestrator.create_and_get_completed_partitions()) @@ -196,7 +194,9 @@ def test_given_traced_config_error_when_start_job_and_raise_this_exception_and_a Since this is a config error, we assume the other jobs will fail for the same reasons. 
""" job_tracker = JobTracker(1) - self._job_repository.start.side_effect = MessageRepresentationAirbyteTracedErrors("Can't create job", failure_type=FailureType.config_error) + self._job_repository.start.side_effect = MessageRepresentationAirbyteTracedErrors( + "Can't create job", failure_type=FailureType.config_error + ) orchestrator = AsyncJobOrchestrator(self._job_repository, [_A_STREAM_SLICE], job_tracker, self._message_repository, [ValueError]) @@ -240,7 +240,9 @@ def test_given_exception_when_start_job_and_skip_this_exception(self, mock_sleep assert exception.failure_type == FailureType.config_error # type: ignore # exception should be of type AirbyteTracedException @mock.patch(sleep_mock_target) - def test_given_jobs_failed_more_than_max_attempts_when_create_and_get_completed_partitions_then_free_job_budget(self, mock_sleep: MagicMock) -> None: + def test_given_jobs_failed_more_than_max_attempts_when_create_and_get_completed_partitions_then_free_job_budget( + self, mock_sleep: MagicMock + ) -> None: job_tracker = JobTracker(1) jobs = [self._an_async_job(str(i), _A_STREAM_SLICE) for i in range(_MAX_NUMBER_OF_ATTEMPTS)] self._job_repository.start.side_effect = jobs @@ -262,11 +264,10 @@ def wait_and_free_intent(_job_tracker: JobTracker, _intent_to_free: str) -> None time.sleep(1) print("Waiting done, freeing budget!") _job_tracker.remove_job(_intent_to_free) + self._job_repository.start.return_value = self._job_for_a_slice self._job_repository.update_jobs_status.side_effect = _status_update_per_jobs( - { - self._job_for_a_slice: [AsyncJobStatus.COMPLETED] * _BUFFER - } + {self._job_for_a_slice: [AsyncJobStatus.COMPLETED] * _BUFFER} ) orchestrator = self._orchestrator([_A_STREAM_SLICE], job_tracker) @@ -292,7 +293,9 @@ def _mock_repository(self) -> None: def _an_async_job(self, job_id: str, stream_slice: StreamSlice) -> AsyncJob: return mock.Mock(wraps=AsyncJob(job_id, stream_slice)) - def _accumulate_create_and_get_completed_partitions(self, orchestrator: AsyncJobOrchestrator) -> Tuple[List[AsyncPartition], Optional[Exception]]: + def _accumulate_create_and_get_completed_partitions( + self, orchestrator: AsyncJobOrchestrator + ) -> Tuple[List[AsyncPartition], Optional[Exception]]: result = [] try: for i in orchestrator.create_and_get_completed_partitions(): diff --git a/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py b/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py index 0195a71c..b806edba 100644 --- a/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py +++ b/unit_tests/sources/declarative/concurrency_level/test_concurrency_level.py @@ -19,10 +19,7 @@ def test_stream_slices(default_concurrency: Union[int, str], max_concurrency: int, expected_concurrency: int) -> None: config = {"num_workers": 50} concurrency_level = ConcurrencyLevel( - default_concurrency=default_concurrency, - max_concurrency=max_concurrency, - config=config, - parameters={} + default_concurrency=default_concurrency, max_concurrency=max_concurrency, config=config, parameters={} ) actual_concurrency = concurrency_level.get_concurrency_level() @@ -39,15 +36,12 @@ def test_stream_slices(default_concurrency: Union[int, str], max_concurrency: in ], ) def test_default_concurrency_input_types_and_errors( - config: Mapping[str, Any], - expected_concurrency: Optional[int], - expected_error: Optional[Type[Exception]], + config: Mapping[str, Any], + expected_concurrency: Optional[int], + expected_error: Optional[Type[Exception]], ) -> None: 
concurrency_level = ConcurrencyLevel( - default_concurrency="{{ config['num_workers'] or 30 }}", - max_concurrency=65, - config=config, - parameters={} + default_concurrency="{{ config['num_workers'] or 30 }}", max_concurrency=65, config=config, parameters={} ) if expected_error: @@ -63,9 +57,4 @@ def test_max_concurrency_is_required_for_default_concurrency_using_config() -> N config = {"num_workers": "50"} with pytest.raises(ValueError): - ConcurrencyLevel( - default_concurrency="{{ config['num_workers'] or 40 }}", - max_concurrency=None, - config=config, - parameters={} - ) + ConcurrencyLevel(default_concurrency="{{ config['num_workers'] or 40 }}", max_concurrency=None, config=config, parameters={}) diff --git a/unit_tests/sources/declarative/decoders/test_pagination_decoder_decorator.py b/unit_tests/sources/declarative/decoders/test_pagination_decoder_decorator.py index f440a2b1..70fc26d1 100644 --- a/unit_tests/sources/declarative/decoders/test_pagination_decoder_decorator.py +++ b/unit_tests/sources/declarative/decoders/test_pagination_decoder_decorator.py @@ -11,16 +11,10 @@ def is_stream_response(self) -> bool: return True -@pytest.mark.parametrize( - "decoder_class, expected", - [ - (StreamingJsonDecoder, {}), - (JsonDecoder, {"data": [{"id": 1}, {"id": 2}]}) - ] -) +@pytest.mark.parametrize("decoder_class, expected", [(StreamingJsonDecoder, {}), (JsonDecoder, {"data": [{"id": 1}, {"id": 2}]})]) def test_pagination_decoder_decorator(requests_mock, decoder_class, expected): decoder = PaginationDecoderDecorator(decoder=decoder_class(parameters={})) - response_body = "{\"data\": [{\"id\": 1}, {\"id\": 2}]}" + response_body = '{"data": [{"id": 1}, {"id": 2}]}' requests_mock.register_uri("GET", "https://airbyte.io/", text=response_body) response = requests.get("https://airbyte.io/") assert next(decoder.decode(response)) == expected diff --git a/unit_tests/sources/declarative/decoders/test_xml_decoder.py b/unit_tests/sources/declarative/decoders/test_xml_decoder.py index 87c78dae..43e970ad 100644 --- a/unit_tests/sources/declarative/decoders/test_xml_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_xml_decoder.py @@ -7,30 +7,21 @@ @pytest.mark.parametrize( - "response_body, expected", - [ - ( - "", - {"item": {"@name": "item_1"}} - ), - ( - "Item 1Item 2", - {"data": {"item": [{"@name": "item_1", "#text": "Item 1"}, {"@name": "item_2", "#text": "Item 2"}]}} - ), - ( - None, - {} - ), - ( - "", - {} - ), - ( - "1Item 1", - {'item': {'@xmlns:ns': 'https://airbyte.io', 'ns:id': '1', 'ns:name': 'Item 1'}} - ) - ], - ids=["one_element_response", "multi_element_response", "empty_response", "malformed_xml_response", "xml_with_namespace_response"] + "response_body, expected", + [ + ('', {"item": {"@name": "item_1"}}), + ( + 'Item 1Item 2', + {"data": {"item": [{"@name": "item_1", "#text": "Item 1"}, {"@name": "item_2", "#text": "Item 2"}]}}, + ), + (None, {}), + ('', {}), + ( + '1Item 1', + {"item": {"@xmlns:ns": "https://airbyte.io", "ns:id": "1", "ns:name": "Item 1"}}, + ), + ], + ids=["one_element_response", "multi_element_response", "empty_response", "malformed_xml_response", "xml_with_namespace_response"], ) def test_xml_decoder(requests_mock, response_body, expected): requests_mock.register_uri("GET", "https://airbyte.io/", text=response_body) diff --git a/unit_tests/sources/declarative/extractors/test_record_filter.py b/unit_tests/sources/declarative/extractors/test_record_filter.py index 498f61b7..5e73d78e 100644 --- 
a/unit_tests/sources/declarative/extractors/test_record_filter.py +++ b/unit_tests/sources/declarative/extractors/test_record_filter.py @@ -47,29 +47,29 @@ "filter_template, records, expected_records", [ ( - "{{ record['created_at'] > stream_state['created_at'] }}", - [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], - [{"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], + "{{ record['created_at'] > stream_state['created_at'] }}", + [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], + [{"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], ), ( - "{{ record['last_seen'] >= stream_slice['last_seen'] }}", - [{"id": 1, "last_seen": "06-06-21"}, {"id": 2, "last_seen": "06-07-21"}, {"id": 3, "last_seen": "06-10-21"}], - [{"id": 3, "last_seen": "06-10-21"}], + "{{ record['last_seen'] >= stream_slice['last_seen'] }}", + [{"id": 1, "last_seen": "06-06-21"}, {"id": 2, "last_seen": "06-07-21"}, {"id": 3, "last_seen": "06-10-21"}], + [{"id": 3, "last_seen": "06-10-21"}], ), ( - "{{ record['id'] >= next_page_token['last_seen_id'] }}", - [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], - [{"id": 14}, {"id": 15}], + "{{ record['id'] >= next_page_token['last_seen_id'] }}", + [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], + [{"id": 14}, {"id": 15}], ), ( - "{{ record['id'] >= next_page_token['path_to_nowhere'] }}", - [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], - [], + "{{ record['id'] >= next_page_token['path_to_nowhere'] }}", + [{"id": 11}, {"id": 12}, {"id": 13}, {"id": 14}, {"id": 15}], + [], ), ( - "{{ record['created_at'] > parameters['created_at'] }}", - [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], - [{"id": 3, "created_at": "06-08-21"}], + "{{ record['created_at'] > parameters['created_at'] }}", + [{"id": 1, "created_at": "06-06-21"}, {"id": 2, "created_at": "06-07-21"}, {"id": 3, "created_at": "06-08-21"}], + [{"id": 3, "created_at": "06-08-21"}], ), ( "{{ record['created_at'] > stream_slice.extra_fields['created_at'] }}", @@ -111,54 +111,54 @@ def test_record_filter(filter_template: str, records: List[Mapping], expected_re (DATE_TIME_WITH_TZ_FORMAT, {}, None, "2021-01-05T00:00:00+00:00", RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, [2, 3]), (DATE_TIME_WITH_TZ_FORMAT, {}, None, None, RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, [2, 3, 4]), ( - DATE_TIME_WITH_TZ_FORMAT, - {"created_at": "2021-01-04T00:00:00+00:00"}, - None, - "2021-01-05T00:00:00+00:00", - RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, - [3], + DATE_TIME_WITH_TZ_FORMAT, + {"created_at": "2021-01-04T00:00:00+00:00"}, + None, + "2021-01-05T00:00:00+00:00", + RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, + [3], ), ( - DATE_TIME_WITH_TZ_FORMAT, - {"created_at": "2021-01-04T00:00:00+00:00"}, - None, - None, - RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, - [3, 4], + DATE_TIME_WITH_TZ_FORMAT, + {"created_at": "2021-01-04T00:00:00+00:00"}, + None, + None, + RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, + [3, 4], ), ( - DATE_TIME_WITH_TZ_FORMAT, - {}, - "{{ record['id'] % 2 == 1 }}", - "2021-01-05T00:00:00+00:00", - RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, - [3], + DATE_TIME_WITH_TZ_FORMAT, + {}, + "{{ record['id'] % 2 == 1 }}", + "2021-01-05T00:00:00+00:00", + RECORDS_TO_FILTER_DATE_TIME_WITH_TZ_FORMAT, + [3], ), (DATE_TIME_WITHOUT_TZ_FORMAT, {}, None, 
"2021-01-05T00:00:00", RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, [2, 3]), (DATE_TIME_WITHOUT_TZ_FORMAT, {}, None, None, RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, [2, 3, 4]), ( - DATE_TIME_WITHOUT_TZ_FORMAT, - {"created_at": "2021-01-04T00:00:00"}, - None, - "2021-01-05T00:00:00", - RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, - [3], + DATE_TIME_WITHOUT_TZ_FORMAT, + {"created_at": "2021-01-04T00:00:00"}, + None, + "2021-01-05T00:00:00", + RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, + [3], ), ( - DATE_TIME_WITHOUT_TZ_FORMAT, - {"created_at": "2021-01-04T00:00:00"}, - None, - None, - RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, - [3, 4], + DATE_TIME_WITHOUT_TZ_FORMAT, + {"created_at": "2021-01-04T00:00:00"}, + None, + None, + RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, + [3, 4], ), ( - DATE_TIME_WITHOUT_TZ_FORMAT, - {}, - "{{ record['id'] % 2 == 1 }}", - "2021-01-05T00:00:00", - RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, - [3], + DATE_TIME_WITHOUT_TZ_FORMAT, + {}, + "{{ record['id'] % 2 == 1 }}", + "2021-01-05T00:00:00", + RECORDS_TO_FILTER_DATE_TIME_WITHOUT_TZ_FORMAT, + [3], ), ], ids=[ @@ -180,12 +180,12 @@ def test_record_filter(filter_template: str, records: List[Mapping], expected_re ], ) def test_client_side_record_filter_decorator_no_parent_stream( - datetime_format: str, - stream_state: Optional[Mapping], - record_filter_expression: str, - end_datetime: Optional[str], - records_to_filter: List[Mapping], - expected_record_ids: List[int], + datetime_format: str, + stream_state: Optional[Mapping], + record_filter_expression: str, + end_datetime: Optional[str], + records_to_filter: List[Mapping], + expected_record_ids: List[int], ): date_time_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime(datetime="2021-01-01", datetime_format=DATE_FORMAT, parameters={}), @@ -218,80 +218,52 @@ def test_client_side_record_filter_decorator_no_parent_stream( "stream_state, cursor_type, expected_record_ids", [ # Use only DatetimeBasedCursor - ({}, 'datetime', [2, 3, 5]), + ({}, "datetime", [2, 3, 5]), # Use GlobalSubstreamCursor with no state - ({}, 'global_substream', [2, 3, 5]), + ({}, "global_substream", [2, 3, 5]), # Use GlobalSubstreamCursor with global state - ( - { - 'state': {'created_at': '2021-01-03'} - }, - 'global_substream', - [2, 3] - ), + ({"state": {"created_at": "2021-01-03"}}, "global_substream", [2, 3]), # Use PerPartitionWithGlobalCursor with partition state ( - { - 'use_global_cursor': False, - 'state': {'created_at': '2021-01-10'}, - 'states': [ - { - 'partition': {'id': 'some_parent_id', 'parent_slice': {}}, - 'cursor': {'created_at': '2021-01-03'} - } - ] - }, - 'per_partition_with_global', - [2, 3] + { + "use_global_cursor": False, + "state": {"created_at": "2021-01-10"}, + "states": [{"partition": {"id": "some_parent_id", "parent_slice": {}}, "cursor": {"created_at": "2021-01-03"}}], + }, + "per_partition_with_global", + [2, 3], ), # Use PerPartitionWithGlobalCursor with global state ( - { - 'use_global_cursor': True, - 'state': {'created_at': '2021-01-03'}, - 'states': [ - { - 'partition': {'id': 'some_parent_id', 'parent_slice': {}}, - 'cursor': {'created_at': '2021-01-13'} - } - ] - }, - 'per_partition_with_global', - [2, 3] + { + "use_global_cursor": True, + "state": {"created_at": "2021-01-03"}, + "states": [{"partition": {"id": "some_parent_id", "parent_slice": {}}, "cursor": {"created_at": "2021-01-13"}}], + }, + "per_partition_with_global", + [2, 3], ), # Use PerPartitionWithGlobalCursor with partition state missing, global cursor 
used - ( - { - 'use_global_cursor': True, - 'state': {'created_at': '2021-01-03'} - }, - 'per_partition_with_global', - [2, 3] - ), + ({"use_global_cursor": True, "state": {"created_at": "2021-01-03"}}, "per_partition_with_global", [2, 3]), # Use PerPartitionWithGlobalCursor with partition state missing, global cursor not used ( - { - 'use_global_cursor': False, - 'state': {'created_at': '2021-01-03'} - }, - 'per_partition_with_global', - [2, 3, 5] # Global cursor not used, start date used + {"use_global_cursor": False, "state": {"created_at": "2021-01-03"}}, + "per_partition_with_global", + [2, 3, 5], # Global cursor not used, start date used ), ], ids=[ - 'datetime_cursor_only', - 'global_substream_no_state', - 'global_substream_with_state', - 'per_partition_with_partition_state', - 'per_partition_with_global_state', - 'per_partition_partition_missing_global_cursor_used', - 'per_partition_partition_missing_global_cursor_not_used', - ] + "datetime_cursor_only", + "global_substream_no_state", + "global_substream_with_state", + "per_partition_with_partition_state", + "per_partition_with_global_state", + "per_partition_partition_missing_global_cursor_used", + "per_partition_partition_missing_global_cursor_not_used", + ], ) def test_client_side_record_filter_decorator_with_cursor_types( - stream_state: Optional[Mapping], - cursor_type: str, - expected_record_ids: List[int] + stream_state: Optional[Mapping], cursor_type: str, expected_record_ids: List[int] ): def date_time_based_cursor_factory() -> DatetimeBasedCursor: return DatetimeBasedCursor( @@ -317,17 +289,16 @@ def date_time_based_cursor_factory() -> DatetimeBasedCursor: parent_key="id", partition_field="id", stream=DeclarativeStream( - type="DeclarativeStream", - retriever=CustomRetriever(type="CustomRetriever", class_name="a_class_name") + type="DeclarativeStream", retriever=CustomRetriever(type="CustomRetriever", class_name="a_class_name") ), ) ], ) - if cursor_type == 'datetime': + if cursor_type == "datetime": # Use only DatetimeBasedCursor pass # No additional cursor needed - elif cursor_type == 'global_substream': + elif cursor_type == "global_substream": # Create GlobalSubstreamCursor instance substream_cursor = GlobalSubstreamCursor( stream_cursor=date_time_based_cursor, @@ -335,7 +306,7 @@ def date_time_based_cursor_factory() -> DatetimeBasedCursor: ) if stream_state: substream_cursor.set_initial_state(stream_state) - elif cursor_type == 'per_partition_with_global': + elif cursor_type == "per_partition_with_global": # Create PerPartitionWithGlobalCursor instance substream_cursor = PerPartitionWithGlobalCursor( cursor_factory=CursorFactory(date_time_based_cursor_factory), diff --git a/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py b/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py index 8771a702..98251df6 100644 --- a/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py +++ b/unit_tests/sources/declarative/extractors/test_response_to_file_extractor.py @@ -66,7 +66,7 @@ def large_event_response_fixture(): csv_writer = csv.writer(csvfile) csv_writer.writerow(["username", "email"]) # headers for _ in range(lines_in_response): - csv_writer.writerow(["a_username","email1@example.com"]) + csv_writer.writerow(["a_username", "email1@example.com"]) yield (lines_in_response, file_path) os.remove(file_path) diff --git a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py 
b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py index f47a890b..e14c730d 100644 --- a/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py +++ b/unit_tests/sources/declarative/parsers/test_model_to_component_factory.py @@ -2226,7 +2226,6 @@ def get_json_schema(self) -> Mapping[str, Any]: def test_create_custom_schema_loader(): - definition = { "type": "CustomSchemaLoader", "class_name": "unit_tests.sources.declarative.parsers.test_model_to_component_factory.MyCustomSchemaLoader", @@ -2561,14 +2560,13 @@ def test_use_default_request_options_provider(): "stream_state,expected_start", [ pytest.param({}, "2024-08-01T00:00:00.000000Z", id="test_create_concurrent_cursor_without_state"), - pytest.param({"updated_at": "2024-10-01T00:00:00.000000Z"}, "2024-10-01T00:00:00.000000Z", id="test_create_concurrent_cursor_with_state"), - ] + pytest.param( + {"updated_at": "2024-10-01T00:00:00.000000Z"}, "2024-10-01T00:00:00.000000Z", id="test_create_concurrent_cursor_with_state" + ), + ], ) def test_create_concurrent_cursor_from_datetime_based_cursor_all_fields(stream_state, expected_start): - config = { - "start_time": "2024-08-01T00:00:00.000000Z", - "end_time": "2024-10-15T00:00:00.000000Z" - } + config = {"start_time": "2024-08-01T00:00:00.000000Z", "end_time": "2024-10-15T00:00:00.000000Z"} expected_cursor_field = "updated_at" expected_start_boundary = "custom_start" @@ -2621,7 +2619,7 @@ def test_create_concurrent_cursor_from_datetime_based_cursor_all_fields(stream_s "partition_field_end": "custom_end", "step": "P10D", "cursor_granularity": "PT0.000001S", - "lookback_window": "P3D" + "lookback_window": "P3D", } concurrent_cursor, stream_state_converter = connector_builder_factory.create_concurrent_cursor_from_datetime_based_cursor( @@ -2657,27 +2655,34 @@ def test_create_concurrent_cursor_from_datetime_based_cursor_all_fields(stream_s @pytest.mark.parametrize( "cursor_fields_to_replace,assertion_field,expected_value,expected_error", [ - pytest.param({"partition_field_start": None}, "slice_boundary_fields", ('start_time', 'custom_end'), None, id="test_no_partition_field_start"), - pytest.param({"partition_field_end": None}, "slice_boundary_fields", ('custom_start', 'end_time'), None, id="test_no_partition_field_end"), + pytest.param( + {"partition_field_start": None}, "slice_boundary_fields", ("start_time", "custom_end"), None, id="test_no_partition_field_start" + ), + pytest.param( + {"partition_field_end": None}, "slice_boundary_fields", ("custom_start", "end_time"), None, id="test_no_partition_field_end" + ), pytest.param({"lookback_window": None}, "_lookback_window", None, None, id="test_no_lookback_window"), pytest.param({"lookback_window": "{{ config.does_not_exist }}"}, "_lookback_window", None, None, id="test_no_lookback_window"), pytest.param({"step": None}, None, None, ValueError, id="test_no_step_raises_exception"), pytest.param({"cursor_granularity": None}, None, None, ValueError, id="test_no_cursor_granularity_exception"), - pytest.param({ - "end_time": None, - "cursor_granularity": None, - "step": None, - }, "_slice_range", datetime.timedelta.max, None, id="test_uses_a_single_time_interval_when_no_specified_step_and_granularity"), - ] + pytest.param( + { + "end_time": None, + "cursor_granularity": None, + "step": None, + }, + "_slice_range", + datetime.timedelta.max, + None, + id="test_uses_a_single_time_interval_when_no_specified_step_and_granularity", + ), + ], ) @freezegun.freeze_time("2024-10-01T00:00:00") def 
test_create_concurrent_cursor_from_datetime_based_cursor(cursor_fields_to_replace, assertion_field, expected_value, expected_error): connector_state_manager = ConnectorStateManager() - config = { - "start_time": "2024-08-01T00:00:00.000000Z", - "end_time": "2024-09-01T00:00:00.000000Z" - } + config = {"start_time": "2024-08-01T00:00:00.000000Z", "end_time": "2024-09-01T00:00:00.000000Z"} stream_name = "test" @@ -2738,10 +2743,7 @@ def test_create_concurrent_cursor_uses_min_max_datetime_format_if_defined(): connector_state_manager = ConnectorStateManager() - config = { - "start_time": "2024-08-01T00:00:00Z", - "end_time": "2024-09-01T00:00:00Z" - } + config = {"start_time": "2024-08-01T00:00:00Z", "end_time": "2024-09-01T00:00:00Z"} connector_builder_factory = ModelToComponentFactory(emit_connector_builder_messages=True) @@ -2751,21 +2753,13 @@ def test_create_concurrent_cursor_uses_min_max_datetime_format_if_defined(): "type": "DatetimeBasedCursor", "cursor_field": "updated_at", "datetime_format": "%Y-%m-%dT%H:%MZ", - "start_datetime": { - "type": "MinMaxDatetime", - "datetime": "{{ config.start_time }}", - "datetime_format": "%Y-%m-%dT%H:%M:%SZ" - }, - "end_datetime": { - "type": "MinMaxDatetime", - "datetime": "{{ config.end_time }}", - "datetime_format": "%Y-%m-%dT%H:%M:%SZ" - }, + "start_datetime": {"type": "MinMaxDatetime", "datetime": "{{ config.start_time }}", "datetime_format": "%Y-%m-%dT%H:%M:%SZ"}, + "end_datetime": {"type": "MinMaxDatetime", "datetime": "{{ config.end_time }}", "datetime_format": "%Y-%m-%dT%H:%M:%SZ"}, "partition_field_start": "custom_start", "partition_field_end": "custom_end", "step": "P10D", "cursor_granularity": "PT0.000001S", - "lookback_window": "P3D" + "lookback_window": "P3D", } concurrent_cursor, stream_state_converter = connector_builder_factory.create_concurrent_cursor_from_datetime_based_cursor( diff --git a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py index e41f5485..76a8f082 100644 --- a/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py +++ b/unit_tests/sources/declarative/partition_routers/test_parent_state_stream.py @@ -646,11 +646,7 @@ def test_incremental_parent_state(test_name, manifest, mock_requests, expected_r type=AirbyteStateType.STREAM, stream=AirbyteStreamState( stream_descriptor=StreamDescriptor(name="post_comment_votes", namespace=None), - stream_state=AirbyteStateBlob( - { - "created_at": "2024-01-02T00:00:00Z" - } - ), + stream_state=AirbyteStateBlob({"created_at": "2024-01-02T00:00:00Z"}), ), ) ], @@ -682,8 +678,8 @@ def test_incremental_parent_state(test_name, manifest, mock_requests, expected_r "cursor": {"created_at": "2024-01-13T00:00:00Z"}, }, { - 'partition': {'id': 12, 'parent_slice': {'id': 1, 'parent_slice': {}}}, - 'cursor': {'created_at': '2024-01-02T00:00:00Z'}, + "partition": {"id": 12, "parent_slice": {"id": 1, "parent_slice": {}}}, + "cursor": {"created_at": "2024-01-02T00:00:00Z"}, }, { "partition": {"id": 20, "parent_slice": {"id": 2, "parent_slice": {}}}, diff --git a/unit_tests/sources/declarative/requesters/error_handlers/test_default_http_response_filter.py b/unit_tests/sources/declarative/requesters/error_handlers/test_default_http_response_filter.py index b3e4c517..ada35a51 100644 --- a/unit_tests/sources/declarative/requesters/error_handlers/test_default_http_response_filter.py +++ 
b/unit_tests/sources/declarative/requesters/error_handlers/test_default_http_response_filter.py @@ -21,7 +21,6 @@ ], ) def test_matches_mapped_http_status_code(http_code, expected_error_resolution): - response = MagicMock(spec=Response) response.status_code = http_code @@ -35,7 +34,6 @@ def test_matches_mapped_http_status_code(http_code, expected_error_resolution): def test_matches_mapped_exception(): - exc = MagicMock(spec=RequestException) response_filter = DefaultHttpResponseFilter( @@ -48,7 +46,6 @@ def test_matches_mapped_exception(): def test_unmapped_http_status_code_returns_default_error_resolution(): - response = MagicMock(spec=Response) response.status_code = 508 diff --git a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py index 41d973a9..ff341fd8 100644 --- a/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py +++ b/unit_tests/sources/declarative/requesters/paginators/test_default_paginator.py @@ -159,7 +159,7 @@ def test_default_paginator_with_cursor( expected_next_page_token, limit, decoder, - response_body + response_body, ): page_size_request_option = RequestOption( inject_into=RequestOptionType.request_parameter, field_name="{{parameters['page_limit']}}", parameters={"page_limit": "limit"} @@ -352,11 +352,13 @@ def test_paginator_with_page_option_no_page_size(): pagination_strategy = OffsetIncrement(config={}, page_size=None, parameters={}) with pytest.raises(ValueError): - DefaultPaginator( - page_size_option=MagicMock(), - page_token_option=RequestOption("limit", RequestOptionType.request_parameter, parameters={}), - pagination_strategy=pagination_strategy, - config=MagicMock(), - url_base=MagicMock(), - parameters={}, - ), + ( + DefaultPaginator( + page_size_option=MagicMock(), + page_token_option=RequestOption("limit", RequestOptionType.request_parameter, parameters={}), + pagination_strategy=pagination_strategy, + config=MagicMock(), + url_base=MagicMock(), + parameters={}, + ), + ) diff --git a/unit_tests/sources/declarative/requesters/request_options/test_datetime_based_request_options_provider.py b/unit_tests/sources/declarative/requesters/request_options/test_datetime_based_request_options_provider.py index 816f83f9..9fca23be 100644 --- a/unit_tests/sources/declarative/requesters/request_options/test_datetime_based_request_options_provider.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_datetime_based_request_options_provider.py @@ -86,12 +86,7 @@ ], ) def test_datetime_based_request_options_provider( - start_time_option, - end_time_option, - partition_field_start, - partition_field_end, - stream_slice, - expected_request_options + start_time_option, end_time_option, partition_field_start, partition_field_end, stream_slice, expected_request_options ): config = {} request_options_provider = DatetimeBasedRequestOptionsProvider( @@ -100,7 +95,7 @@ def test_datetime_based_request_options_provider( partition_field_start=partition_field_start, partition_field_end=partition_field_end, config=config, - parameters={} + parameters={}, ) request_option_type = start_time_option.inject_into if isinstance(start_time_option, RequestOption) else None diff --git a/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py b/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py index b8239d43..a233d371 100644 --- 
a/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py +++ b/unit_tests/sources/declarative/requesters/request_options/test_interpolated_request_options_provider.py @@ -108,11 +108,23 @@ def test_error_on_create_for_both_request_json_and_data(): pytest.param("request_parameter", {"start": "{{ slice_interval.get('start_date') }}"}, False, id="test_request_parameter_no_state"), pytest.param("request_header", {"start": "{{ stream_state.get('start_date') }}"}, True, id="test_request_header_has_state"), pytest.param("request_header", {"start": "{{ slice_interval.get('start_date') }}"}, False, id="test_request_header_no_state"), - pytest.param("request_body_data", "[{'query': {'type': 'timestamp', 'value': stream_state.get('start_date')}}]", True, id="test_request_body_data_has_state"), - pytest.param("request_body_data", "[{'query': {'type': 'timestamp', 'value': stream_interval.get('start_date')}}]", False, id="test_request_body_data_no_state"), + pytest.param( + "request_body_data", + "[{'query': {'type': 'timestamp', 'value': stream_state.get('start_date')}}]", + True, + id="test_request_body_data_has_state", + ), + pytest.param( + "request_body_data", + "[{'query': {'type': 'timestamp', 'value': stream_interval.get('start_date')}}]", + False, + id="test_request_body_data_no_state", + ), pytest.param("request_body_json", {"start": "{{ stream_state.get('start_date') }}"}, True, id="test_request_body_json_has_state"), - pytest.param("request_body_json", {"start": "{{ slice_interval.get('start_date') }}"}, False, id="test_request_request_body_json_no_state"), - ] + pytest.param( + "request_body_json", {"start": "{{ slice_interval.get('start_date') }}"}, False, id="test_request_request_body_json_no_state" + ), + ], ) def test_request_options_contain_stream_state(request_option_type, request_input, contains_state): request_options_provider: InterpolatedRequestOptionsProvider diff --git a/unit_tests/sources/declarative/requesters/test_http_job_repository.py b/unit_tests/sources/declarative/requesters/test_http_job_repository.py index 90768d8b..98ccd600 100644 --- a/unit_tests/sources/declarative/requesters/test_http_job_repository.py +++ b/unit_tests/sources/declarative/requesters/test_http_job_repository.py @@ -145,7 +145,7 @@ def test_given_different_statuses_when_update_jobs_status_then_update_status_pro HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "pending"})), HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "failure"})), HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "ready"})), - ] + ], ) job = self._repository.start(_ANY_SLICE) @@ -195,11 +195,7 @@ def test_given_pagination_when_fetch_records_then_yield_records_from_all_pages(s self._mock_create_response(_A_JOB_ID) self._http_mocker.get( HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), - HttpResponse(body=json.dumps({ - "id": _A_JOB_ID, - "status": "ready", - "urls": [_JOB_FIRST_URL] - })) + HttpResponse(body=json.dumps({"id": _A_JOB_ID, "status": "ready", "urls": [_JOB_FIRST_URL]})), ) self._http_mocker.get( HttpRequest(url=_JOB_FIRST_URL), @@ -220,14 +216,18 @@ def test_given_multiple_urls_when_fetch_records_then_fetch_from_multiple_urls(se self._mock_create_response(_A_JOB_ID) self._http_mocker.get( HttpRequest(url=f"{_EXPORT_URL}/{_A_JOB_ID}"), - HttpResponse(body=json.dumps({ - "id": _A_JOB_ID, - "status": "ready", - "urls": [ - _JOB_FIRST_URL, - _JOB_SECOND_URL, - ] - })) + HttpResponse( + body=json.dumps( + { + "id": _A_JOB_ID, + "status": "ready", + 
"urls": [ + _JOB_FIRST_URL, + _JOB_SECOND_URL, + ], + } + ) + ), ) self._http_mocker.get( HttpRequest(url=_JOB_FIRST_URL), diff --git a/unit_tests/sources/declarative/test_concurrent_declarative_source.py b/unit_tests/sources/declarative/test_concurrent_declarative_source.py index da3e9af3..2081d6f7 100644 --- a/unit_tests/sources/declarative/test_concurrent_declarative_source.py +++ b/unit_tests/sources/declarative/test_concurrent_declarative_source.py @@ -38,9 +38,7 @@ from airbyte_cdk.utils import AirbyteTracedException from deprecated.classic import deprecated -_CONFIG = { - "start_date": "2024-07-01T00:00:00.000Z" -} +_CONFIG = {"start_date": "2024-07-01T00:00:00.000Z"} _CATALOG = ConfiguredAirbyteCatalog( streams=[ @@ -63,284 +61,229 @@ stream=AirbyteStream(name="party_members_skills", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append, - ) + ), ] ) -_LOCATIONS_RESPONSE = HttpResponse(json.dumps([ - {"id": "444", "name": "Yongen-jaya", "updated_at": "2024-08-10"}, - {"id": "scramble", "name": "Shibuya", "updated_at": "2024-08-10"}, - {"id": "aoyama", "name": "Aoyama-itchome", "updated_at": "2024-08-10"}, - {"id": "shin123", "name": "Shinjuku", "updated_at": "2024-08-10"}, -])) -_PALACES_RESPONSE = HttpResponse(json.dumps([ - {"id": "0", "world": "castle", "owner": "kamoshida"}, - {"id": "1", "world": "museum", "owner": "madarame"}, - {"id": "2", "world": "bank", "owner": "kaneshiro"}, - {"id": "3", "world": "pyramid", "owner": "futaba"}, - {"id": "4", "world": "spaceport", "owner": "okumura"}, - {"id": "5", "world": "casino", "owner": "nijima"}, - {"id": "6", "world": "cruiser", "owner": "shido"}, -])) -_PARTY_MEMBERS_SKILLS_RESPONSE = HttpResponse(json.dumps([ - {"id": "0", "name": "hassou tobi"}, - {"id": "1", "name": "mafreidyne"}, - {"id": "2", "name": "myriad truths"}, -])) +_LOCATIONS_RESPONSE = HttpResponse( + json.dumps( + [ + {"id": "444", "name": "Yongen-jaya", "updated_at": "2024-08-10"}, + {"id": "scramble", "name": "Shibuya", "updated_at": "2024-08-10"}, + {"id": "aoyama", "name": "Aoyama-itchome", "updated_at": "2024-08-10"}, + {"id": "shin123", "name": "Shinjuku", "updated_at": "2024-08-10"}, + ] + ) +) +_PALACES_RESPONSE = HttpResponse( + json.dumps( + [ + {"id": "0", "world": "castle", "owner": "kamoshida"}, + {"id": "1", "world": "museum", "owner": "madarame"}, + {"id": "2", "world": "bank", "owner": "kaneshiro"}, + {"id": "3", "world": "pyramid", "owner": "futaba"}, + {"id": "4", "world": "spaceport", "owner": "okumura"}, + {"id": "5", "world": "casino", "owner": "nijima"}, + {"id": "6", "world": "cruiser", "owner": "shido"}, + ] + ) +) +_PARTY_MEMBERS_SKILLS_RESPONSE = HttpResponse( + json.dumps( + [ + {"id": "0", "name": "hassou tobi"}, + {"id": "1", "name": "mafreidyne"}, + {"id": "2", "name": "myriad truths"}, + ] + ) +) _EMPTY_RESPONSE = HttpResponse(json.dumps([])) _NOW = "2024-09-10T00:00:00" _NO_STATE_PARTY_MEMBERS_SLICES_AND_RESPONSES = [ - ({"start": "2024-07-01", "end": "2024-07-15"}, HttpResponse(json.dumps([{"id": "amamiya", "first_name": "ren", "last_name": "amamiya", "updated_at": "2024-07-10"}]))), + ( + {"start": "2024-07-01", "end": "2024-07-15"}, + HttpResponse(json.dumps([{"id": "amamiya", "first_name": "ren", "last_name": "amamiya", "updated_at": "2024-07-10"}])), + ), ({"start": "2024-07-16", "end": "2024-07-30"}, _EMPTY_RESPONSE), - ({"start": "2024-07-31", "end": "2024-08-14"}, HttpResponse(json.dumps([{"id": "nijima", "first_name": "makoto", 
"last_name": "nijima", "updated_at": "2024-08-10"}, ]))), + ( + {"start": "2024-07-31", "end": "2024-08-14"}, + HttpResponse( + json.dumps( + [ + {"id": "nijima", "first_name": "makoto", "last_name": "nijima", "updated_at": "2024-08-10"}, + ] + ) + ), + ), ({"start": "2024-08-15", "end": "2024-08-29"}, _EMPTY_RESPONSE), - ({"start": "2024-08-30", "end": "2024-09-10"}, HttpResponse(json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}]))), + ( + {"start": "2024-08-30", "end": "2024-09-10"}, + HttpResponse(json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}])), + ), ] _MANIFEST = { - "version": "5.0.0", - "definitions": { - "selector": { - "type": "RecordSelector", - "extractor": { - "type": "DpathExtractor", - "field_path": [] - } - }, - "requester": { - "type": "HttpRequester", - "url_base": "https://persona.metaverse.com", - "http_method": "GET", - "authenticator": { - "type": "BasicHttpAuthenticator", - "username": "{{ config['api_key'] }}", - "password": "{{ config['secret_key'] }}" - }, - "error_handler": { - "type": "DefaultErrorHandler", - "response_filters": [ - { - "http_codes": [403], - "action": "FAIL", - "failure_type": "config_error", - "error_message": "Access denied due to lack of permission or invalid API/Secret key or wrong data region." - }, - { - "http_codes": [404], - "action": "IGNORE", - "error_message": "No data available for the time range requested." - } - ] - }, - }, - "retriever": { - "type": "SimpleRetriever", - "record_selector": { - "$ref": "#/definitions/selector" - }, - "paginator": { - "type": "NoPagination" - }, - "requester": { - "$ref": "#/definitions/requester" - } - }, - "incremental_cursor": { - "type": "DatetimeBasedCursor", - "start_datetime": { - "datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}" - }, - "end_datetime": { - "datetime": "{{ now_utc().strftime('%Y-%m-%d') }}" - }, - "datetime_format": "%Y-%m-%d", - "cursor_datetime_formats": ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"], - "cursor_granularity": "P1D", - "step": "P15D", - "cursor_field": "updated_at", - "lookback_window": "P5D", - "start_time_option": { - "type": "RequestOption", - "field_name": "start", - "inject_into": "request_parameter" - }, - "end_time_option": { - "type": "RequestOption", - "field_name": "end", - "inject_into": "request_parameter" - } - }, - "base_stream": { - "retriever": { - "$ref": "#/definitions/retriever" - } - }, - "base_incremental_stream": { - "retriever": { - "$ref": "#/definitions/retriever", + "version": "5.0.0", + "definitions": { + "selector": {"type": "RecordSelector", "extractor": {"type": "DpathExtractor", "field_path": []}}, "requester": { - "$ref": "#/definitions/requester" - } - }, - "incremental_sync": { - "$ref": "#/definitions/incremental_cursor" - } - }, - "party_members_stream": { - "$ref": "#/definitions/base_incremental_stream", - "retriever": { - "$ref": "#/definitions/base_incremental_stream/retriever", - "record_selector": { - "$ref": "#/definitions/selector" - } - }, - "$parameters": { - "name": "party_members", - "primary_key": "id", - "path": "/party_members" - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "$schema": "https://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "description": "The identifier", - "type": ["null", "string"], + "type": "HttpRequester", + "url_base": "https://persona.metaverse.com", + "http_method": "GET", + "authenticator": { 
+ "type": "BasicHttpAuthenticator", + "username": "{{ config['api_key'] }}", + "password": "{{ config['secret_key'] }}", }, - "name": { - "description": "The name of the party member", - "type": ["null", "string"] - } - } - } - } - }, - "palaces_stream": { - "$ref": "#/definitions/base_stream", - "$parameters": { - "name": "palaces", - "primary_key": "id", - "path": "/palaces" - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "$schema": "https://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "description": "The identifier", - "type": ["null", "string"], + "error_handler": { + "type": "DefaultErrorHandler", + "response_filters": [ + { + "http_codes": [403], + "action": "FAIL", + "failure_type": "config_error", + "error_message": "Access denied due to lack of permission or invalid API/Secret key or wrong data region.", + }, + {"http_codes": [404], "action": "IGNORE", "error_message": "No data available for the time range requested."}, + ], }, - "name": { - "description": "The name of the metaverse palace", - "type": ["null", "string"] - } - } - } - } - }, - "locations_stream": { - "$ref": "#/definitions/base_incremental_stream", - "retriever": { - "$ref": "#/definitions/base_incremental_stream/retriever", - "requester": { - "$ref": "#/definitions/base_incremental_stream/retriever/requester", - "request_parameters": { - "m": "active", - "i": "1", - "g": "country" - } }, - "record_selector": { - "$ref": "#/definitions/selector" - } - }, - "incremental_sync": { - "$ref": "#/definitions/incremental_cursor", - "step": "P1M", - "cursor_field": "updated_at" - }, - "$parameters": { - "name": "locations", - "primary_key": "id", - "path": "/locations" - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "$schema": "https://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "description": "The identifier", - "type": ["null", "string"], + "retriever": { + "type": "SimpleRetriever", + "record_selector": {"$ref": "#/definitions/selector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"$ref": "#/definitions/requester"}, + }, + "incremental_cursor": { + "type": "DatetimeBasedCursor", + "start_datetime": {"datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}"}, + "end_datetime": {"datetime": "{{ now_utc().strftime('%Y-%m-%d') }}"}, + "datetime_format": "%Y-%m-%d", + "cursor_datetime_formats": ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"], + "cursor_granularity": "P1D", + "step": "P15D", + "cursor_field": "updated_at", + "lookback_window": "P5D", + "start_time_option": {"type": "RequestOption", "field_name": "start", "inject_into": "request_parameter"}, + "end_time_option": {"type": "RequestOption", "field_name": "end", "inject_into": "request_parameter"}, + }, + "base_stream": {"retriever": {"$ref": "#/definitions/retriever"}}, + "base_incremental_stream": { + "retriever": {"$ref": "#/definitions/retriever", "requester": {"$ref": "#/definitions/requester"}}, + "incremental_sync": {"$ref": "#/definitions/incremental_cursor"}, + }, + "party_members_stream": { + "$ref": "#/definitions/base_incremental_stream", + "retriever": {"$ref": "#/definitions/base_incremental_stream/retriever", "record_selector": {"$ref": "#/definitions/selector"}}, + "$parameters": {"name": "party_members", "primary_key": "id", "path": "/party_members"}, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + 
"properties": { + "id": { + "description": "The identifier", + "type": ["null", "string"], + }, + "name": {"description": "The name of the party member", "type": ["null", "string"]}, + }, + }, }, - "name": { - "description": "The name of the neighborhood location", - "type": ["null", "string"] - } - } - } - } - }, - "party_members_skills_stream": { - "$ref": "#/definitions/base_stream", - "retriever": { - "$ref": "#/definitions/base_incremental_stream/retriever", - "record_selector": { - "$ref": "#/definitions/selector" }, - "partition_router": { - "type": "SubstreamPartitionRouter", - "parent_stream_configs": [ - { - "type": "ParentStreamConfig", - "stream": "#/definitions/party_members_stream", - "parent_key": "id", - "partition_field": "party_member_id", - } - ] - } - }, - "$parameters": { - "name": "party_members_skills", - "primary_key": "id", - "path": "/party_members/{{stream_slice.party_member_id}}/skills" - }, - "schema_loader": { - "type": "InlineSchemaLoader", - "schema": { - "$schema": "https://json-schema.org/draft-07/schema#", - "type": "object", - "properties": { - "id": { - "description": "The identifier", - "type": ["null", "string"], + "palaces_stream": { + "$ref": "#/definitions/base_stream", + "$parameters": {"name": "palaces", "primary_key": "id", "path": "/palaces"}, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "description": "The identifier", + "type": ["null", "string"], + }, + "name": {"description": "The name of the metaverse palace", "type": ["null", "string"]}, + }, + }, }, - "name": { - "description": "The name of the party member", - "type": ["null", "string"] - } - } - } - } + }, + "locations_stream": { + "$ref": "#/definitions/base_incremental_stream", + "retriever": { + "$ref": "#/definitions/base_incremental_stream/retriever", + "requester": { + "$ref": "#/definitions/base_incremental_stream/retriever/requester", + "request_parameters": {"m": "active", "i": "1", "g": "country"}, + }, + "record_selector": {"$ref": "#/definitions/selector"}, + }, + "incremental_sync": {"$ref": "#/definitions/incremental_cursor", "step": "P1M", "cursor_field": "updated_at"}, + "$parameters": {"name": "locations", "primary_key": "id", "path": "/locations"}, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "description": "The identifier", + "type": ["null", "string"], + }, + "name": {"description": "The name of the neighborhood location", "type": ["null", "string"]}, + }, + }, + }, + }, + "party_members_skills_stream": { + "$ref": "#/definitions/base_stream", + "retriever": { + "$ref": "#/definitions/base_incremental_stream/retriever", + "record_selector": {"$ref": "#/definitions/selector"}, + "partition_router": { + "type": "SubstreamPartitionRouter", + "parent_stream_configs": [ + { + "type": "ParentStreamConfig", + "stream": "#/definitions/party_members_stream", + "parent_key": "id", + "partition_field": "party_member_id", + } + ], + }, + }, + "$parameters": { + "name": "party_members_skills", + "primary_key": "id", + "path": "/party_members/{{stream_slice.party_member_id}}/skills", + }, + "schema_loader": { + "type": "InlineSchemaLoader", + "schema": { + "$schema": "https://json-schema.org/draft-07/schema#", + "type": "object", + "properties": { + "id": { + "description": "The identifier", + "type": ["null", "string"], + }, 
+ "name": {"description": "The name of the party member", "type": ["null", "string"]}, + }, + }, + }, + }, + }, + "streams": [ + "#/definitions/party_members_stream", + "#/definitions/palaces_stream", + "#/definitions/locations_stream", + "#/definitions/party_members_skills_stream", + ], + "check": {"stream_names": ["party_members", "palaces", "locations"]}, + "concurrency_level": { + "type": "ConcurrencyLevel", + "default_concurrency": "{{ config['num_workers'] or 10 }}", + "max_concurrency": 25, }, - }, - "streams": [ - "#/definitions/party_members_stream", - "#/definitions/palaces_stream", - "#/definitions/locations_stream", - "#/definitions/party_members_skills_stream" - ], - "check": { - "stream_names": ["party_members", "palaces", "locations"] - }, - "concurrency_level": { - "type": "ConcurrencyLevel", - "default_concurrency": "{{ config['num_workers'] or 10 }}", - "max_concurrency": 25, - } } @@ -384,7 +327,8 @@ def read_records( if slice_key == ("2024-08-05", "2024-09-04"): raise AirbyteTracedException( message=f"Received an unexpected error during interval with start: {slice_key[0]} and end: {slice_key[1]}.", - failure_type=FailureType.config_error) + failure_type=FailureType.config_error, + ) if slice_key in self._slice_to_records_mapping: yield from self._slice_to_records_mapping.get(slice_key) @@ -426,7 +370,7 @@ def test_group_streams(): stream=AirbyteStream(name="party_members_skills", json_schema={}, supported_sync_modes=[SyncMode.full_refresh]), sync_mode=SyncMode.full_refresh, destination_sync_mode=DestinationSyncMode.append, - ) + ), ] ) @@ -464,7 +408,7 @@ def test_create_concurrent_cursor(): "slices": [ {"start": "2024-07-01T00:00:00", "end": "2024-07-31T00:00:00"}, ], - "state_type": "date-range" + "state_type": "date-range", } state = [ @@ -472,7 +416,7 @@ def test_create_concurrent_cursor(): type=AirbyteStateType.STREAM, stream=AirbyteStreamState( stream_descriptor=StreamDescriptor(name="locations", namespace=None), - stream_state=AirbyteStateBlob(**incoming_locations_state) + stream_state=AirbyteStateBlob(**incoming_locations_state), ), ), ] @@ -513,7 +457,7 @@ def test_create_concurrent_cursor(): "end": datetime(2024, 7, 31, 0, 0, 0, 0, tzinfo=timezone.utc), } ], - "state_type": "date-range" + "state_type": "date-range", } @@ -623,10 +567,12 @@ def test_read_with_concurrent_and_synchronous_streams(): party_members_states = get_states_for_stream(stream_name="party_members", messages=messages) assert len(party_members_states) == 6 - assert party_members_states[5].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] - ).__dict__ + assert ( + party_members_states[5].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] + ).__dict__ + ) # Expects 12 records, 3 slices, 4 records each slice locations_records = get_records_for_stream(stream_name="locations", messages=messages) @@ -636,10 +582,12 @@ def test_read_with_concurrent_and_synchronous_streams(): # Because we cannot guarantee the order partitions finish, we only validate that the final state has the latest checkpoint value locations_states = get_states_for_stream(stream_name="locations", messages=messages) assert len(locations_states) == 4 - assert locations_states[3].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", 
"end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] - ).__dict__ + assert ( + locations_states[3].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] + ).__dict__ + ) # Expects 7 records, 1 empty slice, 7 records in slice palaces_records = get_records_for_stream("palaces", messages) @@ -670,7 +618,7 @@ def test_read_with_concurrent_and_synchronous_streams(): "states": [ {"partition": {"parent_slice": {}, "party_member_id": "amamiya"}, "cursor": {"__ab_full_refresh_sync_complete": True}}, {"partition": {"parent_slice": {}, "party_member_id": "nijima"}, "cursor": {"__ab_full_refresh_sync_complete": True}}, - {"partition": {"parent_slice": {}, "party_member_id": "yoshizawa"}, "cursor": {"__ab_full_refresh_sync_complete": True}} + {"partition": {"parent_slice": {}, "party_member_id": "yoshizawa"}, "cursor": {"__ab_full_refresh_sync_complete": True}}, ] } @@ -702,7 +650,7 @@ def test_read_with_concurrent_and_synchronous_streams_with_concurrent_state(): {"start": "2024-07-16", "end": "2024-07-30"}, {"start": "2024-07-31", "end": "2024-08-14"}, {"start": "2024-08-30", "end": "2024-09-09"}, - ] + ], ), ), ), @@ -711,9 +659,7 @@ def test_read_with_concurrent_and_synchronous_streams_with_concurrent_state(): party_members_slices_and_responses = _NO_STATE_PARTY_MEMBERS_SLICES_AND_RESPONSES + [ ( {"start": "2024-09-04", "end": "2024-09-10"}, # considering lookback window - HttpResponse( - json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}]) - ), + HttpResponse(json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}])), ) ] location_slices = [ @@ -738,10 +684,12 @@ def test_read_with_concurrent_and_synchronous_streams_with_concurrent_state(): locations_states = get_states_for_stream(stream_name="locations", messages=messages) assert len(locations_states) == 3 - assert locations_states[2].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] - ).__dict__ + assert ( + locations_states[2].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] + ).__dict__ + ) # slices to sync are: # * {"start": "2024-07-01", "end": "2024-07-15"}: one record in _NO_STATE_PARTY_MEMBERS_SLICES_AND_RESPONSES @@ -751,10 +699,12 @@ def test_read_with_concurrent_and_synchronous_streams_with_concurrent_state(): party_members_states = get_states_for_stream(stream_name="party_members", messages=messages) assert len(party_members_states) == 4 - assert party_members_states[3].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] - ).__dict__ + assert ( + party_members_states[3].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] + ).__dict__ + ) # Expects 7 records, 1 empty slice, 7 records in slice palaces_records = get_records_for_stream("palaces", messages) @@ -785,15 +735,21 @@ def test_read_with_concurrent_and_synchronous_streams_with_sequential_state(): stream_descriptor=StreamDescriptor(name="party_members", 
namespace=None), stream_state=AirbyteStateBlob(updated_at="2024-08-21"), ), - ) + ), ] source = ConcurrentDeclarativeSource(source_config=_MANIFEST, config=_CONFIG, catalog=_CATALOG, state=state) disable_emitting_sequential_state_messages(source=source) party_members_slices_and_responses = _NO_STATE_PARTY_MEMBERS_SLICES_AND_RESPONSES + [ - ({"start": "2024-08-16", "end": "2024-08-30"}, HttpResponse(json.dumps([{"id": "nijima", "first_name": "makoto", "last_name": "nijima", "updated_at": "2024-08-10"}]))), # considering lookback window - ({"start": "2024-08-31", "end": "2024-09-10"}, HttpResponse(json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}]))), + ( + {"start": "2024-08-16", "end": "2024-08-30"}, + HttpResponse(json.dumps([{"id": "nijima", "first_name": "makoto", "last_name": "nijima", "updated_at": "2024-08-10"}])), + ), # considering lookback window + ( + {"start": "2024-08-31", "end": "2024-09-10"}, + HttpResponse(json.dumps([{"id": "yoshizawa", "first_name": "sumire", "last_name": "yoshizawa", "updated_at": "2024-09-10"}])), + ), ] location_slices = [ {"start": "2024-08-01", "end": "2024-08-31"}, @@ -814,10 +770,12 @@ def test_read_with_concurrent_and_synchronous_streams_with_sequential_state(): locations_states = get_states_for_stream(stream_name="locations", messages=messages) assert len(locations_states) == 3 - assert locations_states[2].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] - ).__dict__ + assert ( + locations_states[2].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-08-10"}] + ).__dict__ + ) # From extra slices defined in party_members_slices_and_responses party_members_records = get_records_for_stream("party_members", messages) @@ -825,10 +783,12 @@ def test_read_with_concurrent_and_synchronous_streams_with_sequential_state(): party_members_states = get_states_for_stream(stream_name="party_members", messages=messages) assert len(party_members_states) == 3 - assert party_members_states[2].stream.stream_state.__dict__ == AirbyteStateBlob( - state_type="date-range", - slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] - ).__dict__ + assert ( + party_members_states[2].stream.stream_state.__dict__ + == AirbyteStateBlob( + state_type="date-range", slices=[{"start": "2024-07-01", "end": "2024-09-10", "most_recent_cursor_value": "2024-09-10"}] + ).__dict__ + ) # Expects 7 records, 1 empty slice, 7 records in slice palaces_records = get_records_for_stream("palaces", messages) @@ -881,7 +841,9 @@ def test_read_concurrent_with_failing_partition_in_the_middle(): for message in source.read(logger=source.logger, config=_CONFIG, catalog=catalog, state=[]): messages.append(message) except AirbyteTracedException: - assert get_states_for_stream(stream_name="locations", messages=messages)[-1].stream.stream_state.__dict__ == expected_stream_state + assert ( + get_states_for_stream(stream_name="locations", messages=messages)[-1].stream.stream_state.__dict__ == expected_stream_state + ) @freezegun.freeze_time(_NOW) @@ -945,10 +907,7 @@ def test_read_concurrent_skip_streams_not_in_catalog(): def test_default_perform_interpolation_on_concurrency_level(): - config = { - "start_date": "2024-07-01T00:00:00.000Z", - "num_workers": 20 - } + config = 
{"start_date": "2024-07-01T00:00:00.000Z", "num_workers": 20} catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( @@ -982,10 +941,7 @@ def test_default_to_single_threaded_when_no_concurrency_level(): def test_concurrency_level_initial_number_partitions_to_generate_is_always_one_or_more(): - config = { - "start_date": "2024-07-01T00:00:00.000Z", - "num_workers": 1 - } + config = {"start_date": "2024-07-01T00:00:00.000Z", "num_workers": 1} catalog = ConfiguredAirbyteCatalog( streams=[ ConfiguredAirbyteStream( @@ -1001,7 +957,7 @@ def test_concurrency_level_initial_number_partitions_to_generate_is_always_one_o "type": "ConcurrencyLevel", "default_concurrency": "{{ config.get('num_workers', 1) }}", "max_concurrency": 25, - } + } source = ConcurrentDeclarativeSource(source_config=_MANIFEST, config=config, catalog=catalog, state=[]) assert source._concurrent_source._initial_number_partitions_to_generate == 1 @@ -1018,14 +974,11 @@ def test_streams_with_stream_state_interpolation_should_be_synchronous(): # Add a RecordFilter component that uses stream_state interpolation to the party member stream manifest_with_stream_state_interpolation["definitions"]["party_members_stream"]["retriever"]["record_selector"]["record_filter"] = { "type": "RecordFilter", - "condition": "{{ record.updated_at > stream_state['updated_at'] }}" + "condition": "{{ record.updated_at > stream_state['updated_at'] }}", } source = ConcurrentDeclarativeSource( - source_config=manifest_with_stream_state_interpolation, - config=_CONFIG, - catalog=_CATALOG, - state=None + source_config=manifest_with_stream_state_interpolation, config=_CONFIG, catalog=_CATALOG, state=None ) assert len(source._concurrent_streams) == 0 @@ -1036,13 +989,7 @@ def test_given_partition_routing_and_incremental_sync_then_stream_is_not_concurr manifest = { "version": "5.0.0", "definitions": { - "selector": { - "type": "RecordSelector", - "extractor": { - "type": "DpathExtractor", - "field_path": [] - } - }, + "selector": {"type": "RecordSelector", "extractor": {"type": "DpathExtractor", "field_path": []}}, "requester": { "type": "HttpRequester", "url_base": "https://persona.metaverse.com", @@ -1050,7 +997,7 @@ def test_given_partition_routing_and_incremental_sync_then_stream_is_not_concurr "authenticator": { "type": "BasicHttpAuthenticator", "username": "{{ config['api_key'] }}", - "password": "{{ config['secret_key'] }}" + "password": "{{ config['secret_key'] }}", }, "error_handler": { "type": "DefaultErrorHandler", @@ -1059,68 +1006,35 @@ def test_given_partition_routing_and_incremental_sync_then_stream_is_not_concurr "http_codes": [403], "action": "FAIL", "failure_type": "config_error", - "error_message": "Access denied due to lack of permission or invalid API/Secret key or wrong data region." + "error_message": "Access denied due to lack of permission or invalid API/Secret key or wrong data region.", }, - { - "http_codes": [404], - "action": "IGNORE", - "error_message": "No data available for the time range requested." 
- } - ] + {"http_codes": [404], "action": "IGNORE", "error_message": "No data available for the time range requested."}, + ], }, }, "retriever": { "type": "SimpleRetriever", - "record_selector": { - "$ref": "#/definitions/selector" - }, - "paginator": { - "type": "NoPagination" - }, - "requester": { - "$ref": "#/definitions/requester" - } + "record_selector": {"$ref": "#/definitions/selector"}, + "paginator": {"type": "NoPagination"}, + "requester": {"$ref": "#/definitions/requester"}, }, "incremental_cursor": { "type": "DatetimeBasedCursor", - "start_datetime": { - "datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}" - }, - "end_datetime": { - "datetime": "{{ now_utc().strftime('%Y-%m-%d') }}" - }, + "start_datetime": {"datetime": "{{ format_datetime(config['start_date'], '%Y-%m-%d') }}"}, + "end_datetime": {"datetime": "{{ now_utc().strftime('%Y-%m-%d') }}"}, "datetime_format": "%Y-%m-%d", "cursor_datetime_formats": ["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"], "cursor_granularity": "P1D", "step": "P15D", "cursor_field": "updated_at", "lookback_window": "P5D", - "start_time_option": { - "type": "RequestOption", - "field_name": "start", - "inject_into": "request_parameter" - }, - "end_time_option": { - "type": "RequestOption", - "field_name": "end", - "inject_into": "request_parameter" - } - }, - "base_stream": { - "retriever": { - "$ref": "#/definitions/retriever" - } + "start_time_option": {"type": "RequestOption", "field_name": "start", "inject_into": "request_parameter"}, + "end_time_option": {"type": "RequestOption", "field_name": "end", "inject_into": "request_parameter"}, }, + "base_stream": {"retriever": {"$ref": "#/definitions/retriever"}}, "base_incremental_stream": { - "retriever": { - "$ref": "#/definitions/retriever", - "requester": { - "$ref": "#/definitions/requester" - } - }, - "incremental_sync": { - "$ref": "#/definitions/incremental_cursor" - } + "retriever": {"$ref": "#/definitions/retriever", "requester": {"$ref": "#/definitions/requester"}}, + "incremental_sync": {"$ref": "#/definitions/incremental_cursor"}, }, "incremental_party_members_skills_stream": { "$ref": "#/definitions/base_incremental_stream", @@ -1130,12 +1044,12 @@ def test_given_partition_routing_and_incremental_sync_then_stream_is_not_concurr "type": "ListPartitionRouter", "cursor_field": "party_member_id", "values": ["party_member1", "party_member2"], - } + }, }, "$parameters": { "name": "incremental_party_members_skills", "primary_key": "id", - "path": "/party_members/{{stream_slice.party_member_id}}/skills" + "path": "/party_members/{{stream_slice.party_member_id}}/skills", }, "schema_loader": { "type": "InlineSchemaLoader", @@ -1147,26 +1061,19 @@ def test_given_partition_routing_and_incremental_sync_then_stream_is_not_concurr "description": "The identifier", "type": ["null", "string"], }, - "name": { - "description": "The name of the party member", - "type": ["null", "string"] - } - } - } - } + "name": {"description": "The name of the party member", "type": ["null", "string"]}, + }, + }, + }, }, }, - "streams": [ - "#/definitions/incremental_party_members_skills_stream" - ], - "check": { - "stream_names": ["incremental_party_members_skills"] - }, + "streams": ["#/definitions/incremental_party_members_skills_stream"], + "check": {"stream_names": ["incremental_party_members_skills"]}, "concurrency_level": { "type": "ConcurrencyLevel", "default_concurrency": "{{ config['num_workers'] or 10 }}", "max_concurrency": 25, - } + }, } catalog = ConfiguredAirbyteCatalog( @@ -1201,11 +1108,9 @@ def 
get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], StreamSlice(cursor_slice={"start": "2024-07-01", "end": "2024-07-31"}, partition={}), StreamSlice(cursor_slice={"start": "2024-08-01", "end": "2024-08-31"}, partition={}), StreamSlice(cursor_slice={"start": "2024-09-01", "end": "2024-09-09"}, partition={}), - # Slices used during incremental checkpoint sync - StreamSlice(cursor_slice={'start': '2024-07-26', 'end': '2024-08-25'}, partition={}), - StreamSlice(cursor_slice={'start': '2024-08-26', 'end': '2024-09-09'}, partition={}), - + StreamSlice(cursor_slice={"start": "2024-07-26", "end": "2024-08-25"}, partition={}), + StreamSlice(cursor_slice={"start": "2024-08-26", "end": "2024-09-09"}, partition={}), # Slices used during incremental sync with some partitions that exit with an error StreamSlice(cursor_slice={"start": "2024-07-05", "end": "2024-08-04"}, partition={}), StreamSlice(cursor_slice={"start": "2024-08-05", "end": "2024-09-04"}, partition={}), @@ -1226,13 +1131,12 @@ def get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], StreamSlice(cursor_slice={"start": "2024-07-31", "end": "2024-08-14"}, partition={}), StreamSlice(cursor_slice={"start": "2024-08-15", "end": "2024-08-29"}, partition={}), StreamSlice(cursor_slice={"start": "2024-08-30", "end": "2024-09-09"}, partition={}), - # Slices used during incremental checkpoint sync. Unsuccessful partitions use the P5D lookback window which explains # the skew of records midway through StreamSlice(cursor_slice={"start": "2024-07-01", "end": "2024-07-16"}, partition={}), - StreamSlice(cursor_slice={'start': '2024-07-30', 'end': '2024-08-13'}, partition={}), - StreamSlice(cursor_slice={'start': '2024-08-14', 'end': '2024-08-14'}, partition={}), - StreamSlice(cursor_slice={'start': '2024-09-04', 'end': '2024-09-09'}, partition={}), + StreamSlice(cursor_slice={"start": "2024-07-30", "end": "2024-08-13"}, partition={}), + StreamSlice(cursor_slice={"start": "2024-08-14", "end": "2024-08-14"}, partition={}), + StreamSlice(cursor_slice={"start": "2024-09-04", "end": "2024-09-09"}, partition={}), ] records = [ @@ -1264,7 +1168,10 @@ def get_mocked_read_records_output(stream_name: str) -> Mapping[tuple[str, str], case _: raise ValueError(f"Stream '{stream_name}' does not have associated mocked records") - return {(_slice.get("start"), _slice.get("end")): [Record(data=stream_data, associated_slice=_slice) for stream_data in records] for _slice in slices} + return { + (_slice.get("start"), _slice.get("end")): [Record(data=stream_data, associated_slice=_slice) for stream_data in records] + for _slice in slices + } def get_records_for_stream(stream_name: str, messages: List[AirbyteMessage]) -> List[AirbyteRecordMessage]: diff --git a/unit_tests/sources/file_based/in_memory_files_source.py b/unit_tests/sources/file_based/in_memory_files_source.py index bf3bb796..b8448dbc 100644 --- a/unit_tests/sources/file_based/in_memory_files_source.py +++ b/unit_tests/sources/file_based/in_memory_files_source.py @@ -127,7 +127,6 @@ def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str raise NotImplementedError(f"No implementation for file type: {self.file_type}") def _make_csv_file_contents(self, file_name: str) -> IOBase: - # Some tests define the csv as an array of strings to make it easier to validate the handling # of quotes, delimiter, and escpare chars. 
if isinstance(self.files[file_name]["contents"][0], str): diff --git a/unit_tests/sources/file_based/scenarios/csv_scenarios.py b/unit_tests/sources/file_based/scenarios/csv_scenarios.py index 2fff455b..d88c38ec 100644 --- a/unit_tests/sources/file_based/scenarios/csv_scenarios.py +++ b/unit_tests/sources/file_based/scenarios/csv_scenarios.py @@ -453,16 +453,12 @@ "title": "Delivery Type", "default": "use_records_transfer", "const": "use_records_transfer", - "enum": [ - "use_records_transfer" - ], - "type": "string" + "enum": ["use_records_transfer"], + "type": "string", } }, "description": "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.", - "required": [ - "delivery_type" - ] + "required": ["delivery_type"], }, { "title": "Copy Raw Files", @@ -472,19 +468,15 @@ "title": "Delivery Type", "default": "use_file_transfer", "const": "use_file_transfer", - "enum": [ - "use_file_transfer" - ], - "type": "string" + "enum": ["use_file_transfer"], + "type": "string", } }, "description": "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.", - "required": [ - "delivery_type" - ] - } - ] - }, + "required": ["delivery_type"], + }, + ], + }, }, "required": ["streams"], }, diff --git a/unit_tests/sources/file_based/stream/concurrent/test_adapters.py b/unit_tests/sources/file_based/stream/concurrent/test_adapters.py index 9d10ff5e..fc4b5c22 100644 --- a/unit_tests/sources/file_based/stream/concurrent/test_adapters.py +++ b/unit_tests/sources/file_based/stream/concurrent/test_adapters.py @@ -70,12 +70,18 @@ def test_file_based_stream_partition_generator(sync_mode): [ pytest.param( TypeTransformer(TransformConfig.NoTransform), - [Record({"data": "1"}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), Record({"data": "2"}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME)))], + [ + Record({"data": "1"}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), + Record({"data": "2"}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), + ], id="test_no_transform", ), pytest.param( TypeTransformer(TransformConfig.DefaultSchemaNormalization), - [Record({"data": 1}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), Record({"data": 2}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME)))], + [ + Record({"data": 1}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), + Record({"data": 2}, Mock(spec=FileBasedStreamPartition, stream_name=Mock(return_value=_STREAM_NAME))), + ], id="test_default_transform", ), ], diff --git a/unit_tests/sources/file_based/stream/test_default_file_based_stream.py b/unit_tests/sources/file_based/stream/test_default_file_based_stream.py index f6c31927..85318b52 100644 --- a/unit_tests/sources/file_based/stream/test_default_file_based_stream.py +++ b/unit_tests/sources/file_based/stream/test_default_file_based_stream.py @@ -256,7 +256,7 @@ def test_yield_and_raise_collected(self) -> None: class DefaultFileBasedStreamFileTransferTest(unittest.TestCase): _NOW = datetime(2022, 10, 22, 
tzinfo=timezone.utc) - _A_RECORD = {'bytes': 10, 'file_relative_path': 'relative/path/file.csv', 'file_url': '/absolute/path/file.csv'} + _A_RECORD = {"bytes": 10, "file_relative_path": "relative/path/file.csv", "file_url": "/absolute/path/file.csv"} def setUp(self) -> None: self._stream_config = Mock() @@ -281,7 +281,7 @@ def setUp(self) -> None: validation_policy=self._validation_policy, cursor=self._cursor, errors_collector=FileBasedErrorsCollector(), - use_file_transfer=True + use_file_transfer=True, ) def test_when_read_records_from_slice_then_return_records(self) -> None: diff --git a/unit_tests/sources/message/test_repository.py b/unit_tests/sources/message/test_repository.py index 48778b65..45502e45 100644 --- a/unit_tests/sources/message/test_repository.py +++ b/unit_tests/sources/message/test_repository.py @@ -100,7 +100,6 @@ def test_given_message_emitted_when_consume_queue_then_return_empty(self): class TestLogAppenderMessageRepositoryDecorator: - _DICT_TO_APPEND = {"airbyte_cdk": {"stream": {"is_substream": False}}} @pytest.fixture() diff --git a/unit_tests/sources/mock_server_tests/mock_source_fixture.py b/unit_tests/sources/mock_server_tests/mock_source_fixture.py index ece5039b..20631854 100644 --- a/unit_tests/sources/mock_server_tests/mock_source_fixture.py +++ b/unit_tests/sources/mock_server_tests/mock_source_fixture.py @@ -34,7 +34,6 @@ def reasons_for_unavailable_status_codes( class IntegrationStream(HttpStream, ABC): - url_base = "https://api.airbyte-test.com/v1/" primary_key = "id" page_size = 100 diff --git a/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py b/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py index 4d4fb5c4..38c44c08 100644 --- a/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +++ b/unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py @@ -63,8 +63,18 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: state_converter = StreamFacadeConcurrentConnectorStateConverter() return [ - self.convert_to_concurrent_stream(stream.logger, stream, state_manager, self.initialize_cursor( - stream, state_manager, state_converter, self._cursor_boundaries, None, EpochValueConcurrentStreamStateConverter.get_end_provider()) + self.convert_to_concurrent_stream( + stream.logger, + stream, + state_manager, + self.initialize_cursor( + stream, + state_manager, + state_converter, + self._cursor_boundaries, + None, + EpochValueConcurrentStreamStateConverter.get_end_provider(), + ), ) for stream in self._streams ] diff --git a/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py b/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py index b8ed5e72..05f3adbe 100644 --- a/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +++ b/unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py @@ -21,7 +21,17 @@ _id_only_stream = DefaultStream( partition_generator=InMemoryPartitionGenerator( - [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, []))])] + [ + InMemoryPartition( + "partition1", + "stream1", + None, + [ + Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ) + ] ), 
name="stream1", json_schema={ @@ -57,7 +67,17 @@ _id_only_stream_with_primary_key = DefaultStream( partition_generator=InMemoryPartitionGenerator( - [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, []))])] + [ + InMemoryPartition( + "partition1", + "stream1", + None, + [ + Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ) + ] ), name="stream1", json_schema={ @@ -76,8 +96,24 @@ _id_only_stream_multiple_partitions = DefaultStream( partition_generator=InMemoryPartitionGenerator( [ - InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, []))]), - InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "4"}, InMemoryPartition("partition1", "stream1", None, []))]), + InMemoryPartition( + "partition1", + "stream1", + {"p": "1"}, + [ + Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ), + InMemoryPartition( + "partition2", + "stream1", + {"p": "2"}, + [ + Record({"id": "3"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "4"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ), ] ), name="stream1", @@ -97,8 +133,24 @@ _id_only_stream_multiple_partitions_concurrency_level_two = DefaultStream( partition_generator=InMemoryPartitionGenerator( [ - InMemoryPartition("partition1", "stream1", {"p": "1"}, [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, []))]), - InMemoryPartition("partition2", "stream1", {"p": "2"}, [Record({"id": "3"}, InMemoryPartition("partition1", "stream1", None, [])), Record({"id": "4"}, InMemoryPartition("partition1", "stream1", None, []))]), + InMemoryPartition( + "partition1", + "stream1", + {"p": "1"}, + [ + Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "2"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ), + InMemoryPartition( + "partition2", + "stream1", + {"p": "2"}, + [ + Record({"id": "3"}, InMemoryPartition("partition1", "stream1", None, [])), + Record({"id": "4"}, InMemoryPartition("partition1", "stream1", None, [])), + ], + ), ] ), name="stream1", @@ -117,7 +169,14 @@ _stream_raising_exception = DefaultStream( partition_generator=InMemoryPartitionGenerator( - [InMemoryPartition("partition1", "stream1", None, [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), ValueError("test exception")])] + [ + InMemoryPartition( + "partition1", + "stream1", + None, + [Record({"id": "1"}, InMemoryPartition("partition1", "stream1", None, [])), ValueError("test exception")], + ) + ] ), name="stream1", json_schema={ @@ -241,8 +300,10 @@ "partition1", "stream2", None, - [Record({"id": "10", "key": "v1"}, InMemoryPartition("partition1", "stream2", None, [])), - Record({"id": "20", "key": "v2"}, InMemoryPartition("partition1", "stream2", None, []))], + [ + Record({"id": "10", "key": "v1"}, InMemoryPartition("partition1", "stream2", None, [])), + Record({"id": "20", "key": "v2"}, 
InMemoryPartition("partition1", "stream2", None, [])), + ], ) ] ), diff --git a/unit_tests/sources/streams/concurrent/test_adapters.py b/unit_tests/sources/streams/concurrent/test_adapters.py index c5c44a78..d1dffcdd 100644 --- a/unit_tests/sources/streams/concurrent/test_adapters.py +++ b/unit_tests/sources/streams/concurrent/test_adapters.py @@ -385,12 +385,7 @@ def test_cursor_partition_generator(): cursor.generate_slices.return_value = [(datetime.datetime(year=2024, month=1, day=1), datetime.datetime(year=2024, month=1, day=2))] partition_generator = CursorPartitionGenerator( - stream, - message_repository, - cursor, - connector_state_converter, - cursor_field, - slice_boundary_fields + stream, message_repository, cursor, connector_state_converter, cursor_field, slice_boundary_fields ) partitions = list(partition_generator.generate()) diff --git a/unit_tests/sources/streams/concurrent/test_cursor.py b/unit_tests/sources/streams/concurrent/test_cursor.py index 5da0a55b..5ef8c60c 100644 --- a/unit_tests/sources/streams/concurrent/test_cursor.py +++ b/unit_tests/sources/streams/concurrent/test_cursor.py @@ -407,7 +407,10 @@ def test_given_difference_between_slices_match_slice_range_and_cursor_granularit slices = list(cursor.generate_slices()) assert slices == [ - (datetime.fromtimestamp(31, timezone.utc), datetime.fromtimestamp(40, timezone.utc)), # FIXME there should probably be the granularity at the beginning too + ( + datetime.fromtimestamp(31, timezone.utc), + datetime.fromtimestamp(40, timezone.utc), + ), # FIXME there should probably be the granularity at the beginning too ] @freezegun.freeze_time(time_to_freeze=datetime.fromtimestamp(50, timezone.utc)) @@ -512,9 +515,7 @@ def test_slices_with_records_when_close_then_most_recent_cursor_value_from_most_ cursor.close_partition(second_partition) assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == { - "slices": [ - {"end": 20, "start": 0, "most_recent_cursor_value": 15} - ], + "slices": [{"end": 20, "start": 0, "most_recent_cursor_value": 15}], "state_type": "date-range", } @@ -528,9 +529,7 @@ def test_last_slice_without_records_when_close_then_most_recent_cursor_value_is_ cursor.close_partition(second_partition) assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == { - "slices": [ - {"end": 20, "start": 0, "most_recent_cursor_value": 5} - ], + "slices": [{"end": 20, "start": 0, "most_recent_cursor_value": 5}], "state_type": "date-range", } @@ -544,9 +543,7 @@ def test_most_recent_cursor_value_outside_of_boundaries_when_close_then_most_rec cursor.close_partition(partition) assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == { - "slices": [ - {"end": 10, "start": 0, "most_recent_cursor_value": 15} - ], + "slices": [{"end": 10, "start": 0, "most_recent_cursor_value": 15}], "state_type": "date-range", } @@ -556,11 +553,11 @@ def test_most_recent_cursor_value_on_sequential_state_when_close_then_cursor_val cursor.observe(_record(7, partition=partition)) cursor.close_partition(partition) - assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == { - _A_CURSOR_FIELD_KEY: 7 - } + assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == {_A_CURSOR_FIELD_KEY: 7} - def test_non_continuous_slices_on_sequential_state_when_close_then_cursor_value_is_most_recent_cursor_value_of_first_slice(self) -> None: + def 
test_non_continuous_slices_on_sequential_state_when_close_then_cursor_value_is_most_recent_cursor_value_of_first_slice( + self, + ) -> None: cursor = self._cursor_with_slice_boundary_fields(is_sequential_state=True) first_partition = _partition({_LOWER_SLICE_BOUNDARY_FIELD: 0, _UPPER_SLICE_BOUNDARY_FIELD: 10}) third_partition = _partition({_LOWER_SLICE_BOUNDARY_FIELD: 20, _UPPER_SLICE_BOUNDARY_FIELD: 30}) # second partition has failed @@ -569,9 +566,7 @@ def test_non_continuous_slices_on_sequential_state_when_close_then_cursor_value_ cursor.close_partition(third_partition) - assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == { - _A_CURSOR_FIELD_KEY: 7 - } + assert self._state_manager.update_state_for_stream.call_args_list[-1].args[2] == {_A_CURSOR_FIELD_KEY: 7} @freezegun.freeze_time(time_to_freeze=datetime.fromtimestamp(10, timezone.utc)) def test_given_overflowing_slice_gap_when_generate_slices_then_cap_upper_bound_to_end_provider(self) -> None: @@ -613,7 +608,7 @@ def test_given_overflowing_slice_gap_when_generate_slices_then_cap_upper_bound_t (datetime(2024, 1, 21, 0, 0, tzinfo=timezone.utc), datetime(2024, 1, 30, 23, 59, 59, tzinfo=timezone.utc)), (datetime(2024, 1, 31, 0, 0, tzinfo=timezone.utc), datetime(2024, 2, 9, 23, 59, 59, tzinfo=timezone.utc)), (datetime(2024, 2, 10, 0, 0, tzinfo=timezone.utc), datetime(2024, 2, 19, 23, 59, 59, tzinfo=timezone.utc)), - (datetime(2024, 2, 20, 0, 0, tzinfo=timezone.utc), datetime(2024, 3, 1, 0, 0, 0, tzinfo=timezone.utc)) + (datetime(2024, 2, 20, 0, 0, tzinfo=timezone.utc), datetime(2024, 3, 1, 0, 0, 0, tzinfo=timezone.utc)), ], id="test_datetime_based_cursor_all_fields", ), @@ -630,12 +625,12 @@ def test_given_overflowing_slice_gap_when_generate_slices_then_cap_upper_bound_t "end": "2024-02-10T00:00:00.000000+0000", } ], - "state_type": "date-range" + "state_type": "date-range", }, [ (datetime(2024, 2, 5, 0, 0, 0, tzinfo=timezone.utc), datetime(2024, 2, 14, 23, 59, 59, tzinfo=timezone.utc)), (datetime(2024, 2, 15, 0, 0, 0, tzinfo=timezone.utc), datetime(2024, 2, 24, 23, 59, 59, tzinfo=timezone.utc)), - (datetime(2024, 2, 25, 0, 0, 0, tzinfo=timezone.utc), datetime(2024, 3, 1, 0, 0, 0, tzinfo=timezone.utc)) + (datetime(2024, 2, 25, 0, 0, 0, tzinfo=timezone.utc), datetime(2024, 3, 1, 0, 0, 0, tzinfo=timezone.utc)), ], id="test_datetime_based_cursor_with_state", ), @@ -652,7 +647,7 @@ def test_given_overflowing_slice_gap_when_generate_slices_then_cap_upper_bound_t "end": "2024-01-21T00:00:00.000000+0000", } ], - "state_type": "date-range" + "state_type": "date-range", }, [ (datetime(2024, 1, 20, 0, 0, tzinfo=timezone.utc), datetime(2024, 2, 8, 23, 59, 59, tzinfo=timezone.utc)), @@ -675,7 +670,7 @@ def test_given_overflowing_slice_gap_when_generate_slices_then_cap_upper_bound_t ], id="test_datetime_based_cursor_using_large_step_duration", ), - ] + ], ) def test_generate_slices_concurrent_cursor_from_datetime_based_cursor( start_datetime, @@ -758,10 +753,7 @@ def test_observe_concurrent_cursor_from_datetime_based_cursor(): message_repository = Mock(spec=MessageRepository) state_manager = Mock(spec=ConnectorStateManager) - config = { - "start_time": "2024-08-01T00:00:00.000000+0000", - "dynamic_cursor_key": "updated_at" - } + config = {"start_time": "2024-08-01T00:00:00.000000+0000", "dynamic_cursor_key": "updated_at"} datetime_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime(datetime="{{ config.start_time }}", parameters={}), @@ -799,13 +791,16 @@ def 
test_observe_concurrent_cursor_from_datetime_based_cursor(): ) record_1 = Record( - partition=partition, data={"id": "999", "updated_at": "2024-08-23T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, + partition=partition, + data={"id": "999", "updated_at": "2024-08-23T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, ) record_2 = Record( - partition=partition, data={"id": "1000", "updated_at": "2024-08-22T00:00:00.000000+0000", "name": "odin", "mythology": "norse"}, + partition=partition, + data={"id": "1000", "updated_at": "2024-08-22T00:00:00.000000+0000", "name": "odin", "mythology": "norse"}, ) record_3 = Record( - partition=partition, data={"id": "500", "updated_at": "2024-08-24T00:00:00.000000+0000", "name": "freya", "mythology": "norse"}, + partition=partition, + data={"id": "500", "updated_at": "2024-08-24T00:00:00.000000+0000", "name": "freya", "mythology": "norse"}, ) concurrent_cursor.observe(record_1) @@ -826,10 +821,7 @@ def test_close_partition_concurrent_cursor_from_datetime_based_cursor(): message_repository = Mock(spec=MessageRepository) state_manager = Mock(spec=ConnectorStateManager) - config = { - "start_time": "2024-08-01T00:00:00.000000+0000", - "dynamic_cursor_key": "updated_at" - } + config = {"start_time": "2024-08-01T00:00:00.000000+0000", "dynamic_cursor_key": "updated_at"} datetime_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime(datetime="{{ config.start_time }}", parameters={}), @@ -867,7 +859,8 @@ def test_close_partition_concurrent_cursor_from_datetime_based_cursor(): ) record_1 = Record( - partition=partition, data={"id": "999", "updated_at": "2024-08-23T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, + partition=partition, + data={"id": "999", "updated_at": "2024-08-23T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, ) concurrent_cursor.observe(record_1) @@ -878,8 +871,14 @@ def test_close_partition_concurrent_cursor_from_datetime_based_cursor(): "gods", _A_STREAM_NAMESPACE, { - "slices": [{"end": "2024-08-23T00:00:00.000Z", "start": "2024-08-01T00:00:00.000Z", "most_recent_cursor_value": "2024-08-23T00:00:00.000Z"}], - "state_type": "date-range" + "slices": [ + { + "end": "2024-08-23T00:00:00.000Z", + "start": "2024-08-01T00:00:00.000Z", + "most_recent_cursor_value": "2024-08-23T00:00:00.000Z", + } + ], + "state_type": "date-range", }, ) @@ -889,10 +888,7 @@ def test_close_partition_with_slice_range_concurrent_cursor_from_datetime_based_ message_repository = Mock(spec=MessageRepository) state_manager = Mock(spec=ConnectorStateManager) - config = { - "start_time": "2024-07-01T00:00:00.000000+0000", - "dynamic_cursor_key": "updated_at" - } + config = {"start_time": "2024-07-01T00:00:00.000000+0000", "dynamic_cursor_key": "updated_at"} datetime_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime(datetime="{{ config.start_time }}", parameters={}), @@ -932,16 +928,20 @@ def test_close_partition_with_slice_range_concurrent_cursor_from_datetime_based_ ) partition_0 = _partition( - {"start_time": "2024-07-01T00:00:00.000000+0000", "end_time": "2024-07-16T00:00:00.000000+0000"}, _stream_name="gods", + {"start_time": "2024-07-01T00:00:00.000000+0000", "end_time": "2024-07-16T00:00:00.000000+0000"}, + _stream_name="gods", ) partition_3 = _partition( - {"start_time": "2024-08-15T00:00:00.000000+0000", "end_time": "2024-08-30T00:00:00.000000+0000"}, _stream_name="gods", + {"start_time": "2024-08-15T00:00:00.000000+0000", "end_time": "2024-08-30T00:00:00.000000+0000"}, + 
_stream_name="gods", ) record_1 = Record( - partition=partition_0, data={"id": "1000", "updated_at": "2024-07-05T00:00:00.000000+0000", "name": "loki", "mythology": "norse"}, + partition=partition_0, + data={"id": "1000", "updated_at": "2024-07-05T00:00:00.000000+0000", "name": "loki", "mythology": "norse"}, ) record_2 = Record( - partition=partition_3, data={"id": "999", "updated_at": "2024-08-20T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, + partition=partition_3, + data={"id": "999", "updated_at": "2024-08-20T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, ) concurrent_cursor.observe(record_1) @@ -956,10 +956,18 @@ def test_close_partition_with_slice_range_concurrent_cursor_from_datetime_based_ _A_STREAM_NAMESPACE, { "slices": [ - {"start": "2024-07-01T00:00:00.000Z", "end": "2024-07-16T00:00:00.000Z", "most_recent_cursor_value": "2024-07-05T00:00:00.000Z"}, - {"start": "2024-08-15T00:00:00.000Z", "end": "2024-08-30T00:00:00.000Z", "most_recent_cursor_value": "2024-08-20T00:00:00.000Z"}, + { + "start": "2024-07-01T00:00:00.000Z", + "end": "2024-07-16T00:00:00.000Z", + "most_recent_cursor_value": "2024-07-05T00:00:00.000Z", + }, + { + "start": "2024-08-15T00:00:00.000Z", + "end": "2024-08-30T00:00:00.000Z", + "most_recent_cursor_value": "2024-08-20T00:00:00.000Z", + }, ], - "state_type": "date-range" + "state_type": "date-range", }, ) assert state_manager.update_state_for_stream.call_count == 2 @@ -970,10 +978,7 @@ def test_close_partition_with_slice_range_granularity_concurrent_cursor_from_dat message_repository = Mock(spec=MessageRepository) state_manager = Mock(spec=ConnectorStateManager) - config = { - "start_time": "2024-07-01T00:00:00.000000+0000", - "dynamic_cursor_key": "updated_at" - } + config = {"start_time": "2024-07-01T00:00:00.000000+0000", "dynamic_cursor_key": "updated_at"} datetime_based_cursor = DatetimeBasedCursor( start_datetime=MinMaxDatetime(datetime="{{ config.start_time }}", parameters={}), @@ -1015,22 +1020,28 @@ def test_close_partition_with_slice_range_granularity_concurrent_cursor_from_dat ) partition_0 = _partition( - {"start_time": "2024-07-01T00:00:00.000000+0000", "end_time": "2024-07-15T00:00:00.000000+0000"}, _stream_name="gods", + {"start_time": "2024-07-01T00:00:00.000000+0000", "end_time": "2024-07-15T00:00:00.000000+0000"}, + _stream_name="gods", ) partition_1 = _partition( - {"start_time": "2024-07-16T00:00:00.000000+0000", "end_time": "2024-07-31T00:00:00.000000+0000"}, _stream_name="gods", + {"start_time": "2024-07-16T00:00:00.000000+0000", "end_time": "2024-07-31T00:00:00.000000+0000"}, + _stream_name="gods", ) partition_3 = _partition( - {"start_time": "2024-08-15T00:00:00.000000+0000", "end_time": "2024-08-29T00:00:00.000000+0000"}, _stream_name="gods", + {"start_time": "2024-08-15T00:00:00.000000+0000", "end_time": "2024-08-29T00:00:00.000000+0000"}, + _stream_name="gods", ) record_1 = Record( - partition=partition_0, data={"id": "1000", "updated_at": "2024-07-05T00:00:00.000000+0000", "name": "loki", "mythology": "norse"}, + partition=partition_0, + data={"id": "1000", "updated_at": "2024-07-05T00:00:00.000000+0000", "name": "loki", "mythology": "norse"}, ) record_2 = Record( - partition=partition_1, data={"id": "2000", "updated_at": "2024-07-25T00:00:00.000000+0000", "name": "freya", "mythology": "norse"}, + partition=partition_1, + data={"id": "2000", "updated_at": "2024-07-25T00:00:00.000000+0000", "name": "freya", "mythology": "norse"}, ) record_3 = Record( - partition=partition_3, data={"id": "999", 
"updated_at": "2024-08-20T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, + partition=partition_3, + data={"id": "999", "updated_at": "2024-08-20T00:00:00.000000+0000", "name": "kratos", "mythology": "greek"}, ) concurrent_cursor.observe(record_1) @@ -1047,11 +1058,18 @@ def test_close_partition_with_slice_range_granularity_concurrent_cursor_from_dat _A_STREAM_NAMESPACE, { "slices": [ - {"start": "2024-07-01T00:00:00.000Z", "end": "2024-07-31T00:00:00.000Z", "most_recent_cursor_value": "2024-07-25T00:00:00.000Z"}, - {"start": "2024-08-15T00:00:00.000Z", "end": "2024-08-29T00:00:00.000Z", "most_recent_cursor_value": "2024-08-20T00:00:00.000Z"} - + { + "start": "2024-07-01T00:00:00.000Z", + "end": "2024-07-31T00:00:00.000Z", + "most_recent_cursor_value": "2024-07-25T00:00:00.000Z", + }, + { + "start": "2024-08-15T00:00:00.000Z", + "end": "2024-08-29T00:00:00.000Z", + "most_recent_cursor_value": "2024-08-20T00:00:00.000Z", + }, ], - "state_type": "date-range" + "state_type": "date-range", }, ) assert state_manager.update_state_for_stream.call_count == 3 diff --git a/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py b/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py index 67e7e350..7bb02c55 100644 --- a/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py +++ b/unit_tests/sources/streams/http/error_handlers/test_default_backoff_strategy.py @@ -22,7 +22,6 @@ def backoff_time( def test_given_valid_arguments_default_backoff_strategy_returns_values(): - response = requests.Response() response.headers["Retry-After"] = 123 backoff_strategy = CustomBackoffStrategy() diff --git a/unit_tests/sources/streams/http/error_handlers/test_http_status_error_handler.py b/unit_tests/sources/streams/http/error_handlers/test_http_status_error_handler.py index 6da3e15b..3ec1cc1d 100644 --- a/unit_tests/sources/streams/http/error_handlers/test_http_status_error_handler.py +++ b/unit_tests/sources/streams/http/error_handlers/test_http_status_error_handler.py @@ -41,7 +41,6 @@ def test_given_error_code_in_response_http_status_error_handler_returns_expected def test_given_no_response_argument_returns_expected_action(): - error_resolution = HttpStatusErrorHandler(logger).interpret_response() assert error_resolution.response_action == ResponseAction.FAIL @@ -49,7 +48,6 @@ def test_given_no_response_argument_returns_expected_action(): def test_given_unmapped_status_error_returns_retry_action_as_transient_error(): - response = requests.Response() response.status_code = 508 @@ -61,7 +59,6 @@ def test_given_unmapped_status_error_returns_retry_action_as_transient_error(): def test_given_requests_exception_returns_retry_action_as_transient_error(): - error_resolution = HttpStatusErrorHandler(logger).interpret_response(requests.RequestException()) assert error_resolution.response_action == ResponseAction.RETRY @@ -69,7 +66,6 @@ def test_given_requests_exception_returns_retry_action_as_transient_error(): def test_given_unmapped_exception_returns_retry_action_as_system_error(): - error_resolution = HttpStatusErrorHandler(logger).interpret_response(Exception()) assert error_resolution.response_action == ResponseAction.RETRY @@ -77,7 +73,6 @@ def test_given_unmapped_exception_returns_retry_action_as_system_error(): def test_given_unexpected_response_type_returns_fail_action_as_system_error(): - error_resolution = HttpStatusErrorHandler(logger).interpret_response("unexpected response type") assert error_resolution.response_action 
== ResponseAction.FAIL @@ -86,7 +81,6 @@ def test_given_unexpected_response_type_returns_fail_action_as_system_error(): def test_given_injected_error_mapping_returns_expected_action(): - default_error_handler = HttpStatusErrorHandler(logger) mock_response = MagicMock(spec=requests.Response) diff --git a/unit_tests/sources/streams/http/error_handlers/test_json_error_message_parser.py b/unit_tests/sources/streams/http/error_handlers/test_json_error_message_parser.py index 90ea36bc..72a37722 100644 --- a/unit_tests/sources/streams/http/error_handlers/test_json_error_message_parser.py +++ b/unit_tests/sources/streams/http/error_handlers/test_json_error_message_parser.py @@ -22,7 +22,8 @@ (b'{"error_message": "error_message error message"}', "error_message error message"), (b'{"msg": "msg error message"}', "msg error message"), (b'{"reason": "reason error message"}', "reason error message"), - (b'{"status_message": "status_message error message"}', "status_message error message"),], + (b'{"status_message": "status_message error message"}', "status_message error message"), + ], ) def test_given_error_message_in_response_body_parse_response_error_message_returns_error_message(response_body, expected_error_message): response = requests.Response() diff --git a/unit_tests/sources/streams/http/test_http.py b/unit_tests/sources/streams/http/test_http.py index 8737289a..3aeb2186 100644 --- a/unit_tests/sources/streams/http/test_http.py +++ b/unit_tests/sources/streams/http/test_http.py @@ -301,7 +301,6 @@ def request2response(self, request, context): return json.dumps({"body": request.text, "content_type": request.headers.get("Content-Type")}) def test_json_body(self, mocker, requests_mock): - stream = PostHttpStream() mocker.patch.object(stream, "request_body_json", return_value=self.json_body) @@ -312,7 +311,6 @@ def test_json_body(self, mocker, requests_mock): assert json.loads(response["body"]) == self.json_body def test_text_body(self, mocker, requests_mock): - stream = PostHttpStream() mocker.patch.object(stream, "request_body_data", return_value=self.data_body) @@ -323,7 +321,6 @@ def test_text_body(self, mocker, requests_mock): assert response["body"] == self.data_body def test_form_body(self, mocker, requests_mock): - stream = PostHttpStream() mocker.patch.object(stream, "request_body_data", return_value=self.form_body) @@ -657,7 +654,6 @@ def test_join_url(test_name, base_url, path, expected_full_url): ], ) def test_duplicate_request_params_are_deduped(deduplicate_query_params, path, params, expected_url): - stream = StubBasicReadHttpStream(deduplicate_query_params) if expected_url is None: diff --git a/unit_tests/test/test_entrypoint_wrapper.py b/unit_tests/test/test_entrypoint_wrapper.py index 11dfc587..37533057 100644 --- a/unit_tests/test/test_entrypoint_wrapper.py +++ b/unit_tests/test/test_entrypoint_wrapper.py @@ -120,11 +120,7 @@ def _validate_tmp_json_file(expected, file_path) -> None: def _validate_tmp_catalog(expected, file_path) -> None: - assert ConfiguredAirbyteCatalogSerializer.load( - orjson.loads( - open(file_path).read() - ) - ) == expected + assert ConfiguredAirbyteCatalogSerializer.load(orjson.loads(open(file_path).read())) == expected def _create_tmp_file_validation(entrypoint, expected_config, expected_catalog: Optional[Any] = None, expected_state: Optional[Any] = None): diff --git a/unit_tests/test_entrypoint.py b/unit_tests/test_entrypoint.py index 123a4450..3d4ffefa 100644 --- a/unit_tests/test_entrypoint.py +++ b/unit_tests/test_entrypoint.py @@ -257,12 +257,7 @@ def 
test_run_check_with_config_error(entrypoint: AirbyteEntrypoint, mocker, spec expected_messages = [ orjson.dumps(AirbyteMessageSerializer.dump(MESSAGE_FROM_REPOSITORY)).decode(), orjson.dumps(AirbyteMessageSerializer.dump(expected_trace)).decode(), - _wrap_message( - AirbyteConnectionStatus( - status=Status.FAILED, - message=AirbyteTracedException.from_exception(exception).message - ) - ), + _wrap_message(AirbyteConnectionStatus(status=Status.FAILED, message=AirbyteTracedException.from_exception(exception).message)), ] assert messages == expected_messages diff --git a/unit_tests/utils/test_traced_exception.py b/unit_tests/utils/test_traced_exception.py index ea559a31..90c83165 100644 --- a/unit_tests/utils/test_traced_exception.py +++ b/unit_tests/utils/test_traced_exception.py @@ -118,7 +118,9 @@ def test_given_both_init_and_as_message_with_stream_descriptor_when_as_airbyte_m assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR -def test_given_both_init_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None: +def test_given_both_init_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> ( + None +): traced_exc = AirbyteTracedException(stream_descriptor=_A_STREAM_DESCRIPTOR) message = traced_exc.as_sanitized_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR) assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR @@ -130,7 +132,9 @@ def test_given_both_from_exception_and_as_message_with_stream_descriptor_when_as assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR -def test_given_both_from_exception_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> None: +def test_given_both_from_exception_and_as_sanitized_airbyte_message_with_stream_descriptor_when_as_airbyte_message_use_init_stream_descriptor() -> ( + None +): traced_exc = AirbyteTracedException.from_exception(_AN_EXCEPTION, stream_descriptor=_A_STREAM_DESCRIPTOR) message = traced_exc.as_sanitized_airbyte_message(stream_descriptor=_ANOTHER_STREAM_DESCRIPTOR) assert message.trace.error.stream_descriptor == _A_STREAM_DESCRIPTOR