diff --git a/airbyte_cdk/config_observation.py b/airbyte_cdk/config_observation.py index 4a80a8e9..40d6a89a 100644 --- a/airbyte_cdk/config_observation.py +++ b/airbyte_cdk/config_observation.py @@ -57,7 +57,7 @@ def __setitem__(self, item: Any, value: Any) -> None: for i, sub_value in enumerate(value): if isinstance(sub_value, MutableMapping): value[i] = ObservedDict(sub_value, self.observer) - super(ObservedDict, self).__setitem__(item, value) + super().__setitem__(item, value) if self.update_on_unchanged_value or value != previous_value: self.observer.update() diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py index 9b0da16e..da42bcac 100644 --- a/airbyte_cdk/connector.py +++ b/airbyte_cdk/connector.py @@ -4,12 +4,11 @@ from __future__ import annotations import json -import logging import os import pkgutil from abc import ABC, abstractmethod from collections.abc import Mapping -from typing import Any, Generic, Protocol, TypeVar +from typing import TYPE_CHECKING, Any, Generic, Protocol, TypeVar import yaml @@ -20,6 +19,10 @@ ) +if TYPE_CHECKING: + import logging + + def load_optional_package_file(package: str, filename: str) -> bytes | None: """Gets a resource from a package, returning None if it does not exist""" try: @@ -50,7 +53,7 @@ def read_config(config_path: str) -> Mapping[str, Any]: @staticmethod def _read_json_file(file_path: str) -> Any: - with open(file_path) as file: + with open(file_path, encoding="utf-8") as file: contents = file.read() try: @@ -62,7 +65,7 @@ def _read_json_file(file_path: str) -> Any: @staticmethod def write_config(config: TConfig, config_path: str) -> None: - with open(config_path, "w") as fh: + with open(config_path, "w", encoding="utf-8") as fh: fh.write(json.dumps(config)) def spec(self, logger: logging.Logger) -> ConnectorSpecification: diff --git a/airbyte_cdk/connector_builder/connector_builder_handler.py b/airbyte_cdk/connector_builder/connector_builder_handler.py index e90be3ff..8b335920 100644 --- a/airbyte_cdk/connector_builder/connector_builder_handler.py +++ b/airbyte_cdk/connector_builder/connector_builder_handler.py @@ -4,9 +4,8 @@ from __future__ import annotations import dataclasses -from collections.abc import Mapping from datetime import datetime -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.connector_builder.message_grouper import MessageGrouper from airbyte_cdk.models import ( @@ -17,7 +16,6 @@ Type, ) from airbyte_cdk.models import Type as MessageType -from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( ModelToComponentFactory, @@ -26,6 +24,12 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource + + DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5 DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5 DEFAULT_MAXIMUM_RECORDS = 100 diff --git a/airbyte_cdk/connector_builder/main.py b/airbyte_cdk/connector_builder/main.py index d110bf2e..39f1a941 100644 --- a/airbyte_cdk/connector_builder/main.py +++ b/airbyte_cdk/connector_builder/main.py @@ -4,10 +4,9 @@ from __future__ import annotations import sys -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from orjson import orjson +import orjson from airbyte_cdk.connector import BaseConnector from airbyte_cdk.connector_builder.connector_builder_handler import ( @@ -25,11 +24,18 @@ ConfiguredAirbyteCatalog, ConfiguredAirbyteCatalogSerializer, ) -from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource from airbyte_cdk.sources.source import Source from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.manifest_declarative_source import ( + ManifestDeclarativeSource, + ) + + def get_config_and_catalog_from_args( args: list[str], ) -> tuple[str, Mapping[str, Any], ConfiguredAirbyteCatalog | None, Any]: diff --git a/airbyte_cdk/connector_builder/message_grouper.py b/airbyte_cdk/connector_builder/message_grouper.py index c9fae791..7839a743 100644 --- a/airbyte_cdk/connector_builder/message_grouper.py +++ b/airbyte_cdk/connector_builder/message_grouper.py @@ -5,10 +5,9 @@ import json import logging -from collections.abc import Iterable, Iterator, Mapping from copy import deepcopy from json import JSONDecodeError -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.connector_builder.models import ( AuxiliaryRequest, @@ -31,18 +30,28 @@ TraceType, ) from airbyte_cdk.models import Type as MessageType -from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.utils.slice_logger import SliceLogger -from airbyte_cdk.sources.utils.types import JsonType from airbyte_cdk.utils import AirbyteTracedException from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer from airbyte_cdk.utils.schema_inferrer import SchemaInferrer, SchemaValidationException +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator, Mapping + + from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource + from airbyte_cdk.sources.utils.types import JsonType + + class MessageGrouper: logger = logging.getLogger("airbyte.connector-builder") - def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000): + def __init__( + self, + max_pages_per_slice: int, + max_slices: int, + max_record_limit: int = 1000, + ) -> None: self._max_pages_per_slice = max_pages_per_slice self._max_slices = max_slices self._max_record_limit = max_record_limit diff --git a/airbyte_cdk/destinations/destination.py b/airbyte_cdk/destinations/destination.py index 74644c0c..2264c594 100644 --- a/airbyte_cdk/destinations/destination.py +++ b/airbyte_cdk/destinations/destination.py @@ -8,8 +8,7 @@ import logging import sys from abc import ABC, abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from orjson import orjson @@ -26,6 +25,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + logger = logging.getLogger("airbyte") @@ -62,7 +65,7 @@ def _run_write( input_stream: io.TextIOWrapper, ) -> Iterable[AirbyteMessage]: catalog = ConfiguredAirbyteCatalogSerializer.load( - orjson.loads(open(configured_catalog_path).read()) + orjson.loads(open(configured_catalog_path, encoding="utf-8").read()) ) input_messages = self._parse_input_stream(input_stream) logger.info("Begin writing to the destination...") @@ -109,7 +112,7 @@ def parse_args(self, args: list[str]) -> argparse.Namespace: cmd = parsed_args.command if not cmd: raise Exception("No command entered. ") - if cmd not in ["spec", "check", "write"]: + if cmd not in {"spec", "check", "write"}: # This is technically dead code since parse_args() would fail if this was the case # But it's non-obvious enough to warrant placing it here anyways raise Exception(f"Unknown command entered: {cmd}") @@ -134,7 +137,7 @@ def run_cmd(self, parsed_args: argparse.Namespace) -> Iterable[AirbyteMessage]: if connection_status and cmd == "check": yield connection_status return - raise traced_exc + raise if cmd == "check": yield self._run_check(config=config) diff --git a/airbyte_cdk/destinations/vector_db_based/document_processor.py b/airbyte_cdk/destinations/vector_db_based/document_processor.py index 983d90d6..6d5baf71 100644 --- a/airbyte_cdk/destinations/vector_db_based/document_processor.py +++ b/airbyte_cdk/destinations/vector_db_based/document_processor.py @@ -5,9 +5,8 @@ import json import logging -from collections.abc import Mapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import dpath from langchain.text_splitter import Language, RecursiveCharacterTextSplitter @@ -29,6 +28,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType +if TYPE_CHECKING: + from collections.abc import Mapping + + METADATA_STREAM_FIELD = "_ab_stream" METADATA_RECORD_ID_FIELD = "_ab_record_id" @@ -116,8 +119,13 @@ def _get_text_splitter( ), disallowed_special=(), ) + return None - def __init__(self, config: ProcessingConfigModel, catalog: ConfiguredAirbyteCatalog): + def __init__( + self, + config: ProcessingConfigModel, + catalog: ConfiguredAirbyteCatalog, + ) -> None: self.streams = { create_stream_identifier(stream.stream): stream for stream in catalog.streams } @@ -154,9 +162,7 @@ def process(self, record: AirbyteRecordMessage) -> tuple[list[Chunk], str | None for chunk_document in self._split_document(doc) ] id_to_delete = ( - doc.metadata[METADATA_RECORD_ID_FIELD] - if METADATA_RECORD_ID_FIELD in doc.metadata - else None + doc.metadata.get(METADATA_RECORD_ID_FIELD, None) ) return chunks, id_to_delete diff --git a/airbyte_cdk/destinations/vector_db_based/embedder.py b/airbyte_cdk/destinations/vector_db_based/embedder.py index cba3e5f3..a4c670ec 100644 --- a/airbyte_cdk/destinations/vector_db_based/embedder.py +++ b/airbyte_cdk/destinations/vector_db_based/embedder.py @@ -6,27 +6,30 @@ import os from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import cast +from typing import TYPE_CHECKING, cast from langchain.embeddings.cohere import CohereEmbeddings from langchain.embeddings.fake import FakeEmbeddings from langchain.embeddings.localai import LocalAIEmbeddings from langchain.embeddings.openai import OpenAIEmbeddings -from airbyte_cdk.destinations.vector_db_based.config import ( - AzureOpenAIEmbeddingConfigModel, - CohereEmbeddingConfigModel, - FakeEmbeddingConfigModel, - FromFieldEmbeddingConfigModel, - OpenAICompatibleEmbeddingConfigModel, - OpenAIEmbeddingConfigModel, - ProcessingConfigModel, -) from airbyte_cdk.destinations.vector_db_based.utils import create_chunks, format_exception -from airbyte_cdk.models import AirbyteRecordMessage from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType +if TYPE_CHECKING: + from airbyte_cdk.destinations.vector_db_based.config import ( + AzureOpenAIEmbeddingConfigModel, + CohereEmbeddingConfigModel, + FakeEmbeddingConfigModel, + FromFieldEmbeddingConfigModel, + OpenAICompatibleEmbeddingConfigModel, + OpenAIEmbeddingConfigModel, + ProcessingConfigModel, + ) + from airbyte_cdk.models import AirbyteRecordMessage + + @dataclass class Document: page_content: str @@ -67,7 +70,11 @@ def embedding_dimensions(self) -> int: class BaseOpenAIEmbedder(Embedder): - def __init__(self, embeddings: OpenAIEmbeddings, chunk_size: int): + def __init__( + self, + embeddings: OpenAIEmbeddings, + chunk_size: int, + ) -> None: super().__init__() self.embeddings = embeddings self.chunk_size = chunk_size @@ -103,7 +110,11 @@ def embedding_dimensions(self) -> int: class OpenAIEmbedder(BaseOpenAIEmbedder): - def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int): + def __init__( + self, + config: OpenAIEmbeddingConfigModel, + chunk_size: int, + ) -> None: super().__init__( OpenAIEmbeddings( openai_api_key=config.openai_key, max_retries=15, disallowed_special=() @@ -113,7 +124,11 @@ def __init__(self, config: OpenAIEmbeddingConfigModel, chunk_size: int): class AzureOpenAIEmbedder(BaseOpenAIEmbedder): - def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int): + def __init__( + self, + config: AzureOpenAIEmbeddingConfigModel, + chunk_size: int, + ) -> None: # Azure OpenAI API has — as of 20230927 — a limit of 16 documents per request super().__init__( OpenAIEmbeddings( @@ -134,7 +149,7 @@ def __init__(self, config: AzureOpenAIEmbeddingConfigModel, chunk_size: int): class CohereEmbedder(Embedder): - def __init__(self, config: CohereEmbeddingConfigModel): + def __init__(self, config: CohereEmbeddingConfigModel) -> None: super().__init__() # Client is set internally self.embeddings = CohereEmbeddings( @@ -161,7 +176,7 @@ def embedding_dimensions(self) -> int: class FakeEmbedder(Embedder): - def __init__(self, config: FakeEmbeddingConfigModel): + def __init__(self, config: FakeEmbeddingConfigModel) -> None: super().__init__() self.embeddings = FakeEmbeddings(size=OPEN_AI_VECTOR_SIZE) @@ -188,7 +203,7 @@ def embedding_dimensions(self) -> int: class OpenAICompatibleEmbedder(Embedder): - def __init__(self, config: OpenAICompatibleEmbeddingConfigModel): + def __init__(self, config: OpenAICompatibleEmbeddingConfigModel) -> None: super().__init__() self.config = config # Client is set internally @@ -228,7 +243,7 @@ def embedding_dimensions(self) -> int: class FromFieldEmbedder(Embedder): - def __init__(self, config: FromFieldEmbeddingConfigModel): + def __init__(self, config: FromFieldEmbeddingConfigModel) -> None: super().__init__() self.config = config @@ -249,7 +264,7 @@ def embed_documents(self, documents: list[Document]) -> list[list[float] | None] message=f"Record {str(data)[:250]}... in stream {document.record.stream} does not contain embedding vector field {self.config.field_name}. Please check your embedding configuration, the embedding vector field has to be set correctly on every record.", ) field = data[self.config.field_name] - if not isinstance(field, list) or not all(isinstance(x, (int, float)) for x in field): + if not isinstance(field, list) or not all(isinstance(x, int | float) for x in field): raise AirbyteTracedException( internal_message="Embedding vector field not a list of numbers", failure_type=FailureType.config_error, @@ -289,7 +304,7 @@ def create_from_config( | OpenAICompatibleEmbeddingConfigModel, processing_config: ProcessingConfigModel, ) -> Embedder: - if embedding_config.mode == "azure_openai" or embedding_config.mode == "openai": + if embedding_config.mode in {"azure_openai", "openai"}: return cast( Embedder, embedder_map[embedding_config.mode](embedding_config, processing_config.chunk_size), diff --git a/airbyte_cdk/destinations/vector_db_based/indexer.py b/airbyte_cdk/destinations/vector_db_based/indexer.py index 1ce58965..032144ca 100644 --- a/airbyte_cdk/destinations/vector_db_based/indexer.py +++ b/airbyte_cdk/destinations/vector_db_based/indexer.py @@ -5,11 +5,14 @@ import itertools from abc import ABC, abstractmethod -from collections.abc import Generator, Iterable -from typing import Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar -from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk -from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog + +if TYPE_CHECKING: + from collections.abc import Generator, Iterable + + from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk + from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog class Indexer(ABC): @@ -19,7 +22,7 @@ class Indexer(ABC): In a destination connector, implement a custom indexer by extending this class and implementing the abstract methods. """ - def __init__(self, config: Any): + def __init__(self, config: Any) -> None: self.config = config pass diff --git a/airbyte_cdk/destinations/vector_db_based/test_utils.py b/airbyte_cdk/destinations/vector_db_based/test_utils.py index 80cc17d2..2654068f 100644 --- a/airbyte_cdk/destinations/vector_db_based/test_utils.py +++ b/airbyte_cdk/destinations/vector_db_based/test_utils.py @@ -59,5 +59,5 @@ def _record(self, stream: str, str_value: str, int_value: int) -> AirbyteMessage ) def setUp(self) -> None: - with open("secrets/config.json") as f: + with open("secrets/config.json", encoding="utf-8") as f: self.config = json.loads(f.read()) diff --git a/airbyte_cdk/destinations/vector_db_based/utils.py b/airbyte_cdk/destinations/vector_db_based/utils.py index 288b2df6..6930e2e4 100644 --- a/airbyte_cdk/destinations/vector_db_based/utils.py +++ b/airbyte_cdk/destinations/vector_db_based/utils.py @@ -5,12 +5,15 @@ import itertools import traceback -from collections.abc import Iterable, Iterator -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteRecordMessage, AirbyteStream +if TYPE_CHECKING: + from collections.abc import Iterable, Iterator + + def format_exception(exception: Exception) -> str: return ( str(exception) diff --git a/airbyte_cdk/destinations/vector_db_based/writer.py b/airbyte_cdk/destinations/vector_db_based/writer.py index 8a9a0da1..6b6d1132 100644 --- a/airbyte_cdk/destinations/vector_db_based/writer.py +++ b/airbyte_cdk/destinations/vector_db_based/writer.py @@ -4,15 +4,20 @@ from __future__ import annotations from collections import defaultdict -from collections.abc import Iterable +from typing import TYPE_CHECKING -from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel from airbyte_cdk.destinations.vector_db_based.document_processor import Chunk, DocumentProcessor from airbyte_cdk.destinations.vector_db_based.embedder import Document, Embedder -from airbyte_cdk.destinations.vector_db_based.indexer import Indexer from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog, Type +if TYPE_CHECKING: + from collections.abc import Iterable + + from airbyte_cdk.destinations.vector_db_based.config import ProcessingConfigModel + from airbyte_cdk.destinations.vector_db_based.indexer import Indexer + + class Writer: """The Writer class is orchestrating the document processor, the embedder and the indexer: * Incoming records are passed through the document processor to generate chunks diff --git a/airbyte_cdk/entrypoint.py b/airbyte_cdk/entrypoint.py index 163d8fce..a5441c73 100644 --- a/airbyte_cdk/entrypoint.py +++ b/airbyte_cdk/entrypoint.py @@ -12,16 +12,14 @@ import sys import tempfile from collections import defaultdict -from collections.abc import Iterable, Mapping from functools import wraps -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import urlparse import requests from orjson import orjson from requests import PreparedRequest, Response, Session -from airbyte_cdk.connector import TConfig from airbyte_cdk.exception_handler import init_uncaught_exception_handler from airbyte_cdk.logger import init_logger from airbyte_cdk.models import ( # type: ignore [attr-defined] @@ -45,6 +43,12 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk.connector import TConfig + + logger = init_logger("airbyte") VALID_URL_SCHEMES = ["https"] @@ -52,7 +56,7 @@ class AirbyteEntrypoint: - def __init__(self, source: Source): + def __init__(self, source: Source) -> None: init_uncaught_exception_handler(logger) # Deployment mode is read when instantiating the entrypoint because it is the common path shared by syncs and connector builder test requests @@ -188,7 +192,7 @@ def check( # The platform uses the exit code to surface unexpected failures so we raise the exception if the failure type not a config error # If the failure is not exceptional, we'll emit a failed connection status message and return if traced_exc.failure_type != FailureType.config_error: - raise traced_exc + raise if connection_status: yield from self._emit_queued_messages(self.source) yield connection_status @@ -201,7 +205,7 @@ def check( # The platform uses the exit code to surface unexpected failures so we raise the exception if the failure type not a config error # If the failure is not exceptional, we'll emit a failed connection status message and return if traced_exc.failure_type != FailureType.config_error: - raise traced_exc + raise else: yield AirbyteMessage( type=Type.CONNECTION_STATUS, diff --git a/airbyte_cdk/exception_handler.py b/airbyte_cdk/exception_handler.py index 7525bb85..5c493816 100644 --- a/airbyte_cdk/exception_handler.py +++ b/airbyte_cdk/exception_handler.py @@ -3,16 +3,19 @@ # from __future__ import annotations -import logging import sys -from collections.abc import Mapping -from types import TracebackType -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + import logging + from collections.abc import Mapping + from types import TracebackType + + def assemble_uncaught_exception( exception_type: type[BaseException], exception_value: BaseException ) -> AirbyteTracedException: @@ -49,7 +52,7 @@ def hook_fn( def generate_failed_streams_error_message(stream_failures: Mapping[str, list[Exception]]) -> str: failures = "\n".join( [ - f"{stream}: {filter_secrets(exception.__repr__())}" + f"{stream}: {filter_secrets(repr(exception))}" for stream, exceptions in stream_failures.items() for exception in exceptions ] diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 67c8588e..0687c6c2 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -6,8 +6,7 @@ import json import logging import logging.config -from collections.abc import Callable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from orjson import orjson @@ -21,6 +20,10 @@ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + LOGGING_CONFIG = { "version": 1, "disable_existing_loggers": False, diff --git a/airbyte_cdk/models/airbyte_protocol.py b/airbyte_cdk/models/airbyte_protocol.py index ca390e5c..0b9ede19 100644 --- a/airbyte_cdk/models/airbyte_protocol.py +++ b/airbyte_cdk/models/airbyte_protocol.py @@ -3,15 +3,18 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Annotated, Any - -from serpyco_rs.metadata import Alias +from typing import TYPE_CHECKING, Annotated, Any from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*' -from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage + +if TYPE_CHECKING: + from collections.abc import Mapping + + from serpyco_rs.metadata import Alias + + from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage # ruff: noqa: F405 # ignore fuzzy import issues with 'import *' diff --git a/airbyte_cdk/models/airbyte_protocol_serializers.py b/airbyte_cdk/models/airbyte_protocol_serializers.py index 8ae8f2cc..5d6e48b5 100644 --- a/airbyte_cdk/models/airbyte_protocol_serializers.py +++ b/airbyte_cdk/models/airbyte_protocol_serializers.py @@ -19,7 +19,7 @@ class AirbyteStateBlobType(CustomType[AirbyteStateBlob, dict[str, Any]]): def serialize(self, value: AirbyteStateBlob) -> dict[str, Any]: # cant use orjson.dumps() directly because private attributes are excluded, e.g. "__ab_full_refresh_sync_complete" - return {k: v for k, v in value.__dict__.items()} + return dict(value.__dict__.items()) def deserialize(self, value: dict[str, Any]) -> AirbyteStateBlob: return AirbyteStateBlob(value) diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index e24a6967..9c6dc81d 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -3,10 +3,9 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Iterable, Iterator, Mapping, MutableMapping from typing import ( + TYPE_CHECKING, Any, ) @@ -27,8 +26,6 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository from airbyte_cdk.sources.source import Source -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.streams.http.http import HttpStream from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config @@ -40,6 +37,14 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Iterator, Mapping, MutableMapping + + from airbyte_cdk.sources.streams import Stream + from airbyte_cdk.sources.streams.core import StreamData + + _default_message_repository = InMemoryMessageRepository() diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py b/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py index 3940a077..7594d481 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py @@ -3,32 +3,37 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable +from typing import TYPE_CHECKING from airbyte_cdk.exception_handler import generate_failed_streams_error_message from airbyte_cdk.models import AirbyteMessage, AirbyteStreamStatus, FailureType, StreamDescriptor from airbyte_cdk.models import Type as MessageType -from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import ( - PartitionGenerationCompletedSentinel, -) -from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException -from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager -from airbyte_cdk.sources.message import MessageRepository -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream -from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer -from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record -from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message -from airbyte_cdk.sources.utils.slice_logger import SliceLogger from airbyte_cdk.utils import AirbyteTracedException from airbyte_cdk.utils.stream_status_utils import ( as_airbyte_message as stream_status_as_airbyte_message, ) +if TYPE_CHECKING: + import logging + from collections.abc import Iterable + + from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import ( + PartitionGenerationCompletedSentinel, + ) + from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException + from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer + from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record + from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel + from airbyte_cdk.sources.utils.slice_logger import SliceLogger + + class ConcurrentReadProcessor: def __init__( self, @@ -39,7 +44,7 @@ def __init__( slice_logger: SliceLogger, message_repository: MessageRepository, partition_reader: PartitionReader, - ): + ) -> None: """This class is responsible for handling items from a concurrent stream read process. :param stream_instances_to_read_from: List of streams to read from :param partition_enqueuer: PartitionEnqueuer instance @@ -208,10 +213,8 @@ def is_done(self) -> bool: 3. All partitions for all streams are closed """ is_done = all( - [ - self._is_stream_done(stream_name) - for stream_name in self._stream_name_to_instance.keys() - ] + self._is_stream_done(stream_name) + for stream_name in self._stream_name_to_instance ) if is_done and self._exceptions_per_stream_name: error_message = generate_failed_streams_error_message(self._exceptions_per_stream_name) diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_source.py b/airbyte_cdk/sources/concurrent_source/concurrent_source.py index 45a206d2..e1879e22 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_source.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_source.py @@ -4,11 +4,9 @@ from __future__ import annotations import concurrent -import logging -from collections.abc import Iterable, Iterator from queue import Queue +from typing import TYPE_CHECKING -from airbyte_cdk.models import AirbyteMessage from airbyte_cdk.sources.concurrent_source.concurrent_read_processor import ConcurrentReadProcessor from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import ( PartitionGenerationCompletedSentinel, @@ -16,7 +14,6 @@ from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition @@ -28,6 +25,14 @@ from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Iterator + + from airbyte_cdk.models import AirbyteMessage + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + + class ConcurrentSource: """A Source that reads data from multiple AbstractStreams concurrently. It does so by submitting partition generation, and partition read tasks to a thread pool. diff --git a/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py b/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py index fd184423..f51f0be4 100644 --- a/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +++ b/airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py @@ -3,18 +3,12 @@ # from __future__ import annotations -import logging from abc import ABC -from collections.abc import Callable, Iterator, Mapping, MutableMapping from datetime import timedelta -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteMessage, AirbyteStateMessage, ConfiguredAirbyteCatalog from airbyte_cdk.sources import AbstractSource -from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource -from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade from airbyte_cdk.sources.streams.concurrent.cursor import ( @@ -25,9 +19,19 @@ FinalStateCursor, GapType, ) -from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( - AbstractStreamStateConverter, -) + + +if TYPE_CHECKING: + import logging + from collections.abc import Callable, Iterator, Mapping, MutableMapping + + from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource + from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager + from airbyte_cdk.sources.streams import Stream + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( + AbstractStreamStateConverter, + ) DEFAULT_LOOKBACK_SECONDS = 0 diff --git a/airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py b/airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py index 98265dcf..26fd0509 100644 --- a/airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py +++ b/airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py @@ -3,15 +3,19 @@ # from __future__ import annotations -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream +from typing import TYPE_CHECKING -class PartitionGenerationCompletedSentinel: +if TYPE_CHECKING: + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + + +class PartitionGenerationCompletedSentinel: # noqa: PLW1641 # TODO: Should implement __hash__() """A sentinel object indicating all partitions for a stream were produced. Includes a pointer to the stream that was processed. """ - def __init__(self, stream: AbstractStream): + def __init__(self, stream: AbstractStream) -> None: """:param stream: The stream that was processed""" self.stream = stream diff --git a/airbyte_cdk/sources/concurrent_source/stream_thread_exception.py b/airbyte_cdk/sources/concurrent_source/stream_thread_exception.py index 5667a1e8..5e84738e 100644 --- a/airbyte_cdk/sources/concurrent_source/stream_thread_exception.py +++ b/airbyte_cdk/sources/concurrent_source/stream_thread_exception.py @@ -2,8 +2,9 @@ from __future__ import annotations -class StreamThreadException(Exception): - def __init__(self, exception: Exception, stream_name: str): +# TODO: (N818) Should implement __hash__() +class StreamThreadException(Exception): # noqa: PLW1641 # Should implement __hash__ + def __init__(self, exception: Exception, stream_name: str) -> None: self._exception = exception self._stream_name = stream_name diff --git a/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py b/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py index 1c59325d..23a48731 100644 --- a/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +++ b/airbyte_cdk/sources/concurrent_source/thread_pool_manager.py @@ -3,11 +3,14 @@ # from __future__ import annotations -import logging import threading -from collections.abc import Callable -from concurrent.futures import Future, ThreadPoolExecutor -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + import logging + from collections.abc import Callable + from concurrent.futures import Future, ThreadPoolExecutor class ThreadPoolManager: @@ -20,7 +23,7 @@ def __init__( threadpool: ThreadPoolExecutor, logger: logging.Logger, max_concurrent_tasks: int = DEFAULT_MAX_QUEUE_SIZE, - ): + ) -> None: """:param threadpool: The threadpool to use :param logger: The logger to use :param max_concurrent_tasks: The maximum number of tasks that can be pending at the same time @@ -78,7 +81,7 @@ def _shutdown(self) -> None: self._threadpool.shutdown(wait=False, cancel_futures=True) def is_done(self) -> bool: - return all([f.done() for f in self._futures]) + return all(f.done() for f in self._futures) def check_for_errors_and_shutdown(self) -> None: """Check if any of the futures have an exception, and raise it if so. If all futures are done, shutdown the threadpool. diff --git a/airbyte_cdk/sources/connector_state_manager.py b/airbyte_cdk/sources/connector_state_manager.py index c700f8bb..43811832 100644 --- a/airbyte_cdk/sources/connector_state_manager.py +++ b/airbyte_cdk/sources/connector_state_manager.py @@ -4,9 +4,8 @@ from __future__ import annotations import copy -from collections.abc import Mapping, MutableMapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import ( AirbyteMessage, @@ -19,6 +18,10 @@ from airbyte_cdk.models import Type as MessageType +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + @dataclass(frozen=True) class HashableStreamDescriptor: """Helper class that overrides the existing StreamDescriptor class that is auto generated from the Airbyte Protocol and @@ -34,7 +37,7 @@ class ConnectorStateManager: interface. It also provides methods to extract and update state """ - def __init__(self, state: list[AirbyteStateMessage] | None = None): + def __init__(self, state: list[AirbyteStateMessage] | None = None) -> None: shared_state, per_stream_states = self._extract_from_state_message(state) # We explicitly throw an error if we receive a GLOBAL state message that contains a shared_state because API sources are @@ -59,7 +62,7 @@ def get_stream_state(self, stream_name: str, namespace: str | None) -> MutableMa HashableStreamDescriptor(name=stream_name, namespace=namespace) ) if stream_state: - return copy.deepcopy({k: v for k, v in stream_state.__dict__.items()}) + return copy.deepcopy(dict(stream_state.__dict__.items())) return {} def update_state_for_stream( diff --git a/airbyte_cdk/sources/declarative/async_job/job.py b/airbyte_cdk/sources/declarative/async_job/job.py index 3c561bbb..3dfde0ba 100644 --- a/airbyte_cdk/sources/declarative/async_job/job.py +++ b/airbyte_cdk/sources/declarative/async_job/job.py @@ -2,10 +2,14 @@ from __future__ import annotations from datetime import timedelta +from typing import TYPE_CHECKING from .status import AsyncJobStatus from airbyte_cdk.sources.declarative.async_job.timer import Timer -from airbyte_cdk.sources.types import StreamSlice + + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import StreamSlice class AsyncJob: diff --git a/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py b/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py index e485e5a1..e0c29bc2 100644 --- a/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +++ b/airbyte_cdk/sources/declarative/async_job/job_orchestrator.py @@ -6,15 +6,14 @@ import time import traceback import uuid -from collections.abc import Generator, Iterable, Mapping from datetime import timedelta from typing import ( + TYPE_CHECKING, Any, Generic, TypeVar, ) -from airbyte_cdk import StreamSlice from airbyte_cdk.logger import lazy_log from airbyte_cdk.models import FailureType from airbyte_cdk.sources.declarative.async_job.job import AsyncJob @@ -22,13 +21,19 @@ ConcurrentJobLimitReached, JobTracker, ) -from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Generator, Iterable, Mapping + + from airbyte_cdk import StreamSlice + from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository + from airbyte_cdk.sources.message import MessageRepository + + LOGGER = logging.getLogger("airbyte") _NO_TIMEOUT = timedelta.max _API_SIDE_RUNNING_STATUS = {AsyncJobStatus.RUNNING, AsyncJobStatus.TIMED_OUT} @@ -45,10 +50,8 @@ def __init__(self, jobs: list[AsyncJob], stream_slice: StreamSlice) -> None: def has_reached_max_attempt(self) -> bool: return any( - map( - lambda attempt_count: attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS, - self._attempts_per_job.values(), - ) + attempt_count >= self._MAX_NUMBER_OF_ATTEMPTS + for attempt_count in self._attempts_per_job.values() ) def replace_job(self, job_to_replace: AsyncJob, new_jobs: list[AsyncJob]) -> None: @@ -77,7 +80,7 @@ def stream_slice(self) -> StreamSlice: @property def status(self) -> AsyncJobStatus: """Given different job statuses, the priority is: FAILED, TIMED_OUT, RUNNING. Else, it means everything is completed.""" - statuses = set(map(lambda job: job.status(), self.jobs)) + statuses = {job.status() for job in self.jobs} if statuses == {AsyncJobStatus.COMPLETED}: return AsyncJobStatus.COMPLETED if AsyncJobStatus.FAILED in statuses: @@ -121,7 +124,7 @@ def has_next(self) -> bool: return True def add_at_the_beginning(self, item: T) -> None: - self._buffer = [item] + self._buffer + self._buffer = [item, *self._buffer] class AsyncJobOrchestrator: @@ -140,7 +143,8 @@ def __init__( slices: Iterable[StreamSlice], job_tracker: JobTracker, message_repository: MessageRepository, - exceptions_to_break_on: Iterable[type[Exception]] = tuple(), + exceptions_to_break_on: Iterable[type[Exception]] = (), + *, has_bulk_parent: bool = False, ) -> None: """If the stream slices provided as a parameters relies on a async job streams that relies on the same JobTracker, `has_bulk_parent` @@ -226,7 +230,7 @@ def _start_job(self, _slice: StreamSlice, previous_job_id: str | None = None) -> LOGGER.warning(f"Exception has occurred during job creation: {exception}") if self._is_breaking_exception(exception): self._job_tracker.remove_job(id_to_replace) - raise exception + raise return self._keep_api_budget_with_failed_job(_slice, exception, id_to_replace) def _keep_api_budget_with_failed_job( @@ -304,7 +308,7 @@ def _process_completed_partition(self, partition: AsyncPartition) -> None: Args: partition (AsyncPartition): The completed partition to process. """ - job_ids = list(map(lambda job: job.api_job_id(), {job for job in partition.jobs})) + job_ids = [job.api_job_id() for job in set(partition.jobs)] LOGGER.info( f"The following jobs for stream slice {partition.stream_slice} have been completed: {job_ids}." ) @@ -421,7 +425,7 @@ def create_and_get_completed_partitions(self) -> Iterable[AsyncPartition]: f"Caught exception that stops the processing of the jobs: {exception}" ) self._abort_all_running_jobs() - raise exception + raise self._non_breaking_exceptions.append(exception) @@ -434,10 +438,7 @@ def create_and_get_completed_partitions(self) -> Iterable[AsyncPartition]: raise AirbyteTracedException( message="", internal_message="\n".join( - [ - filter_secrets(exception.__repr__()) - for exception in self._non_breaking_exceptions - ] + [filter_secrets(repr(exception)) for exception in self._non_breaking_exceptions] ), failure_type=FailureType.config_error, ) diff --git a/airbyte_cdk/sources/declarative/async_job/job_tracker.py b/airbyte_cdk/sources/declarative/async_job/job_tracker.py index aff54038..2b4063d4 100644 --- a/airbyte_cdk/sources/declarative/async_job/job_tracker.py +++ b/airbyte_cdk/sources/declarative/async_job/job_tracker.py @@ -16,7 +16,7 @@ class ConcurrentJobLimitReached(Exception): class JobTracker: - def __init__(self, limit: int): + def __init__(self, limit: int) -> None: self._jobs: set[str] = set() self._limit = limit self._lock = threading.Lock() diff --git a/airbyte_cdk/sources/declarative/async_job/repository.py b/airbyte_cdk/sources/declarative/async_job/repository.py index ebd1a92d..1ff9d7b4 100644 --- a/airbyte_cdk/sources/declarative/async_job/repository.py +++ b/airbyte_cdk/sources/declarative/async_job/repository.py @@ -2,11 +2,14 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.declarative.async_job.job import AsyncJob -from airbyte_cdk.sources.types import StreamSlice + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.declarative.async_job.job import AsyncJob + from airbyte_cdk.sources.types import StreamSlice class AsyncJobRepository: diff --git a/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py b/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py index e1867412..c9dfefd0 100644 --- a/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/declarative_authenticator.py @@ -3,15 +3,18 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_token import ( AbstractHeaderAuthenticator, ) +if TYPE_CHECKING: + from collections.abc import Mapping + + @dataclass class DeclarativeAuthenticator(AbstractHeaderAuthenticator): """Interface used to associate which authenticators can be used as part of the declarative framework""" diff --git a/airbyte_cdk/sources/declarative/auth/jwt.py b/airbyte_cdk/sources/declarative/auth/jwt.py index 669dfa63..162b393f 100644 --- a/airbyte_cdk/sources/declarative/auth/jwt.py +++ b/airbyte_cdk/sources/declarative/auth/jwt.py @@ -4,10 +4,9 @@ from __future__ import annotations import base64 -from collections.abc import Mapping from dataclasses import InitVar, dataclass from datetime import datetime -from typing import Any +from typing import TYPE_CHECKING, Any import jwt @@ -17,6 +16,10 @@ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +if TYPE_CHECKING: + from collections.abc import Mapping + + class JwtAlgorithm(str): """Enum for supported JWT algorithms""" diff --git a/airbyte_cdk/sources/declarative/auth/oauth.py b/airbyte_cdk/sources/declarative/auth/oauth.py index 90210165..75b51fc9 100644 --- a/airbyte_cdk/sources/declarative/auth/oauth.py +++ b/airbyte_cdk/sources/declarative/auth/oauth.py @@ -3,9 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any import pendulum @@ -21,6 +20,10 @@ ) +if TYPE_CHECKING: + from collections.abc import Mapping + + @dataclass class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAuthenticator): """Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials based on diff --git a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py index 0769c33e..98815667 100644 --- a/airbyte_cdk/sources/declarative/auth/selective_authenticator.py +++ b/airbyte_cdk/sources/declarative/auth/selective_authenticator.py @@ -3,15 +3,18 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import dpath from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator +if TYPE_CHECKING: + from collections.abc import Mapping + + @dataclass class SelectiveAuthenticator(DeclarativeAuthenticator): """Authenticator that selects concrete implementation based on specific config value.""" diff --git a/airbyte_cdk/sources/declarative/auth/token.py b/airbyte_cdk/sources/declarative/auth/token.py index ae6b091d..1d433638 100644 --- a/airbyte_cdk/sources/declarative/auth/token.py +++ b/airbyte_cdk/sources/declarative/auth/token.py @@ -5,21 +5,25 @@ import base64 import logging -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import requests from cachetools import TTLCache, cached from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator -from airbyte_cdk.sources.declarative.auth.token_provider import TokenProvider from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.request_option import ( RequestOption, RequestOptionType, ) -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.auth.token_provider import TokenProvider + from airbyte_cdk.sources.types import Config @dataclass @@ -232,9 +236,8 @@ def auth_header(self) -> str: @property def token(self) -> str: - if self._session_token.eval(self.config): - if self.is_valid_session_token(): - return str(self._session_token.eval(self.config)) + if self._session_token.eval(self.config) and self.is_valid_session_token(): + return str(self._session_token.eval(self.config)) if self._password.eval(self.config) and self._username.eval(self.config): username = self._username.eval(self.config) password = self._password.eval(self.config) diff --git a/airbyte_cdk/sources/declarative/auth/token_provider.py b/airbyte_cdk/sources/declarative/auth/token_provider.py index facb3d63..ca18ba8a 100644 --- a/airbyte_cdk/sources/declarative/auth/token_provider.py +++ b/airbyte_cdk/sources/declarative/auth/token_provider.py @@ -3,25 +3,30 @@ # from __future__ import annotations -import datetime from abc import abstractmethod -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any import dpath import pendulum -from isodate import Duration from pendulum import DateTime -from airbyte_cdk.sources.declarative.decoders.decoder import Decoder from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.exceptions import ReadException from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.declarative.requesters.requester import Requester from airbyte_cdk.sources.http_logger import format_http_message from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + import datetime + from collections.abc import Mapping + + from isodate import Duration + + from airbyte_cdk.sources.declarative.decoders.decoder import Decoder + from airbyte_cdk.sources.declarative.requesters.requester import Requester + from airbyte_cdk.sources.types import Config class TokenProvider: diff --git a/airbyte_cdk/sources/declarative/checks/check_stream.py b/airbyte_cdk/sources/declarative/checks/check_stream.py index efe8e9d7..70da6475 100644 --- a/airbyte_cdk/sources/declarative/checks/check_stream.py +++ b/airbyte_cdk/sources/declarative/checks/check_stream.py @@ -3,17 +3,21 @@ # from __future__ import annotations -import logging import traceback -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk import AbstractSource from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker from airbyte_cdk.sources.streams.http.availability_strategy import HttpAvailabilityStrategy +if TYPE_CHECKING: + import logging + from collections.abc import Mapping + + from airbyte_cdk import AbstractSource + + @dataclass class CheckStream(ConnectionChecker): """Checks the connections by checking availability of one or many streams selected by the developer @@ -51,7 +55,8 @@ def check_connection( return False, reason except Exception as error: logger.error( - f"Encountered an error trying to connect to stream {stream_name}. Error: \n {traceback.format_exc()}" + f"Encountered an error trying to connect to stream {stream_name}. " + f"Error: \n {traceback.format_exc()}" ) return False, f"Unable to connect to stream {stream_name} - {error}" return True, None diff --git a/airbyte_cdk/sources/declarative/checks/connection_checker.py b/airbyte_cdk/sources/declarative/checks/connection_checker.py index 23edafac..bc35f845 100644 --- a/airbyte_cdk/sources/declarative/checks/connection_checker.py +++ b/airbyte_cdk/sources/declarative/checks/connection_checker.py @@ -3,12 +3,15 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk import AbstractSource + +if TYPE_CHECKING: + import logging + from collections.abc import Mapping + + from airbyte_cdk import AbstractSource class ConnectionChecker(ABC): diff --git a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py index 504b6435..0eb8da9d 100644 --- a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +++ b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py @@ -3,12 +3,16 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation import InterpolatedString -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py index ff02ed14..c8d5aca8 100644 --- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py +++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py @@ -3,9 +3,7 @@ # from __future__ import annotations -import logging -from collections.abc import Iterator, Mapping -from typing import Any, Generic +from typing import TYPE_CHECKING, Any, Generic from airbyte_cdk.models import ( AirbyteCatalog, @@ -27,16 +25,10 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( DatetimeBasedCursor as DatetimeBasedCursorModel, ) -from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( - ModelToComponentFactory, -) from airbyte_cdk.sources.declarative.requesters import HttpRequester from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields -from airbyte_cdk.sources.declarative.types import ConnectionDefinition from airbyte_cdk.sources.source import TState -from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream from airbyte_cdk.sources.streams.concurrent.adapters import CursorPartitionGenerator from airbyte_cdk.sources.streams.concurrent.availability_strategy import ( AlwaysAvailableAvailabilityStrategy, @@ -45,6 +37,18 @@ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream +if TYPE_CHECKING: + import logging + from collections.abc import Iterator, Mapping + + from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( + ModelToComponentFactory, + ) + from airbyte_cdk.sources.declarative.types import ConnectionDefinition + from airbyte_cdk.sources.streams import Stream + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + + class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]): # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread. SINGLE_THREADED_CONCURRENCY_LEVEL = 1 @@ -122,9 +126,9 @@ def read( # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent # streams must be saved so that they can be removed from the catalog before starting synchronous streams if self._concurrent_streams: - concurrent_stream_names = set( - [concurrent_stream.name for concurrent_stream in self._concurrent_streams] - ) + concurrent_stream_names = { + concurrent_stream.name for concurrent_stream in self._concurrent_streams + } selected_concurrent_streams = self._select_streams( streams=self._concurrent_streams, configured_catalog=catalog diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py index 95029301..daa2b958 100644 --- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py @@ -3,15 +3,18 @@ # from __future__ import annotations -import datetime as dt -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +if TYPE_CHECKING: + import datetime as dt + from collections.abc import Mapping + + @dataclass class MinMaxDatetime: """Compares the provided date against optional minimum or maximum times. If date is earlier than @@ -102,9 +105,7 @@ def create( ) -> MinMaxDatetime: if parameters is None: parameters = {} - if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString) or isinstance( - interpolated_string_or_min_max_datetime, str - ): + if isinstance(interpolated_string_or_min_max_datetime, InterpolatedString | str): return MinMaxDatetime( datetime=interpolated_string_or_min_max_datetime, parameters=parameters ) diff --git a/airbyte_cdk/sources/declarative/declarative_source.py b/airbyte_cdk/sources/declarative/declarative_source.py index 27eadf13..7047f4d0 100644 --- a/airbyte_cdk/sources/declarative/declarative_source.py +++ b/airbyte_cdk/sources/declarative/declarative_source.py @@ -3,13 +3,17 @@ # from __future__ import annotations -import logging from abc import abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.abstract_source import AbstractSource -from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker + + +if TYPE_CHECKING: + import logging + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker class DeclarativeSource(AbstractSource): diff --git a/airbyte_cdk/sources/declarative/declarative_stream.py b/airbyte_cdk/sources/declarative/declarative_stream.py index bb3ccad1..a61c0c03 100644 --- a/airbyte_cdk/sources/declarative/declarative_stream.py +++ b/airbyte_cdk/sources/declarative/declarative_stream.py @@ -3,23 +3,17 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.models import SyncMode from airbyte_cdk.sources.declarative.incremental import ( GlobalSubstreamCursor, PerPartitionCursor, PerPartitionWithGlobalCursor, ) from airbyte_cdk.sources.declarative.interpolation import InterpolatedString -from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever -from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader -from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader from airbyte_cdk.sources.streams.checkpoint import ( CheckpointMode, CheckpointReader, @@ -30,6 +24,16 @@ from airbyte_cdk.sources.types import Config, StreamSlice +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping, MutableMapping + + from airbyte_cdk.models import SyncMode + from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration + from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever + from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader + + @dataclass class DeclarativeStream(Stream): """DeclarativeStream is a Stream that delegates most of its logic to its schema_load and retriever @@ -211,7 +215,7 @@ def _get_checkpoint_reader( checkpoint_mode = self._checkpoint_mode if isinstance( - cursor, (GlobalSubstreamCursor, PerPartitionCursor, PerPartitionWithGlobalCursor) + cursor, GlobalSubstreamCursor | PerPartitionCursor | PerPartitionWithGlobalCursor ): self.has_multiple_slices = True return CursorBasedCheckpointReader( diff --git a/airbyte_cdk/sources/declarative/decoders/decoder.py b/airbyte_cdk/sources/declarative/decoders/decoder.py index e0930b7e..64df736b 100644 --- a/airbyte_cdk/sources/declarative/decoders/decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/decoder.py @@ -4,11 +4,14 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Generator, MutableMapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -import requests + +if TYPE_CHECKING: + from collections.abc import Generator, MutableMapping + + import requests @dataclass diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 917cdb15..5627097c 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -4,9 +4,8 @@ from __future__ import annotations import logging -from collections.abc import Generator, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import requests from orjson import orjson @@ -14,6 +13,10 @@ from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +if TYPE_CHECKING: + from collections.abc import Generator, Mapping + + logger = logging.getLogger("airbyte") diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index ea7e8d32..197f168e 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -2,14 +2,17 @@ from __future__ import annotations import logging -from collections.abc import Generator, Mapping -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +if TYPE_CHECKING: + from collections.abc import Generator, Mapping + + import requests + + logger = logging.getLogger("airbyte") diff --git a/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py b/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py index 13180c7e..8fa641c8 100644 --- a/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +++ b/airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py @@ -4,15 +4,18 @@ from __future__ import annotations import logging -from collections.abc import Generator, MutableMapping from dataclasses import dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.decoders import Decoder +if TYPE_CHECKING: + from collections.abc import Generator, MutableMapping + + import requests + + logger = logging.getLogger("airbyte") @@ -20,7 +23,7 @@ class PaginationDecoderDecorator(Decoder): """Decoder to wrap other decoders when instantiating a DefaultPaginator in order to bypass decoding if the response is streamed.""" - def __init__(self, decoder: Decoder): + def __init__(self, decoder: Decoder) -> None: self._decoder = decoder @property diff --git a/airbyte_cdk/sources/declarative/decoders/xml_decoder.py b/airbyte_cdk/sources/declarative/decoders/xml_decoder.py index b6cc1253..43d0f6a2 100644 --- a/airbyte_cdk/sources/declarative/decoders/xml_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/xml_decoder.py @@ -4,17 +4,21 @@ from __future__ import annotations import logging -from collections.abc import Generator, Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from xml.parsers.expat import ExpatError -import requests import xmltodict from airbyte_cdk.sources.declarative.decoders.decoder import Decoder +if TYPE_CHECKING: + from collections.abc import Generator, Mapping, MutableMapping + + import requests + + logger = logging.getLogger("airbyte") diff --git a/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py b/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py index be45d56b..555cf6a5 100644 --- a/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py @@ -3,17 +3,22 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any import dpath -import requests from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, MutableMapping + + import requests + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte_cdk/sources/declarative/extractors/http_selector.py index a165f033..414eab88 100644 --- a/airbyte_cdk/sources/declarative/extractors/http_selector.py +++ b/airbyte_cdk/sources/declarative/extractors/http_selector.py @@ -4,12 +4,15 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -import requests -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + import requests + + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState class HttpSelector: diff --git a/airbyte_cdk/sources/declarative/extractors/record_extractor.py b/airbyte_cdk/sources/declarative/extractors/record_extractor.py index 25c9ca4c..95974b6d 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/record_extractor.py @@ -4,11 +4,14 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable, Mapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -import requests + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + import requests @dataclass diff --git a/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte_cdk/sources/declarative/extractors/record_filter.py index 450ada8c..cc0601a1 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_filter.py +++ b/airbyte_cdk/sources/declarative/extractors/record_filter.py @@ -3,20 +3,24 @@ # from __future__ import annotations -import datetime -from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.declarative.incremental import ( - DatetimeBasedCursor, - GlobalSubstreamCursor, - PerPartitionWithGlobalCursor, -) from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean from airbyte_cdk.sources.types import Config, StreamSlice, StreamState +if TYPE_CHECKING: + import datetime + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.declarative.incremental import ( + DatetimeBasedCursor, + GlobalSubstreamCursor, + PerPartitionWithGlobalCursor, + ) + + @dataclass class RecordFilter: """Filter applied on a list of Records @@ -64,7 +68,7 @@ def __init__( date_time_based_cursor: DatetimeBasedCursor, substream_cursor: PerPartitionWithGlobalCursor | GlobalSubstreamCursor | None, **kwargs: Any, - ): + ) -> None: super().__init__(**kwargs) self._date_time_based_cursor = date_time_based_cursor self._substream_cursor = substream_cursor diff --git a/airbyte_cdk/sources/declarative/extractors/record_selector.py b/airbyte_cdk/sources/declarative/extractors/record_selector.py index 9b88b6e9..e10a2567 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_selector.py +++ b/airbyte_cdk/sources/declarative/extractors/record_selector.py @@ -3,21 +3,25 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass, field -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector -from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor -from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter from airbyte_cdk.sources.declarative.models import SchemaNormalization -from airbyte_cdk.sources.declarative.transformations import RecordTransformation from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + import requests + + from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor + from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter + from airbyte_cdk.sources.declarative.transformations import RecordTransformation + + SCHEMA_TRANSFORMER_TYPE_MAPPING = { SchemaNormalization.None_: TransformConfig.NoTransform, SchemaNormalization.Default: TransformConfig.DefaultSchemaNormalization, diff --git a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index fb12a53d..7acbd1e7 100644 --- a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -7,9 +7,8 @@ import os import uuid import zlib -from collections.abc import Iterable, Mapping from contextlib import closing -from typing import Any +from typing import TYPE_CHECKING, Any import pandas as pd import requests @@ -18,6 +17,10 @@ from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + EMPTY_STR: str = "" DEFAULT_ENCODING: str = "utf-8" DOWNLOAD_CHUNK_SIZE: int = 1024 * 10 @@ -135,13 +138,15 @@ def _read_with_chunks( ) for chunk in chunks: chunk = chunk.replace({nan: None}).to_dict(orient="records") - for row in chunk: - yield row + yield from chunk # Yield rows from chunks except pd.errors.EmptyDataError as e: self.logger.info(f"Empty data received. {e}") yield from [] except OSError as ioe: - raise ValueError(f"The IO/Error occured while reading tmp data. Called: {path}", ioe) + raise ValueError( + f"The IO/Error occurred while reading tmp data. Called: {path}", + ioe, + ) from None finally: # remove binary tmp file, after data is read os.remove(path) diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index f7e69095..c3b38eec 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -4,10 +4,9 @@ from __future__ import annotations import datetime -from collections.abc import Callable, Iterable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field from datetime import timedelta -from typing import Any +from typing import TYPE_CHECKING, Any from isodate import Duration, duration_isoformat, parse_duration @@ -21,10 +20,15 @@ RequestOption, RequestOptionType, ) -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping, MutableMapping + + from airbyte_cdk.sources.message import MessageRepository + + @dataclass class DatetimeBasedCursor(DeclarativeCursor): """Slices the stream over a datetime range and create a state with format {: } @@ -168,16 +172,19 @@ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: raise ValueError( f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}." ) - cursor_value_str_by_cursor_value_datetime = dict( - map( - # we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like - # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z' - lambda datetime_str: (self.parse_date(datetime_str), datetime_str), # type: ignore # because of the filter on the next line, this will only be called with a str - filter( - lambda item: item, [self._cursor, self._highest_observed_cursor_field_value] - ), + # we need to ensure the cursor value is preserved as is in the state else the CATs might + # complain of something like + # 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z' + cursor_value_str_by_cursor_value_datetime = { + self.parse_date(datetime_str): datetime_str + for datetime_str in filter( + lambda item: item, + [ + self._cursor, + self._highest_observed_cursor_field_value, + ], ) - ) + } self._cursor = ( cursor_value_str_by_cursor_value_datetime[ max(cursor_value_str_by_cursor_value_datetime.keys()) @@ -297,7 +304,7 @@ def _get_date( return comparator(cursor_date, default_date) def parse_date(self, date: str) -> datetime.datetime: - for datetime_format in self.cursor_datetime_formats + [self.datetime_format]: + for datetime_format in [*self.cursor_datetime_formats, self.datetime_format]: try: return self._parser.parse(date, datetime_format) except ValueError: @@ -421,9 +428,7 @@ def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: second_cursor_value = second.get(cursor_field) if first_cursor_value and second_cursor_value: return self.parse_date(first_cursor_value) >= self.parse_date(second_cursor_value) - if first_cursor_value: - return True - return False + return bool(first_cursor_value) def set_runtime_lookback_window(self, lookback_window_in_seconds: int) -> None: """Updates the lookback window based on a given number of seconds if the new duration diff --git a/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py b/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py index 62590cee..27795765 100644 --- a/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py @@ -5,15 +5,21 @@ import threading import time -from collections.abc import Callable, Iterable, Mapping -from typing import Any, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar -from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor -from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.types import Record, StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import ( + DatetimeBasedCursor, + ) + from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter + + T = TypeVar("T") @@ -76,7 +82,11 @@ class GlobalSubstreamCursor(DeclarativeCursor): - When using the `incremental_dependency` option, the sync will progress through parent records, preventing the sync from getting infinitely stuck. However, it is crucial to understand the requirements for both the `global_substream_cursor` and `incremental_dependency` options to avoid data loss. """ - def __init__(self, stream_cursor: DatetimeBasedCursor, partition_router: PartitionRouter): + def __init__( + self, + stream_cursor: DatetimeBasedCursor, + partition_router: PartitionRouter, + ) -> None: self._stream_cursor = stream_cursor self._partition_router = partition_router self._timer = Timer() diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py index fcc8287d..2e09196b 100644 --- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py @@ -5,22 +5,26 @@ import logging from collections import OrderedDict -from collections.abc import Callable, Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor -from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import ( PerPartitionKeySerializer, ) from airbyte_cdk.sources.types import Record, StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter + + logger = logging.getLogger("airbyte") class CursorFactory: - def __init__(self, create_function: Callable[[], DeclarativeCursor]): + def __init__(self, create_function: Callable[[], DeclarativeCursor]) -> None: self._create_function = create_function def create(self) -> DeclarativeCursor: @@ -51,7 +55,11 @@ class PerPartitionCursor(DeclarativeCursor): _VALUE = 1 _state_to_migrate_from: Mapping[str, Any] = {} - def __init__(self, cursor_factory: CursorFactory, partition_router: PartitionRouter): + def __init__( + self, + cursor_factory: CursorFactory, + partition_router: PartitionRouter, + ) -> None: self._cursor_factory = cursor_factory self._partition_router = partition_router # The dict is ordered to ensure that once the maximum number of partitions is reached, @@ -332,5 +340,4 @@ def _get_cursor(self, record: Record) -> DeclarativeCursor: raise ValueError( "Invalid state as stream slices that are emitted should refer to an existing cursor" ) - cursor = self._cursor_per_partition[partition_key] - return cursor + return self._cursor_per_partition[partition_key] diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py index 7797c5af..26f97ed1 100644 --- a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +++ b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py @@ -3,10 +3,8 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping, MutableMapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import ( GlobalSubstreamCursor, @@ -16,8 +14,16 @@ CursorFactory, PerPartitionCursor, ) -from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, MutableMapping + + from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import ( + DatetimeBasedCursor, + ) + from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState class PerPartitionWithGlobalCursor(DeclarativeCursor): @@ -73,7 +79,7 @@ def __init__( cursor_factory: CursorFactory, partition_router: PartitionRouter, stream_cursor: DatetimeBasedCursor, - ): + ) -> None: self._partition_router = partition_router self._per_partition_cursor = PerPartitionCursor(cursor_factory, partition_router) self._global_cursor = GlobalSubstreamCursor(stream_cursor, partition_router) diff --git a/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py b/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py index c437a7be..2645a1e2 100644 --- a/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py @@ -1,15 +1,18 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.incremental import DeclarativeCursor from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState from airbyte_cdk.sources.streams.checkpoint.checkpoint_reader import FULL_REFRESH_COMPLETE_STATE +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + @dataclass class ResumableFullRefreshCursor(DeclarativeCursor): parameters: InitVar[Mapping[str, Any]] diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py index 8c3b0509..484658a4 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py @@ -3,12 +3,16 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Final +from typing import TYPE_CHECKING, Any, Final from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config FALSE_VALUES: Final[list[Any]] = [ diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py index d011485f..a41280fc 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py @@ -3,12 +3,16 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py index d37b96e6..c46dc0d7 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py @@ -3,18 +3,22 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any, Union +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation -from airbyte_cdk.sources.types import Config -NestedMappingEntry = Union[ - dict[str, "NestedMapping"], list["NestedMapping"], str, int, float, bool, None -] -NestedMapping = Union[dict[str, NestedMappingEntry], str] +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config + + +NestedMappingEntry = ( + dict[str, "NestedMapping"] | list["NestedMapping"] | str | int | float | bool | None +) +NestedMapping = dict[str, NestedMappingEntry] | str @dataclass diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py index 773b00cb..7e56afaf 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py @@ -3,16 +3,20 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass -class InterpolatedString: +class InterpolatedString: # noqa: PLW1641 # Ignore missing __hash__() """Wrapper around a raw string to be interpolated with the Jinja2 templating engine Attributes: diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolation.py b/airbyte_cdk/sources/declarative/interpolation/interpolation.py index 525fb0be..30cf5072 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolation.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolation.py @@ -4,9 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.types import Config + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import Config class Interpolation(ABC): diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py index a39e0cfc..536f482a 100644 --- a/airbyte_cdk/sources/declarative/interpolation/jinja.py +++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py @@ -4,19 +4,24 @@ from __future__ import annotations import ast -from collections.abc import Mapping from functools import cache -from typing import Any +from typing import TYPE_CHECKING, Any from jinja2 import meta -from jinja2.environment import Template from jinja2.exceptions import UndefinedError from jinja2.sandbox import SandboxedEnvironment from airbyte_cdk.sources.declarative.interpolation.filters import filters from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation from airbyte_cdk.sources.declarative.interpolation.macros import macros -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from jinja2.environment import Template + + from airbyte_cdk.sources.types import Config class StreamPartitionAccessEnvironment(SandboxedEnvironment): diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py index 33d562cc..cbc05003 100644 --- a/airbyte_cdk/sources/declarative/interpolation/macros.py +++ b/airbyte_cdk/sources/declarative/interpolation/macros.py @@ -57,7 +57,7 @@ def timestamp(dt: float | str) -> int | float: :param dt: datetime to convert to timestamp :return: unix timestamp """ - if isinstance(dt, (int, float)): + if isinstance(dt, int | float): return int(dt) return _str_to_datetime(dt).astimezone(pytz.utc).timestamp() diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py index a24b4b06..52642e91 100644 --- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py +++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py @@ -7,22 +7,14 @@ import logging import pkgutil import re -from collections.abc import Iterator, Mapping from copy import deepcopy from importlib import metadata -from typing import Any +from typing import TYPE_CHECKING, Any import yaml from jsonschema.exceptions import ValidationError from jsonschema.validators import validate -from airbyte_cdk.models import ( - AirbyteConnectionStatus, - AirbyteMessage, - AirbyteStateMessage, - ConfiguredAirbyteCatalog, - ConnectorSpecification, -) from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( @@ -41,9 +33,6 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import ( ModelToComponentFactory, ) -from airbyte_cdk.sources.message import MessageRepository -from airbyte_cdk.sources.streams.core import Stream -from airbyte_cdk.sources.types import ConnectionDefinition from airbyte_cdk.sources.utils.slice_logger import ( AlwaysLogSliceLogger, DebugSliceLogger, @@ -51,6 +40,21 @@ ) +if TYPE_CHECKING: + from collections.abc import Iterator, Mapping + + from airbyte_cdk.models import ( + AirbyteConnectionStatus, + AirbyteMessage, + AirbyteStateMessage, + ConfiguredAirbyteCatalog, + ConnectorSpecification, + ) + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.core import Stream + from airbyte_cdk.sources.types import ConnectionDefinition + + class ManifestDeclarativeSource(DeclarativeSource): """Declarative source defined by a manifest of low-code components that define source connector behavior""" @@ -60,7 +64,7 @@ def __init__( debug: bool = False, emit_connector_builder_messages: bool = False, component_factory: ModelToComponentFactory | None = None, - ): + ) -> None: """:param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector :param debug(bool): True if debug mode is enabled :param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked @@ -105,7 +109,7 @@ def connection_checker(self) -> ConnectionChecker: check_stream = self._constructor.create_component( CheckStreamModel, check, - dict(), + {}, emit_connector_builder_messages=self._emit_connector_builder_messages, ) if isinstance(check_stream, ConnectionChecker): @@ -120,7 +124,7 @@ def streams(self, config: Mapping[str, Any]) -> list[Stream]: ) stream_configs = self._stream_configs(self._source_config) - source_streams = [ + return [ self._constructor.create_component( DeclarativeStreamModel, stream_config, @@ -130,8 +134,6 @@ def streams(self, config: Mapping[str, Any]) -> list[Stream]: for stream_config in self._initialize_cache_for_parent_streams(deepcopy(stream_configs)) ] - return source_streams - @staticmethod def _initialize_cache_for_parent_streams( stream_configs: list[dict[str, Any]], @@ -183,7 +185,11 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification: if spec: if "type" not in spec: spec["type"] = "Spec" - spec_component = self._constructor.create_component(SpecModel, spec, dict()) + spec_component = self._constructor.create_component( + model_type=SpecModel, + component_definition=spec, + config={}, + ) return spec_component.generate_spec() return super().spec(logger) @@ -239,14 +245,15 @@ def _validate_source(self) -> None: ) from e cdk_version = metadata.version("airbyte_cdk") - cdk_major, cdk_minor, cdk_patch = self._get_version_parts(cdk_version, "airbyte-cdk") + cdk_major, cdk_minor, _ = self._get_version_parts(cdk_version, "airbyte-cdk") manifest_version = self._source_config.get("version") if manifest_version is None: raise RuntimeError( "Manifest version is not defined in the manifest. This is unexpected since it should be a required field. Please contact support." ) - manifest_major, manifest_minor, manifest_patch = self._get_version_parts( - manifest_version, "manifest" + manifest_major, manifest_minor, _ = self._get_version_parts( + manifest_version, + "manifest", ) if cdk_major < manifest_major or ( @@ -267,7 +274,7 @@ def _validate_source(self) -> None: def _get_version_parts(version: str, version_type: str) -> tuple[int, int, int]: """Takes a semantic version represented as a string and splits it into a tuple of its major, minor, and patch versions.""" version_parts = re.split(r"\.", version) - if len(version_parts) != 3 or not all([part.isdigit() for part in version_parts]): + if len(version_parts) != 3 or not all(part.isdigit() for part in version_parts): raise ValidationError( f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)" ) diff --git a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py index e34a11bc..f137709c 100644 --- a/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py @@ -1,15 +1,19 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration -from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, SubstreamPartitionRouter from airbyte_cdk.sources.declarative.models.declarative_component_schema import ParentStreamConfig +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.models import DatetimeBasedCursor, SubstreamPartitionRouter + + def _is_already_migrated(stream_state: Mapping[str, Any]) -> bool: return "states" in stream_state @@ -36,7 +40,7 @@ def __init__( cursor: DatetimeBasedCursor, config: Mapping[str, Any], parameters: Mapping[str, Any], - ): + ) -> None: self._partition_router = partition_router self._cursor = cursor self._config = config @@ -57,7 +61,6 @@ def _get_partition_field(self, partition_router: SubstreamPartitionRouter) -> st if isinstance(parent_stream_config, ParentStreamConfig) else parent_stream_config.get("partition_field") # type: ignore # See above comment on why parent_stream_config might be a dict ) - return partition_field def should_migrate(self, stream_state: Mapping[str, Any]) -> bool: @@ -76,7 +79,7 @@ def should_migrate(self, stream_state: Mapping[str, Any]) -> bool: } """ if stream_state: - for key, value in stream_state.items(): + for value in stream_state.values(): if isinstance(value, dict): keys = list(value.keys()) if len(keys) != 1: diff --git a/airbyte_cdk/sources/declarative/migrations/state_migration.py b/airbyte_cdk/sources/declarative/migrations/state_migration.py index 6146073a..2645fb76 100644 --- a/airbyte_cdk/sources/declarative/migrations/state_migration.py +++ b/airbyte_cdk/sources/declarative/migrations/state_migration.py @@ -2,8 +2,11 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping class StateMigration: diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py index 978f037c..da6eb7fa 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py @@ -4,8 +4,11 @@ from __future__ import annotations import copy -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping PARAMETERS_STR = "$parameters" diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py index 7e1ae263..0ccffb50 100644 --- a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +++ b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py @@ -4,8 +4,7 @@ from __future__ import annotations import re -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.parsers.custom_exceptions import ( CircularReferenceException, @@ -13,6 +12,10 @@ ) +if TYPE_CHECKING: + from collections.abc import Mapping + + REF_TAG = "$ref" diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py index 243b6bdb..c64d0ab2 100644 --- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py @@ -11,6 +11,7 @@ from collections.abc import Callable, Mapping, MutableMapping from functools import partial from typing import ( + TYPE_CHECKING, Any, get_args, get_origin, @@ -18,13 +19,10 @@ ) from isodate import parse_duration -from pydantic.v1 import BaseModel from airbyte_cdk.models import FailureType, Level -from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator from airbyte_cdk.sources.declarative.async_job.job_tracker import JobTracker -from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus from airbyte_cdk.sources.declarative.auth import DeclarativeOauth2Authenticator, JwtAuthenticator from airbyte_cdk.sources.declarative.auth.declarative_authenticator import ( @@ -154,7 +152,8 @@ CustomRetriever as CustomRetrieverModel, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( - CustomSchemaLoader as CustomSchemaLoader, + CustomSchemaLoader, + ValueType, ) from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( CustomTransformation as CustomTransformationModel, @@ -268,7 +267,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( SubstreamPartitionRouter as SubstreamPartitionRouterModel, ) -from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType from airbyte_cdk.sources.declarative.models.declarative_component_schema import ( WaitTimeFromHeader as WaitTimeFromHeaderModel, ) @@ -332,7 +330,6 @@ JsonFileSchemaLoader, ) from airbyte_cdk.sources.declarative.spec import Spec -from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer from airbyte_cdk.sources.declarative.transformations import ( AddFields, RecordTransformation, @@ -353,10 +350,18 @@ DateTimeStreamStateConverter, ) from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction -from airbyte_cdk.sources.types import Config from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +if TYPE_CHECKING: + from pydantic.v1 import BaseModel + + from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager + from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository + from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer + from airbyte_cdk.sources.types import Config + + ComponentDefinition = Mapping[str, Any] @@ -371,7 +376,7 @@ def __init__( disable_retries: bool = False, disable_cache: bool = False, message_repository: MessageRepository | None = None, - ): + ) -> None: self._init_mappings() self._limit_pages_fetched_per_slice = limit_pages_fetched_per_slice self._limit_slices_fetched = limit_slices_fetched @@ -601,7 +606,7 @@ def create_legacy_to_per_partition_state_migration( ) partition_router = retriever.partition_router if not isinstance( - partition_router, (SubstreamPartitionRouterModel, CustomPartitionRouterModel) + partition_router, SubstreamPartitionRouterModel | CustomPartitionRouterModel ): raise ValueError( f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" @@ -894,13 +899,13 @@ def create_cursor_pagination( self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any ) -> CursorPaginationStrategy: if isinstance(decoder, PaginationDecoderDecorator): - if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)): + if not isinstance(decoder.decoder, JsonDecoder | XmlDecoder): raise ValueError( f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) decoder_to_use = decoder else: - if not isinstance(decoder, (JsonDecoder, XmlDecoder)): + if not isinstance(decoder, JsonDecoder | XmlDecoder): raise ValueError( f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) @@ -969,7 +974,7 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> kwargs = { class_field: model_args[class_field] - for class_field in component_fields.keys() + for class_field in component_fields if class_field in model_args } return custom_component_class(**kwargs) @@ -1162,7 +1167,7 @@ def create_declarative_stream( "substream_cursor": ( combined_slicers if isinstance( - combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor) + combined_slicers, PerPartitionWithGlobalCursor | GlobalSubstreamCursor ) else None ), @@ -1358,7 +1363,7 @@ def create_default_paginator( cursor_used_for_stop_condition: DeclarativeCursor | None = None, ) -> DefaultPaginator | PaginatorTestReadDecorator: if decoder: - if not isinstance(decoder, (JsonDecoder, XmlDecoder)): + if not isinstance(decoder, JsonDecoder | XmlDecoder): raise ValueError( f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) @@ -1402,11 +1407,8 @@ def create_dpath_extractor( decoder: Decoder | None = None, **kwargs: Any, ) -> DpathExtractor: - if decoder: - decoder_to_use = decoder - else: - decoder_to_use = JsonDecoder(parameters={}) - model_field_path: list[InterpolatedString | str] = [x for x in model.field_path] + decoder_to_use = decoder or JsonDecoder(parameters={}) + model_field_path: list[InterpolatedString | str] = list(model.field_path) return DpathExtractor( decoder=decoder_to_use, field_path=model_field_path, @@ -1482,10 +1484,7 @@ def create_http_requester( def create_http_response_filter( model: HttpResponseFilterModel, config: Config, **kwargs: Any ) -> HttpResponseFilter: - if model.action: - action = ResponseAction(model.action.value) - else: - action = None + action = ResponseAction(model.action.value) if model.action else None failure_type = FailureType(model.failure_type.value) if model.failure_type else None @@ -1668,13 +1667,13 @@ def create_offset_increment( model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any ) -> OffsetIncrement: if isinstance(decoder, PaginationDecoderDecorator): - if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)): + if not isinstance(decoder.decoder, JsonDecoder | XmlDecoder): raise ValueError( f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) decoder_to_use = decoder else: - if not isinstance(decoder, (JsonDecoder, XmlDecoder)): + if not isinstance(decoder, JsonDecoder | XmlDecoder): raise ValueError( f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) diff --git a/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py b/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py index 6278cb3a..0cfa2bd4 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +++ b/airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py @@ -6,9 +6,8 @@ import itertools import logging from collections import ChainMap -from collections.abc import Callable, Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import ( @@ -17,6 +16,10 @@ from airbyte_cdk.sources.types import StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + def check_for_substream_in_slicers( slicers: Iterable[PartitionRouter], log_warning: Callable[[str], None] ) -> None: @@ -152,10 +155,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: raise ValueError( f"There should only be a single cursor slice. Found {cursor_slices}" ) - if cursor_slices: - cursor_slice = cursor_slices[0] - else: - cursor_slice = {} + cursor_slice = cursor_slices[0] if cursor_slices else {} yield StreamSlice(partition=partition, cursor_slice=cursor_slice) def set_initial_state(self, stream_state: StreamState) -> None: diff --git a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py index d701d6b6..5f9b2e07 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py @@ -3,9 +3,8 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter @@ -16,6 +15,10 @@ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + @dataclass class ListPartitionRouter(PartitionRouter): """Partition router that iterates over the values of a list diff --git a/airbyte_cdk/sources/declarative/partition_routers/partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/partition_router.py index 48373eb4..43703771 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/partition_router.py @@ -4,11 +4,16 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Mapping from dataclasses import dataclass +from typing import TYPE_CHECKING from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer -from airbyte_cdk.sources.types import StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py index d30a7655..8b01497b 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py @@ -3,14 +3,17 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter from airbyte_cdk.sources.types import StreamSlice, StreamState +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + @dataclass class SinglePartitionRouter(PartitionRouter): """Partition router returning only a stream slice""" diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index c3d7b7cd..e4be83b7 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -282,7 +282,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: # If `parent_state` doesn't exist and at least one parent stream has an incremental dependency, # copy the child state to parent streams with incremental dependencies. incremental_dependency = any( - [parent_config.incremental_dependency for parent_config in self.parent_stream_configs] + parent_config.incremental_dependency for parent_config in self.parent_stream_configs ) if not parent_state and not incremental_dependency: return diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py index a3574860..bb9a87c8 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py @@ -3,15 +3,19 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py index 80cc0aa4..b5d68dca 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py @@ -3,15 +3,19 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py index 1061b6b3..dbebbce5 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py @@ -4,9 +4,13 @@ from __future__ import annotations import numbers -from re import Pattern +from typing import TYPE_CHECKING -import requests + +if TYPE_CHECKING: + from re import Pattern + + import requests def get_numeric_value_from_header( diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py index 9ed0ddd1..1bd40133 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py @@ -4,9 +4,8 @@ from __future__ import annotations import re -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import requests @@ -18,10 +17,15 @@ from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import ( BackoffStrategy, ) -from airbyte_cdk.sources.types import Config from airbyte_cdk.utils import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config + + @dataclass class WaitTimeFromHeaderBackoffStrategy(BackoffStrategy): """Extract wait time from http header diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py index f2a90d74..5b86e8d5 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py @@ -6,9 +6,8 @@ import numbers import re import time -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import requests @@ -19,7 +18,12 @@ from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategy import ( BackoffStrategy, ) -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py index 37bb5e97..2a120d4a 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py @@ -3,11 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler from airbyte_cdk.sources.streams.http.error_handlers.response_models import ( @@ -17,6 +14,12 @@ ) +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + @dataclass class CompositeErrorHandler(ErrorHandler): """Error handler that sequentially iterates over a list of `ErrorHandler`s @@ -54,7 +57,7 @@ def max_retries(self) -> int | None: @property def max_time(self) -> int | None: - return max([error_handler.max_time or 0 for error_handler in self.error_handlers]) + return max(error_handler.max_time or 0 for error_handler in self.error_handlers) def interpret_response( self, response_or_exception: requests.Response | Exception | None @@ -70,8 +73,7 @@ def interpret_response( return matched_error_resolution if ( - matched_error_resolution.response_action == ResponseAction.RETRY - or matched_error_resolution.response_action == ResponseAction.IGNORE + matched_error_resolution.response_action in {ResponseAction.RETRY, ResponseAction.IGNORE} ): return matched_error_resolution if matched_error_resolution: diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index ca9cfa83..b004098f 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -3,9 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any import requests @@ -21,7 +20,12 @@ ErrorResolution, create_fallback_error_resolution, ) -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py index 61cbe3f9..88afbf89 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py @@ -23,7 +23,7 @@ def matches( ) -> ErrorResolution | None: default_mapped_error_resolution = None - if isinstance(response_or_exception, (requests.Response, Exception)): + if isinstance(response_or_exception, requests.Response | Exception): mapped_key: int | type = ( response_or_exception.status_code if isinstance(response_or_exception, requests.Response) diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py index 39727841..ed1ff444 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py @@ -3,9 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import requests @@ -20,7 +19,12 @@ ErrorResolution, ResponseAction, ) -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass @@ -80,7 +84,7 @@ def matches( else response_or_exception.__class__ ) - if isinstance(mapped_key, (int, Exception)): + if isinstance(mapped_key, int | Exception): default_mapped_error_resolution = self._match_default_error_mapping(mapped_key) else: default_mapped_error_resolution = None diff --git a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index 4c604d1f..465634b0 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -5,11 +5,7 @@ import uuid from collections.abc import Iterable, Mapping from dataclasses import dataclass, field -from datetime import timedelta -from typing import Any - -import requests -from requests import Response +from typing import TYPE_CHECKING, Any from airbyte_cdk import AirbyteMessage from airbyte_cdk.logger import lazy_log @@ -17,19 +13,27 @@ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus -from airbyte_cdk.sources.declarative.extractors.dpath_extractor import ( - DpathExtractor, - RecordExtractor, -) from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import ( ResponseToFileExtractor, ) -from airbyte_cdk.sources.declarative.requesters.requester import Requester -from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever from airbyte_cdk.sources.types import Record, StreamSlice from airbyte_cdk.utils import AirbyteTracedException +if TYPE_CHECKING: + from datetime import timedelta + + import requests + from requests import Response + + from airbyte_cdk.sources.declarative.extractors.dpath_extractor import ( + DpathExtractor, + RecordExtractor, + ) + from airbyte_cdk.sources.declarative.requesters.requester import Requester + from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever + + LOGGER = logging.getLogger("airbyte") @@ -189,7 +193,7 @@ def fetch_records(self, job: AsyncJob) -> Iterable[Mapping[str, Any]]: elif isinstance(message, AirbyteMessage): if message.type == Type.RECORD: yield message.record.data # type: ignore # message.record won't be None here as the message is a record - elif isinstance(message, (dict, Mapping)): + elif isinstance(message, dict | Mapping): yield message else: raise TypeError(f"Unknown type `{type(message)}` for message") @@ -214,8 +218,7 @@ def _clean_up_job(self, job_id: str) -> None: del self._polling_job_response_by_id[job_id] def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice: - stream_slice = StreamSlice( + return StreamSlice( partition={"create_job_response": self._create_job_response_by_id[job.api_job_id()]}, cursor_slice={}, ) - return stream_slice diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index c1bf2cc4..d1443a14 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -5,18 +5,14 @@ import logging import os -from collections.abc import Callable, Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import urljoin -import requests - from airbyte_cdk.sources.declarative.auth.declarative_authenticator import ( DeclarativeAuthenticator, NoAuth, ) -from airbyte_cdk.sources.declarative.decoders import Decoder from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import ( @@ -25,11 +21,19 @@ from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod, Requester from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository from airbyte_cdk.sources.streams.http import HttpClient -from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState from airbyte_cdk.utils.mapping_helpers import combine_mappings +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, MutableMapping + + import requests + + from airbyte_cdk.sources.declarative.decoders import Decoder + from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + @dataclass class HttpRequester(Requester): """Default implementation of a Requester @@ -256,7 +260,7 @@ def _request_params( raise ValueError("Request params cannot be a string") for k, v in options.items(): - if isinstance(v, (dict,)): + if isinstance(v, dict): raise ValueError( f"Invalid value for `{k}` parameter. The values of request params cannot be an object." ) @@ -328,7 +332,7 @@ def send_request( request_body_json: Mapping[str, Any] | None = None, log_formatter: Callable[[requests.Response], Any] | None = None, ) -> requests.Response | None: - request, response = self._http_client.send_request( + _, response = self._http_client.send_request( http_method=self.get_method().value, url=self._join_url( self.get_url_base(), diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index bb77ab92..66fb4fb4 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -3,11 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.decoders import ( Decoder, @@ -16,15 +13,22 @@ ) from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator -from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( - PaginationStrategy, -) from airbyte_cdk.sources.declarative.requesters.request_option import ( RequestOption, RequestOptionType, ) from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath -from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + import requests + + from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( + PaginationStrategy, + ) + from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index ab27d811..f5205227 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -3,14 +3,18 @@ # from __future__ import annotations -from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + import requests + + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index d326405f..e2131aa3 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -4,16 +4,20 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Mapping from dataclasses import dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, ) -from airbyte_cdk.sources.types import Record + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Record @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py index 0c3f04d7..ff8c2ef9 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py @@ -3,11 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.decoders import ( Decoder, @@ -19,7 +16,14 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( PaginationStrategy, ) -from airbyte_cdk.sources.types import Config, Record + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Config, Record @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py index 02b9e154..e38dcaf8 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py @@ -3,11 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.decoders import ( Decoder, @@ -18,7 +15,14 @@ from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( PaginationStrategy, ) -from airbyte_cdk.sources.types import Config, Record + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Config, Record @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py index 5f860574..e039740c 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py @@ -3,17 +3,21 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any - -import requests +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( PaginationStrategy, ) -from airbyte_cdk.sources.types import Config, Record + + +if TYPE_CHECKING: + from collections.abc import Mapping + + import requests + + from airbyte_cdk.sources.types import Config, Record @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index 03afbbe5..9b21c717 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -5,11 +5,13 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -import requests -from airbyte_cdk.sources.types import Record +if TYPE_CHECKING: + import requests + + from airbyte_cdk.sources.types import Record @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py index 41b5ce55..66a8c027 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py @@ -4,15 +4,18 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any -import requests - -from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import ( PaginationStrategy, ) -from airbyte_cdk.sources.types import Record + + +if TYPE_CHECKING: + import requests + + from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor + from airbyte_cdk.sources.types import Record class PaginationStopCondition(ABC): @@ -26,7 +29,7 @@ def is_met(self, record: Record) -> bool: class CursorStopCondition(PaginationStopCondition): - def __init__(self, cursor: DeclarativeCursor): + def __init__(self, cursor: DeclarativeCursor) -> None: self._cursor = cursor def is_met(self, record: Record) -> bool: @@ -34,7 +37,11 @@ def is_met(self, record: Record) -> bool: class StopConditionPaginationStrategyDecorator(PaginationStrategy): - def __init__(self, _delegate: PaginationStrategy, stop_condition: PaginationStopCondition): + def __init__( + self, + _delegate: PaginationStrategy, + stop_condition: PaginationStopCondition, + ) -> None: self._delegate = _delegate self._stop_condition = stop_condition diff --git a/airbyte_cdk/sources/declarative/requesters/request_option.py b/airbyte_cdk/sources/declarative/requesters/request_option.py index 062f3a40..1c6e2d0a 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_option.py +++ b/airbyte_cdk/sources/declarative/requesters/request_option.py @@ -3,14 +3,17 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass from enum import Enum -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString +if TYPE_CHECKING: + from collections.abc import Mapping + + class RequestOptionType(Enum): """Describes where to set a value on a request""" diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 277ece70..26157195 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -3,9 +3,8 @@ # from __future__ import annotations -from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.requesters.request_option import ( @@ -15,7 +14,12 @@ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, ) -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py index 5f73b01c..96695faa 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py @@ -3,14 +3,18 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, ) -from airbyte_cdk.sources.types import StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py index d0c22d63..1f0d1326 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py @@ -3,16 +3,20 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import ( InterpolatedNestedMapping, NestedMapping, ) from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py index 7a2e50c5..95ae06cd 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py @@ -3,13 +3,17 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass @@ -63,6 +67,5 @@ def eval_request_inputs( ) if isinstance(interpolated_value, dict): - non_null_tokens = {k: v for k, v in interpolated_value.items() if v is not None} - return non_null_tokens + return {k: v for k, v in interpolated_value.items() if v is not None} return interpolated_value # type: ignore[no-any-return] diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py index 00242446..8374bfd6 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py @@ -5,11 +5,10 @@ from collections.abc import Mapping, MutableMapping from dataclasses import InitVar, dataclass, field -from typing import Any, Union +from typing import TYPE_CHECKING, Any, Union from deprecated import deprecated -from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import ( InterpolatedNestedRequestInputProvider, ) @@ -20,10 +19,16 @@ RequestOptionsProvider, ) from airbyte_cdk.sources.source import ExperimentalClassWarning -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState -RequestInput = Union[str, Mapping[str, str]] +if TYPE_CHECKING: + from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import ( + NestedMapping, + ) + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + +RequestInput = str | Mapping[str, str] ValidRequestTypes = (str, list) diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py index 5cb38867..e1bbf181 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/request_options_provider.py @@ -4,11 +4,14 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Mapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.types import StreamSlice, StreamState + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/request_path.py b/airbyte_cdk/sources/declarative/requesters/request_path.py index b6792506..3ee15698 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_path.py +++ b/airbyte_cdk/sources/declarative/requesters/request_path.py @@ -3,9 +3,12 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping @dataclass diff --git a/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte_cdk/sources/declarative/requesters/requester.py index 3a50a6be..e17c3204 100644 --- a/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte_cdk/sources/declarative/requesters/requester.py @@ -4,17 +4,23 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Callable, Mapping, MutableMapping from enum import Enum -from typing import Any +from typing import TYPE_CHECKING, Any -import requests - -from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, ) -from airbyte_cdk.sources.types import StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Callable, Mapping, MutableMapping + + import requests + + from airbyte_cdk.sources.declarative.auth.declarative_authenticator import ( + DeclarativeAuthenticator, + ) + from airbyte_cdk.sources.types import StreamSlice, StreamState class HttpMethod(Enum): diff --git a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py index 60d5566f..02baebfb 100644 --- a/airbyte_cdk/sources/declarative/retrievers/async_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/async_retriever.py @@ -1,27 +1,31 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Callable, Iterable, Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from deprecated.classic import deprecated from airbyte_cdk.models import FailureType -from airbyte_cdk.sources.declarative.async_job.job_orchestrator import ( - AsyncJobOrchestrator, - AsyncPartition, -) -from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter from airbyte_cdk.sources.declarative.retrievers import Retriever -from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer from airbyte_cdk.sources.source import ExperimentalClassWarning -from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.types import Config, StreamSlice, StreamState from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping + + from airbyte_cdk.sources.declarative.async_job.job_orchestrator import ( + AsyncJobOrchestrator, + AsyncPartition, + ) + from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector + from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer + from airbyte_cdk.sources.streams.core import StreamData + + @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) @dataclass class AsyncRetriever(Retriever): diff --git a/airbyte_cdk/sources/declarative/retrievers/retriever.py b/airbyte_cdk/sources/declarative/retrievers/retriever.py index 7c3b8c65..2c08ae3b 100644 --- a/airbyte_cdk/sources/declarative/retrievers/retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/retriever.py @@ -4,12 +4,15 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice -from airbyte_cdk.sources.streams.core import StreamData -from airbyte_cdk.sources.types import StreamState + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice + from airbyte_cdk.sources.streams.core import StreamData + from airbyte_cdk.sources.types import StreamState class Retriever: diff --git a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 348860ad..38a77ec1 100644 --- a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -9,34 +9,38 @@ from functools import partial from itertools import islice from typing import ( + TYPE_CHECKING, Any, ) -import requests - from airbyte_cdk.models import AirbyteMessage -from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor -from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor from airbyte_cdk.sources.declarative.interpolation import InterpolatedString from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import ( SinglePartitionRouter, ) from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination -from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator from airbyte_cdk.sources.declarative.requesters.request_options import ( DefaultRequestOptionsProvider, RequestOptionsProvider, ) -from airbyte_cdk.sources.declarative.requesters.requester import Requester from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever -from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer from airbyte_cdk.sources.http_logger import format_http_message -from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState from airbyte_cdk.utils.mapping_helpers import combine_mappings +if TYPE_CHECKING: + import requests + + from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector + from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor + from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator + from airbyte_cdk.sources.declarative.requesters.requester import Requester + from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer + from airbyte_cdk.sources.streams.core import StreamData + + FULL_REFRESH_SYNC_COMPLETE_KEY = "__ab_full_refresh_sync_complete" @@ -95,7 +99,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: # This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing # records. Partitions serve as the key and map to True if they already began processing records - self._partition_started: MutableMapping[Any, bool] = dict() + self._partition_started: MutableMapping[Any, bool] = {} @property # type: ignore def name(self) -> str: @@ -456,7 +460,7 @@ def _extract_record(stream_data: StreamData, stream_slice: StreamSlice) -> Recor if isinstance(stream_data, Record): # Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData` return stream_data - if isinstance(stream_data, (dict, Mapping)): + if isinstance(stream_data, dict | Mapping): return Record(dict(stream_data), stream_slice) if isinstance(stream_data, AirbyteMessage) and stream_data.record: return Record(stream_data.record.data, stream_slice) diff --git a/airbyte_cdk/sources/declarative/schema/default_schema_loader.py b/airbyte_cdk/sources/declarative/schema/default_schema_loader.py index de0f98e8..bf9db995 100644 --- a/airbyte_cdk/sources/declarative/schema/default_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/default_schema_loader.py @@ -4,13 +4,17 @@ from __future__ import annotations import logging -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader -from airbyte_cdk.sources.types import Config + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config @dataclass diff --git a/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py b/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py index 691ef32c..4444a0e1 100644 --- a/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/inline_schema_loader.py @@ -3,13 +3,16 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader +if TYPE_CHECKING: + from collections.abc import Mapping + + @dataclass class InlineSchemaLoader(SchemaLoader): """Describes a stream's schema""" diff --git a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py index a38cf9a9..b101b664 100644 --- a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py @@ -6,16 +6,20 @@ import json import pkgutil import sys -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader -from airbyte_cdk.sources.types import Config from airbyte_cdk.sources.utils.schema_helpers import ResourceSchemaLoader +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config + + def _default_file_path() -> str: # Schema files are always in "source_/schemas/.json # The connector's module name can be inferred by looking at the modules loaded and look for the one starting with source_ diff --git a/airbyte_cdk/sources/declarative/schema/schema_loader.py b/airbyte_cdk/sources/declarative/schema/schema_loader.py index b912eaa4..4052389c 100644 --- a/airbyte_cdk/sources/declarative/schema/schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/schema_loader.py @@ -4,9 +4,12 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Mapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping @dataclass diff --git a/airbyte_cdk/sources/declarative/spec/spec.py b/airbyte_cdk/sources/declarative/spec/spec.py index dbf5398d..2a79bd71 100644 --- a/airbyte_cdk/sources/declarative/spec/spec.py +++ b/airbyte_cdk/sources/declarative/spec/spec.py @@ -3,16 +3,20 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import ( AdvancedAuth, ConnectorSpecification, ConnectorSpecificationSerializer, ) # type: ignore [attr-defined] -from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.declarative.models.declarative_component_schema import AuthFlow @dataclass diff --git a/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py b/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py index f08c4e4a..62220808 100644 --- a/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +++ b/airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py @@ -4,13 +4,18 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable from dataclasses import dataclass +from typing import TYPE_CHECKING from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import ( RequestOptionsProvider, ) -from airbyte_cdk.sources.types import StreamSlice + + +if TYPE_CHECKING: + from collections.abc import Iterable + + from airbyte_cdk.sources.types import StreamSlice @dataclass diff --git a/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte_cdk/sources/declarative/transformations/add_fields.py index 82995b3b..d1e69d2a 100644 --- a/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -3,15 +3,19 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass, field -from typing import Any +from typing import TYPE_CHECKING, Any import dpath from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState @dataclass(frozen=True) diff --git a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py index 29b3dec0..49c3185e 100644 --- a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -4,10 +4,13 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte_cdk/sources/declarative/transformations/remove_fields.py index 12c682f2..67e0ea65 100644 --- a/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -3,16 +3,20 @@ # from __future__ import annotations -from collections.abc import Mapping from dataclasses import InitVar, dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import dpath import dpath.exceptions from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean from airbyte_cdk.sources.declarative.transformations import RecordTransformation -from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte_cdk/sources/declarative/transformations/transformation.py index 8522d215..c8fb7abb 100644 --- a/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -5,9 +5,11 @@ from abc import abstractmethod from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.types import Config, StreamSlice, StreamState + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import Config, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte_cdk/sources/declarative/yaml_declarative_source.py index f4c22858..28659ee7 100644 --- a/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -4,8 +4,7 @@ from __future__ import annotations import pkgutil -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any import yaml @@ -13,7 +12,12 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import ( ConcurrentDeclarativeSource, ) -from airbyte_cdk.sources.types import ConnectionDefinition + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.types import ConnectionDefinition class YamlDeclarativeSource(ConcurrentDeclarativeSource[list[AirbyteStateMessage]]): diff --git a/airbyte_cdk/sources/embedded/base_integration.py b/airbyte_cdk/sources/embedded/base_integration.py index f83a4966..929ad96a 100644 --- a/airbyte_cdk/sources/embedded/base_integration.py +++ b/airbyte_cdk/sources/embedded/base_integration.py @@ -4,8 +4,7 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Iterable -from typing import Generic, TypeVar +from typing import TYPE_CHECKING, Generic, TypeVar from airbyte_cdk.connector import TConfig from airbyte_cdk.models import AirbyteRecordMessage, AirbyteStateMessage, SyncMode, Type @@ -14,16 +13,21 @@ get_stream, get_stream_names, ) -from airbyte_cdk.sources.embedded.runner import SourceRunner from airbyte_cdk.sources.embedded.tools import get_defined_id from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit +if TYPE_CHECKING: + from collections.abc import Iterable + + from airbyte_cdk.sources.embedded.runner import SourceRunner + + TOutput = TypeVar("TOutput") class BaseEmbeddedIntegration(ABC, Generic[TConfig, TOutput]): - def __init__(self, runner: SourceRunner[TConfig], config: TConfig): + def __init__(self, runner: SourceRunner[TConfig], config: TConfig) -> None: check_config_against_spec_or_exit(config, runner.spec()) self.source = runner diff --git a/airbyte_cdk/sources/embedded/runner.py b/airbyte_cdk/sources/embedded/runner.py index 3b0a8f5e..fa63cf5f 100644 --- a/airbyte_cdk/sources/embedded/runner.py +++ b/airbyte_cdk/sources/embedded/runner.py @@ -5,18 +5,22 @@ import logging from abc import ABC, abstractmethod -from collections.abc import Iterable -from typing import Generic +from typing import TYPE_CHECKING, Generic from airbyte_cdk.connector import TConfig -from airbyte_cdk.models import ( - AirbyteCatalog, - AirbyteMessage, - AirbyteStateMessage, - ConfiguredAirbyteCatalog, - ConnectorSpecification, -) -from airbyte_cdk.sources.source import Source + + +if TYPE_CHECKING: + from collections.abc import Iterable + + from airbyte_cdk.models import ( + AirbyteCatalog, + AirbyteMessage, + AirbyteStateMessage, + ConfiguredAirbyteCatalog, + ConnectorSpecification, + ) + from airbyte_cdk.sources.source import Source class SourceRunner(ABC, Generic[TConfig]): @@ -39,7 +43,7 @@ def read( class CDKRunner(SourceRunner[TConfig]): - def __init__(self, source: Source, name: str): + def __init__(self, source: Source, name: str) -> None: self._source = source self._logger = logging.getLogger(name) diff --git a/airbyte_cdk/sources/embedded/tools.py b/airbyte_cdk/sources/embedded/tools.py index 207f19bb..f3b7a608 100644 --- a/airbyte_cdk/sources/embedded/tools.py +++ b/airbyte_cdk/sources/embedded/tools.py @@ -3,12 +3,15 @@ # from __future__ import annotations -from collections.abc import Callable, Iterable -from typing import Any +from typing import TYPE_CHECKING, Any import dpath -from airbyte_cdk.models import AirbyteStream + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + from airbyte_cdk.models import AirbyteStream def get_first( diff --git a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py index 7f64b8aa..3b755508 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py @@ -3,11 +3,9 @@ # from __future__ import annotations -import logging from abc import abstractmethod from typing import TYPE_CHECKING -from airbyte_cdk.sources import Source from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.sources.streams.concurrent.availability_strategy import ( AbstractAvailabilityStrategy, @@ -15,11 +13,14 @@ StreamAvailable, StreamUnavailable, ) -from airbyte_cdk.sources.streams.core import Stream if TYPE_CHECKING: + import logging + + from airbyte_cdk.sources import Source from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream + from airbyte_cdk.sources.streams.core import Stream class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy): @@ -46,7 +47,7 @@ def check_availability_and_parsability( class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy): - def __init__(self, stream: AbstractFileBasedStream): + def __init__(self, stream: AbstractFileBasedStream) -> None: self.stream = stream def check_availability(self, logger: logging.Logger) -> StreamAvailability: diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 5976db55..4f53ffa4 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -3,12 +3,10 @@ # from __future__ import annotations -import logging import traceback from typing import TYPE_CHECKING from airbyte_cdk import AirbyteTracedException -from airbyte_cdk.sources import Source from airbyte_cdk.sources.file_based.availability_strategy import ( AbstractFileBasedAvailabilityStrategy, ) @@ -17,17 +15,22 @@ CustomFileBasedException, FileBasedSourceError, ) -from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.schema_helpers import conforms_to_schema if TYPE_CHECKING: + import logging + + from airbyte_cdk.sources import Source + from airbyte_cdk.sources.file_based.file_based_stream_reader import ( + AbstractFileBasedStreamReader, + ) + from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy): - def __init__(self, stream_reader: AbstractFileBasedStreamReader): + def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None: self.stream_reader = stream_reader def check_availability( @@ -75,8 +78,8 @@ def check_availability_and_parsability( # If the parser is set to not check parsability, we still want to check that we can open the file. handle = stream.stream_reader.open_file(file, parser.file_read_mode, None, logger) handle.close() - except AirbyteTracedException as ate: - raise ate + except AirbyteTracedException: + raise except CheckAvailabilityError: return False, "".join(traceback.format_exc()) @@ -118,8 +121,8 @@ def _check_parse_record( # consider the connection check successful even though it means # we skip the schema validation check. return - except AirbyteTracedException as ate: - raise ate + except AirbyteTracedException: + raise except Exception as exc: raise CheckAvailabilityError( FileBasedSourceError.ERROR_READING_FILE, stream=stream.name, file=file.uri diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index de0f10ad..0a8589ec 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -5,16 +5,19 @@ import copy from abc import abstractmethod -from typing import Any, Literal +from typing import TYPE_CHECKING, Any, Literal import dpath from pydantic.v1 import AnyUrl, BaseModel, Field from airbyte_cdk import OneOfOptionConfig -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.utils import schema_helpers +if TYPE_CHECKING: + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + + class DeliverRecords(BaseModel): class Config(OneOfOptionConfig): title = "Replicate Records" diff --git a/airbyte_cdk/sources/file_based/config/file_based_stream_config.py b/airbyte_cdk/sources/file_based/config/file_based_stream_config.py index 79fca918..3ec5a042 100644 --- a/airbyte_cdk/sources/file_based/config/file_based_stream_config.py +++ b/airbyte_cdk/sources/file_based/config/file_based_stream_config.py @@ -3,23 +3,27 @@ # from __future__ import annotations -from collections.abc import Mapping from enum import Enum -from typing import Any, Optional +from typing import TYPE_CHECKING, Any from pydantic.v1 import BaseModel, Field, validator -from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat -from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat -from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat -from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat -from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat -from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema -PrimaryKeyType = Optional[str | list[str]] +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat + from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat + from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat + from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat + from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat + from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat + + +PrimaryKeyType = str | list[str] | None class ValidationPolicy(Enum): diff --git a/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py b/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py index 671c0324..71978d5a 100644 --- a/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +++ b/airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py @@ -4,8 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser + +if TYPE_CHECKING: + from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser class AbstractDiscoveryPolicy(ABC): diff --git a/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py b/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py index 383b26b4..bb7e480f 100644 --- a/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +++ b/airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py @@ -3,10 +3,15 @@ # from __future__ import annotations +from typing import TYPE_CHECKING + from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import ( AbstractDiscoveryPolicy, ) -from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser + + +if TYPE_CHECKING: + from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser DEFAULT_N_CONCURRENT_REQUESTS = 10 diff --git a/airbyte_cdk/sources/file_based/file_based_source.py b/airbyte_cdk/sources/file_based/file_based_source.py index ea66da01..d8a968db 100644 --- a/airbyte_cdk/sources/file_based/file_based_source.py +++ b/airbyte_cdk/sources/file_based/file_based_source.py @@ -3,12 +3,10 @@ # from __future__ import annotations -import logging import traceback from abc import ABC from collections import Counter -from collections.abc import Iterator, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from pydantic.v1.error_wrappers import ValidationError @@ -30,7 +28,6 @@ AbstractFileBasedAvailabilityStrategy, DefaultFileBasedAvailabilityStrategy, ) -from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( FileBasedStreamConfig, ValidationPolicy, @@ -44,9 +41,7 @@ FileBasedErrorsCollector, FileBasedSourceError, ) -from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader from airbyte_cdk.sources.file_based.file_types import default_parsers -from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser from airbyte_cdk.sources.file_based.schema_validation_policies import ( DEFAULT_SCHEMA_VALIDATION_POLICIES, AbstractSchemaValidationPolicy, @@ -58,14 +53,25 @@ FileBasedConcurrentCursor, FileBasedFinalStateCursor, ) -from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository -from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.concurrent.cursor import CursorField from airbyte_cdk.utils.analytics_message import create_analytics_message from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + import logging + from collections.abc import Iterator, Mapping + + from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec + from airbyte_cdk.sources.file_based.file_based_stream_reader import ( + AbstractFileBasedStreamReader, + ) + from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser + from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor + from airbyte_cdk.sources.streams import Stream + + DEFAULT_CONCURRENCY = 100 MAX_CONCURRENCY = 100 INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2 @@ -91,7 +97,7 @@ def __init__( cursor_cls: type[ AbstractConcurrentFileBasedCursor | AbstractFileBasedCursor ] = FileBasedConcurrentCursor, - ): + ) -> None: self.stream_reader = stream_reader self.spec_class = spec_class self.config = config @@ -376,8 +382,7 @@ def _validate_input_schema(self, stream_config: FileBasedStreamConfig) -> None: @staticmethod def _use_file_transfer(parsed_config: AbstractFileBasedSpec) -> bool: - use_file_transfer = ( + return ( hasattr(parsed_config.delivery_method, "delivery_type") and parsed_config.delivery_method.delivery_type == "use_file_transfer" ) - return use_file_transfer diff --git a/airbyte_cdk/sources/file_based/file_based_stream_reader.py b/airbyte_cdk/sources/file_based/file_based_stream_reader.py index b04da502..613c53e1 100644 --- a/airbyte_cdk/sources/file_based/file_based_stream_reader.py +++ b/airbyte_cdk/sources/file_based/file_based_stream_reader.py @@ -3,19 +3,22 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Iterable from datetime import datetime from enum import Enum -from io import IOBase from os import makedirs, path -from typing import Any +from typing import TYPE_CHECKING, Any from wcmatch.glob import GLOBSTAR, globmatch -from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec -from airbyte_cdk.sources.file_based.remote_file import RemoteFile + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable + from io import IOBase + + from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec + from airbyte_cdk.sources.file_based.remote_file import RemoteFile class FileReadMode(Enum): @@ -122,11 +125,10 @@ def get_prefixes_from_globs(globs: list[str]) -> set[str]: def use_file_transfer(self) -> bool: if self.config: - use_file_transfer = ( + return ( hasattr(self.config.delivery_method, "delivery_type") and self.config.delivery_method.delivery_type == "use_file_transfer" ) - return use_file_transfer return False @abstractmethod diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 7a92a353..50603f39 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -3,22 +3,26 @@ # from __future__ import annotations -import logging from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any import fastavro from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError from airbyte_cdk.sources.file_based.file_based_stream_reader import ( AbstractFileBasedStreamReader, FileReadMode, ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_helpers import SchemaType + + +if TYPE_CHECKING: + import logging + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.schema_helpers import SchemaType AVRO_TYPE_TO_JSON_TYPE = { @@ -72,13 +76,12 @@ async def infer_schema( raise ValueError( f"Only record based avro files are supported. Found {unsupported_type}" ) - json_schema = { + return { field["name"]: AvroParser._convert_avro_type_to_json( avro_format, field["name"], field["type"] ) for field in avro_schema["fields"] } - return json_schema @classmethod def _convert_avro_type_to_json( @@ -213,7 +216,7 @@ def _to_output_value( if record_type == "double" and avro_format.double_as_string: return str(record_value) return record_value - if record_type.get("logicalType") in ("decimal", "uuid"): + if record_type.get("logicalType") in {"decimal", "uuid"}: return str(record_value) if record_type.get("logicalType") == "date": return record_value.isoformat() diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index 013a2c33..86fad424 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -5,13 +5,10 @@ import csv import json -import logging from abc import ABC, abstractmethod from collections import defaultdict -from collections.abc import Callable, Generator, Iterable, Mapping from functools import partial -from io import IOBase -from typing import Any +from typing import TYPE_CHECKING, Any from uuid import uuid4 from orjson import orjson @@ -23,18 +20,25 @@ CsvHeaderUserProvided, InferenceType, ) -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError from airbyte_cdk.sources.file_based.file_based_stream_reader import ( AbstractFileBasedStreamReader, FileReadMode, ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + import logging + from collections.abc import Callable, Generator, Iterable, Mapping + from io import IOBase + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + + DIALECT_NAME = "_config_dialect" @@ -152,7 +156,11 @@ def _skip_rows(fp: IOBase, rows_to_skip: int) -> None: class CsvParser(FileTypeParser): _MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 - def __init__(self, csv_reader: _CsvReader | None = None, csv_field_max_bytes: int = 2**31): + def __init__( + self, + csv_reader: _CsvReader | None = None, + csv_field_max_bytes: int = 2**31, + ) -> None: # Increase the maximum length of data that can be parsed in a single CSV field. The default is 128k, which is typically sufficient # but given the use of Airbyte in loading a large variety of data it is best to allow for a larger maximum field size to avoid # skipping data on load. https://stackoverflow.com/questions/15063936/csv-error-field-larger-than-field-limit-131072 @@ -279,7 +287,7 @@ def _to_nullable( null_values: set[str], strings_can_be_null: bool, ) -> dict[str, str | None]: - nullable = { + return { k: None if CsvParser._value_is_none( v, deduped_property_types.get(k), null_values, strings_can_be_null @@ -287,7 +295,6 @@ def _to_nullable( else v for k, v in row.items() } - return nullable @staticmethod def _value_is_none( @@ -387,7 +394,7 @@ def _cast_types( if warnings: logger.warning( - f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join([w for w in warnings])}", + f"{FileBasedSourceError.ERROR_CASTING_VALUE.value}: {','.join(list(warnings))}", ) return result diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index 6e129e16..a5407031 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -3,17 +3,12 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable, Mapping -from io import IOBase -from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any import pandas as pd from numpy import datetime64, issubdtype from numpy import dtype as dtype_ from orjson import orjson -from pydantic.v1 import BaseModel from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( ExcelFormat, @@ -29,8 +24,18 @@ FileReadMode, ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_helpers import SchemaType + + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping + from io import IOBase + from pathlib import Path + + from pydantic.v1 import BaseModel + + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.schema_helpers import SchemaType class ExcelParser(FileTypeParser): @@ -70,7 +75,7 @@ async def infer_schema( prev_frame_column_type = fields.get(column) fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type) - schema = { + return { field: ( {"type": "string", "format": "date-time"} if fields[field] == "date-time" @@ -78,7 +83,6 @@ async def infer_schema( ) for field in fields } - return schema def parse_records( self, diff --git a/airbyte_cdk/sources/file_based/file_types/file_transfer.py b/airbyte_cdk/sources/file_based/file_types/file_transfer.py index 5dbf7d79..9170b234 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_transfer.py +++ b/airbyte_cdk/sources/file_based/file_types/file_transfer.py @@ -3,14 +3,19 @@ # from __future__ import annotations -import logging import os -from collections.abc import Iterable -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig -from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader -from airbyte_cdk.sources.file_based.remote_file import RemoteFile + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.file_based_stream_reader import ( + AbstractFileBasedStreamReader, + ) + from airbyte_cdk.sources.file_based.remote_file import RemoteFile AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files") @@ -37,5 +42,8 @@ def get_file( file=file, local_directory=self._local_directory, logger=logger ) except Exception as ex: - logger.error("An error has occurred while getting file: %s", str(ex)) - raise ex + logger.error( + "An error has occurred while getting file: %s", + str(ex), + ) + raise diff --git a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py index f6f1825a..65e7170f 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py @@ -3,18 +3,21 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig -from airbyte_cdk.sources.file_based.file_based_stream_reader import ( - AbstractFileBasedStreamReader, - FileReadMode, -) -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_helpers import SchemaType + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.file_based_stream_reader import ( + AbstractFileBasedStreamReader, + FileReadMode, + ) + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.schema_helpers import SchemaType Record = dict[str, Any] diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index ce8a0b7d..2b2a2ee0 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -4,20 +4,16 @@ from __future__ import annotations import json -import logging -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from orjson import orjson -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError from airbyte_cdk.sources.file_based.file_based_stream_reader import ( AbstractFileBasedStreamReader, FileReadMode, ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.schema_helpers import ( PYTHON_TYPE_MAPPING, SchemaType, @@ -25,6 +21,14 @@ ) +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + + class JsonlParser(FileTypeParser): MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 ENCODING = "utf8" @@ -141,3 +145,4 @@ def _instantiate_accumulator(line: bytes | str) -> bytes | str: return bytes("", json.detect_encoding(line)) if isinstance(line, str): return "" + return None diff --git a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py index e916b3bb..e9deaff6 100644 --- a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py @@ -4,10 +4,8 @@ from __future__ import annotations import json -import logging import os -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import unquote import pyarrow as pa @@ -28,8 +26,14 @@ FileReadMode, ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_helpers import SchemaType + + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.schema_helpers import SchemaType class ParquetParser(FileTypeParser): @@ -153,7 +157,7 @@ def _scalar_to_python_value(parquet_value: Scalar, parquet_format: ParquetFormat return str(parquet_value.as_py()) if pa.types.is_map(parquet_value.type): - return {k: v for k, v in parquet_value.as_py()} + return dict(parquet_value.as_py()) if pa.types.is_null(parquet_value.type): return None diff --git a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index 401b3f00..5d69aded 100644 --- a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -3,12 +3,10 @@ # from __future__ import annotations -import logging import traceback -from collections.abc import Iterable, Mapping from datetime import datetime from io import BytesIO, IOBase -from typing import Any +from typing import TYPE_CHECKING, Any import backoff import dpath @@ -21,7 +19,6 @@ ) from airbyte_cdk.models import FailureType -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.config.unstructured_format import ( APIParameterConfigModel, APIProcessingConfigModel, @@ -35,11 +32,18 @@ ) from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_helpers import SchemaType from airbyte_cdk.utils import is_cloud_environment from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.schema_helpers import SchemaType + + unstructured_partition_pdf = None unstructured_partition_docx = None unstructured_partition_pptx = None @@ -146,15 +150,17 @@ def parse_records( # otherwise, we raise the error to fail the sync if format.skip_unprocessable_files: exception_str = str(e) - logger.warn(f"File {file.uri} caused an error during parsing: {exception_str}.") + logger.warning( + f"File {file.uri} caused an error during parsing: {exception_str}." + ) yield { "content": None, "document_key": file.uri, "_ab_source_file_parse_error": exception_str, } - logger.warn(f"File {file.uri} cannot be parsed. Skipping it.") + logger.warning(f"File {file.uri} cannot be parsed. Skipping it.") else: - raise e + raise def _read_file( self, @@ -174,7 +180,7 @@ def _read_file( filetype = self._get_filetype(file_handle, remote_file) - if filetype == FileType.MD or filetype == FileType.TXT: + if filetype in {FileType.MD, FileType.TXT}: file_content: bytes = file_handle.read() decoded_content: str = optional_decode(file_content) return decoded_content @@ -193,12 +199,13 @@ def _read_file( # For other exceptions, re-throw as config error so the sync is stopped as problems with the external API need to be resolved by the user and are not considered part of the SLA. # Once this parser leaves experimental stage, we should consider making this a system error instead for issues that might be transient. if isinstance(e, RecordParseError): - raise e + raise raise AirbyteTracedException.from_exception( e, failure_type=FailureType.config_error - ) + ) from None return result + return None def _params_to_dict( self, params: list[APIParameterConfigModel] | None, strategy: str diff --git a/airbyte_cdk/sources/file_based/remote_file.py b/airbyte_cdk/sources/file_based/remote_file.py index 9bc6ea02..4b46f02e 100644 --- a/airbyte_cdk/sources/file_based/remote_file.py +++ b/airbyte_cdk/sources/file_based/remote_file.py @@ -3,11 +3,15 @@ # from __future__ import annotations -from datetime import datetime +from typing import TYPE_CHECKING from pydantic.v1 import BaseModel +if TYPE_CHECKING: + from datetime import datetime + + class RemoteFile(BaseModel): """A file in a file-based stream.""" diff --git a/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py b/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py index dcfde911..d0d421fd 100644 --- a/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +++ b/airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py @@ -4,8 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping class AbstractSchemaValidationPolicy(ABC): diff --git a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py index bd1e3a7f..01890a2d 100644 --- a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +++ b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py @@ -3,8 +3,7 @@ # from __future__ import annotations -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.file_based.config.file_based_stream_config import ValidationPolicy from airbyte_cdk.sources.file_based.exceptions import ( @@ -15,6 +14,10 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy +if TYPE_CHECKING: + from collections.abc import Mapping + + class EmitRecordPolicy(AbstractSchemaValidationPolicy): name = "emit_record" diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index bf492c90..4038ac77 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -4,36 +4,44 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Iterable, Mapping from functools import cache, cached_property -from typing import Any +from typing import TYPE_CHECKING, Any from deprecated import deprecated -from airbyte_cdk import AirbyteMessage -from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources.file_based.availability_strategy import ( - AbstractFileBasedAvailabilityStrategy, -) -from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( - FileBasedStreamConfig, - PrimaryKeyType, -) -from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy from airbyte_cdk.sources.file_based.exceptions import ( FileBasedErrorsCollector, FileBasedSourceError, RecordParseError, UndefinedParserError, ) -from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader -from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.schema_validation_policies import AbstractSchemaValidationPolicy -from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor -from airbyte_cdk.sources.file_based.types import StreamSlice from airbyte_cdk.sources.streams import Stream -from airbyte_cdk.sources.streams.checkpoint import Cursor + + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk import AirbyteMessage + from airbyte_cdk.models import SyncMode + from airbyte_cdk.sources.file_based.availability_strategy import ( + AbstractFileBasedAvailabilityStrategy, + ) + from airbyte_cdk.sources.file_based.config.file_based_stream_config import ( + FileBasedStreamConfig, + PrimaryKeyType, + ) + from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy + from airbyte_cdk.sources.file_based.file_based_stream_reader import ( + AbstractFileBasedStreamReader, + ) + from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.schema_validation_policies import ( + AbstractSchemaValidationPolicy, + ) + from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor + from airbyte_cdk.sources.file_based.types import StreamSlice + from airbyte_cdk.sources.streams.checkpoint import Cursor class AbstractFileBasedStream(Stream): @@ -62,7 +70,7 @@ def __init__( validation_policy: AbstractSchemaValidationPolicy, errors_collector: FileBasedErrorsCollector, cursor: AbstractFileBasedCursor, - ): + ) -> None: super().__init__() self.config = config self.catalog_schema = catalog_schema diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index 37b68c40..b458e3ff 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -4,7 +4,6 @@ from __future__ import annotations import copy -import logging from collections.abc import Iterable, Mapping, MutableMapping from functools import cache from typing import TYPE_CHECKING, Any @@ -19,20 +18,12 @@ SyncMode, Type, ) -from airbyte_cdk.sources import AbstractSource -from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.file_based.availability_strategy import ( AbstractFileBasedAvailabilityStrategy, AbstractFileBasedAvailabilityStrategyWrapper, ) -from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType -from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream from airbyte_cdk.sources.file_based.stream.concurrent.cursor import FileBasedFinalStateCursor -from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor -from airbyte_cdk.sources.file_based.types import StreamSlice -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.source import ExperimentalClassWarning from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream @@ -44,15 +35,25 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator from airbyte_cdk.sources.streams.concurrent.partitions.record import Record -from airbyte_cdk.sources.streams.core import StreamData -from airbyte_cdk.sources.utils.schema_helpers import InternalConfig -from airbyte_cdk.sources.utils.slice_logger import SliceLogger if TYPE_CHECKING: + import logging + + from airbyte_cdk.sources import AbstractSource + from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager + from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType + from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser + from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.concurrent.cursor import ( AbstractConcurrentFileBasedCursor, ) + from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor + from airbyte_cdk.sources.file_based.types import StreamSlice + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.core import StreamData + from airbyte_cdk.sources.utils.schema_helpers import InternalConfig + from airbyte_cdk.sources.utils.slice_logger import SliceLogger """ This module contains adapters to help enabling concurrency on File-based Stream objects without needing to migrate to AbstractStream @@ -115,7 +116,7 @@ def __init__( cursor: AbstractFileBasedCursor, slice_logger: SliceLogger, logger: logging.Logger, - ): + ) -> None: """:param stream: The underlying AbstractStream""" self._abstract_stream = stream self._legacy_stream = legacy_stream @@ -194,7 +195,7 @@ def read_records( ) -> Iterable[StreamData]: try: yield from self._read_records() - except Exception as exc: + except Exception: if hasattr(self._cursor, "state"): state = str(self._cursor.state) else: @@ -206,7 +207,7 @@ def read_records( level=Level.ERROR, message=f"Cursor State at time of exception: {state}" ), ) - raise exc + raise def _read_records(self) -> Iterable[StreamData]: for partition in self._abstract_stream.generate_partitions(): @@ -226,7 +227,7 @@ def __init__( cursor_field: list[str] | None, state: MutableMapping[str, Any] | None, cursor: AbstractConcurrentFileBasedCursor, - ): + ) -> None: self._stream = stream self._slice = _slice self._message_repository = message_repository @@ -277,7 +278,7 @@ def read(self) -> Iterable[Record]: if display_message: raise ExceptionWithDisplayMessage(display_message) from e else: - raise e + raise def to_slice(self) -> Mapping[str, Any] | None: if self._slice is None: @@ -326,7 +327,7 @@ def __init__( cursor_field: list[str] | None, state: MutableMapping[str, Any] | None, cursor: AbstractConcurrentFileBasedCursor, - ): + ) -> None: self._stream = stream self._message_repository = message_repository self._sync_mode = sync_mode diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py index 9b5ba76e..271a18df 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py @@ -3,22 +3,23 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Iterable, MutableMapping -from datetime import datetime from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor -from airbyte_cdk.sources.file_based.types import StreamState from airbyte_cdk.sources.streams.concurrent.cursor import Cursor -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record if TYPE_CHECKING: + import logging + from collections.abc import Iterable, MutableMapping + from datetime import datetime + + from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition + from airbyte_cdk.sources.file_based.types import StreamState + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record class AbstractConcurrentFileBasedCursor(Cursor, AbstractFileBasedCursor, ABC): diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py index 8356a348..1c08fdd5 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py @@ -3,29 +3,31 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable, MutableMapping +import operator from datetime import datetime, timedelta from threading import RLock from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type -from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.concurrent.cursor.abstract_concurrent_file_based_cursor import ( AbstractConcurrentFileBasedCursor, ) from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor -from airbyte_cdk.sources.file_based.types import StreamState -from airbyte_cdk.sources.message.repository import MessageRepository -from airbyte_cdk.sources.streams.concurrent.cursor import CursorField -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record if TYPE_CHECKING: + import logging + from collections.abc import Iterable, MutableMapping + + from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition + from airbyte_cdk.sources.file_based.types import StreamState + from airbyte_cdk.sources.message.repository import MessageRepository + from airbyte_cdk.sources.streams.concurrent.cursor import CursorField + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record _NULL_FILE = "" @@ -90,7 +92,7 @@ def set_pending_partitions(self, partitions: list[FileBasedStreamPartition]) -> if _slice is None: continue for file in _slice["files"]: - if file.uri in self._pending_files.keys(): + if file.uri in self._pending_files: raise RuntimeError( f"Already found file {_slice} in pending files. This is unexpected. Please contact Support." ) @@ -124,7 +126,7 @@ def _compute_earliest_file_in_history(self) -> RemoteFile | None: with self._state_lock: if self._file_to_datetime_history: filename, last_modified = min( - self._file_to_datetime_history.items(), key=lambda f: (f[1], f[0]) + self._file_to_datetime_history.items(), key=operator.itemgetter(1, 0) ) return RemoteFile( uri=filename, @@ -205,7 +207,7 @@ def _compute_latest_file_in_history(self) -> RemoteFile | None: with self._state_lock: if self._file_to_datetime_history: filename, last_modified = max( - self._file_to_datetime_history.items(), key=lambda f: (f[1], f[0]) + self._file_to_datetime_history.items(), key=operator.itemgetter(1, 0) ) return RemoteFile( uri=filename, diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py index 8a750b52..18c5ebbb 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py @@ -3,26 +3,27 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable, MutableMapping from datetime import datetime from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.concurrent.cursor.abstract_concurrent_file_based_cursor import ( AbstractConcurrentFileBasedCursor, ) -from airbyte_cdk.sources.file_based.types import StreamState -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record if TYPE_CHECKING: + import logging + from collections.abc import Iterable, MutableMapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition + from airbyte_cdk.sources.file_based.types import StreamState + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record class FileBasedFinalStateCursor(AbstractConcurrentFileBasedCursor): @@ -34,7 +35,7 @@ def __init__( message_repository: MessageRepository, stream_namespace: str | None, **kwargs: Any, - ): + ) -> None: self._stream_name = stream_config.name self._stream_namespace = stream_namespace self._message_repository = message_repository diff --git a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py index d8a46002..14ae7e0d 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py @@ -3,22 +3,25 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod -from collections.abc import Iterable, MutableMapping -from datetime import datetime -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig -from airbyte_cdk.sources.file_based.remote_file import RemoteFile -from airbyte_cdk.sources.file_based.types import StreamState + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, MutableMapping + from datetime import datetime + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.types import StreamState class AbstractFileBasedCursor(ABC): """Abstract base class for cursors used by file-based streams.""" @abstractmethod - def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any): + def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any) -> None: """Common interface for all cursors.""" ... diff --git a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py index e0802ee8..903cbf40 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py @@ -3,17 +3,22 @@ # from __future__ import annotations -import logging -from collections.abc import Iterable, MutableMapping +import operator from datetime import datetime, timedelta -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.stream.cursor.abstract_file_based_cursor import ( AbstractFileBasedCursor, ) -from airbyte_cdk.sources.file_based.types import StreamState + + +if TYPE_CHECKING: + import logging + from collections.abc import Iterable, MutableMapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig + from airbyte_cdk.sources.file_based.types import StreamState class DefaultFileBasedCursor(AbstractFileBasedCursor): @@ -22,7 +27,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor): DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" CURSOR_FIELD = "_ab_source_file_last_modified" - def __init__(self, stream_config: FileBasedStreamConfig, **_: Any): + def __init__(self, stream_config: FileBasedStreamConfig, **_: Any) -> None: super().__init__(stream_config) self._file_to_datetime_history: MutableMapping[str, str] = {} self._time_window_if_history_is_full = timedelta( @@ -58,8 +63,7 @@ def add_file(self, file: RemoteFile) -> None: ) def get_state(self) -> StreamState: - state = {"history": self._file_to_datetime_history, self.CURSOR_FIELD: self._get_cursor()} - return state + return {"history": self._file_to_datetime_history, self.CURSOR_FIELD: self._get_cursor()} def _get_cursor(self) -> str | None: """Returns the cursor value. @@ -69,7 +73,7 @@ def _get_cursor(self) -> str | None: """ if self._file_to_datetime_history.items(): filename, timestamp = max( - self._file_to_datetime_history.items(), key=lambda x: (x[1], x[0]) + self._file_to_datetime_history.items(), key=operator.itemgetter(1, 0) ) return f"{timestamp}_{filename}" return None @@ -126,7 +130,7 @@ def get_start_time(self) -> datetime: def _compute_earliest_file_in_history(self) -> RemoteFile | None: if self._file_to_datetime_history: filename, last_modified = min( - self._file_to_datetime_history.items(), key=lambda f: (f[1], f[0]) + self._file_to_datetime_history.items(), key=operator.itemgetter(1, 0) ) return RemoteFile( uri=filename, last_modified=datetime.strptime(last_modified, self.DATE_TIME_FORMAT) diff --git a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 969d6958..5a74c9c2 100644 --- a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -6,14 +6,12 @@ import asyncio import itertools import traceback -from collections.abc import Iterable, Mapping, MutableMapping from copy import deepcopy from functools import cache -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level from airbyte_cdk.models import Type as MessageType -from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType from airbyte_cdk.sources.file_based.exceptions import ( FileBasedSourceError, InvalidSchemaError, @@ -23,7 +21,6 @@ StopSyncPerValidationPolicy, ) from airbyte_cdk.sources.file_based.file_types import FileTransfer -from airbyte_cdk.sources.file_based.remote_file import RemoteFile from airbyte_cdk.sources.file_based.schema_helpers import ( SchemaType, file_transfer_schema, @@ -31,14 +28,21 @@ schemaless_schema, ) from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream -from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor -from airbyte_cdk.sources.file_based.types import StreamSlice from airbyte_cdk.sources.streams import IncrementalMixin -from airbyte_cdk.sources.streams.core import JsonSchema from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping, MutableMapping + + from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType + from airbyte_cdk.sources.file_based.remote_file import RemoteFile + from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor + from airbyte_cdk.sources.file_based.types import StreamSlice + from airbyte_cdk.sources.streams.core import JsonSchema + + class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin): """The default file-based stream.""" @@ -51,7 +55,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin): airbyte_columns = [ab_last_mod_col, ab_file_name_col] use_file_transfer = False - def __init__(self, **kwargs: Any): + def __init__(self, **kwargs: Any) -> None: if self.FILE_TRANSFER_KW in kwargs: self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False) super().__init__(**kwargs) @@ -102,11 +106,10 @@ def compute_slices(self) -> Iterable[Mapping[str, Any] | None]: all_files = self.list_files() files_to_read = self._cursor.get_files_to_sync(all_files, self.logger) sorted_files_to_read = sorted(files_to_read, key=lambda f: (f.last_modified, f.uri)) - slices = [ + return [ {"files": list(group[1])} for group in itertools.groupby(sorted_files_to_read, lambda f: f.last_modified) ] - return slices def transform_record( self, record: dict[str, Any], file: RemoteFile, last_updated: str @@ -195,9 +198,9 @@ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Airbyte ), ) - except AirbyteTracedException as exc: + except AirbyteTracedException: # Re-raise the exception to stop the whole sync immediately as this is a fatal error - raise exc + raise except Exception: yield AirbyteMessage( @@ -240,9 +243,9 @@ def get_json_schema(self) -> JsonSchema: message=FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value, exception=AirbyteTracedException(exception=config_exception), failure_type=FailureType.config_error, - ) - except AirbyteTracedException as ate: - raise ate + ) from None + except AirbyteTracedException: + raise except Exception as exc: raise SchemaInferenceError( FileBasedSourceError.SCHEMA_INFERENCE_ERROR, stream=self.name @@ -297,9 +300,7 @@ def _get_raw_json_schema(self) -> JsonSchema: stream=self.name, ) - schema = {"type": "object", "properties": inferred_schema} - - return schema + return {"type": "object", "properties": inferred_schema} def get_files(self) -> Iterable[RemoteFile]: """Return all files that belong to the stream as defined by the stream's globs.""" @@ -320,7 +321,7 @@ def _fill_nulls(schema: Mapping[str, Any]) -> Mapping[str, Any]: if k == "type": if isinstance(v, list): if "null" not in v: - schema[k] = ["null"] + v + schema[k] = ["null", *v] elif v != "null": schema[k] = ["null", v] else: @@ -355,8 +356,8 @@ async def _infer_schema(self, files: list[RemoteFile]) -> Mapping[str, Any]: for task in done: try: base_schema = merge_schemas(base_schema, task.result()) - except AirbyteTracedException as ate: - raise ate + except AirbyteTracedException: + raise except Exception as exc: self.logger.error( f"An error occurred inferring the schema. \n {traceback.format_exc()}", @@ -370,8 +371,8 @@ async def _infer_file_schema(self, file: RemoteFile) -> SchemaType: return await self.get_parser().infer_schema( self.config, file, self.stream_reader, self.logger ) - except AirbyteTracedException as ate: - raise ate + except AirbyteTracedException: + raise except Exception as exc: raise SchemaInferenceError( FileBasedSourceError.SCHEMA_INFERENCE_ERROR, diff --git a/airbyte_cdk/sources/http_logger.py b/airbyte_cdk/sources/http_logger.py index 0e246041..c8fca717 100644 --- a/airbyte_cdk/sources/http_logger.py +++ b/airbyte_cdk/sources/http_logger.py @@ -3,9 +3,13 @@ # from __future__ import annotations -import requests +from typing import TYPE_CHECKING -from airbyte_cdk.sources.message import LogMessage + +if TYPE_CHECKING: + import requests + + from airbyte_cdk.sources.message import LogMessage def format_http_message( @@ -13,7 +17,8 @@ def format_http_message( title: str, description: str, stream_name: str | None, - is_auxiliary: bool = None, + *, + is_auxiliary: bool | None = None, ) -> LogMessage: request = response.request log_message = { @@ -48,4 +53,4 @@ def format_http_message( def _normalize_body_string(body_str: str | bytes | None) -> str | None: - return body_str.decode() if isinstance(body_str, (bytes, bytearray)) else body_str + return body_str.decode() if isinstance(body_str, bytes | bytearray) else body_str diff --git a/airbyte_cdk/sources/message/repository.py b/airbyte_cdk/sources/message/repository.py index d06a9094..6f352e74 100644 --- a/airbyte_cdk/sources/message/repository.py +++ b/airbyte_cdk/sources/message/repository.py @@ -7,13 +7,17 @@ import logging from abc import ABC, abstractmethod from collections import deque -from collections.abc import Callable, Iterable +from typing import TYPE_CHECKING from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, Type from airbyte_cdk.sources.utils.types import JsonType from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets +if TYPE_CHECKING: + from collections.abc import Callable, Iterable + + _LOGGER = logging.getLogger("MessageRepository") _SUPPORTED_MESSAGE_TYPES = {Type.CONTROL, Type.LOG} LogMessage = dict[str, JsonType] @@ -102,7 +106,7 @@ def __init__( dict_to_append: LogMessage, decorated: MessageRepository, log_level: Level = Level.INFO, - ): + ) -> None: self._dict_to_append = dict_to_append self._decorated = decorated self._log_level = log_level @@ -128,10 +132,18 @@ def _append_second_to_first( for key in second: if key in first: if isinstance(first[key], dict) and isinstance(second[key], dict): - self._append_second_to_first(first[key], second[key], path + [str(key)]) # type: ignore # type is verified above + self._append_second_to_first( + first[key], + second[key], + [*path, str(key)], + ) else: if first[key] != second[key]: - _LOGGER.warning("Conflict at %s" % ".".join(path + [str(key)])) + _LOGGER.warning( + "Conflict at {}".format( + ".".join([*path, str(key)]), + ), + ) first[key] = second[key] else: first[key] = second[key] diff --git a/airbyte_cdk/sources/source.py b/airbyte_cdk/sources/source.py index 390af101..b5792f6c 100644 --- a/airbyte_cdk/sources/source.py +++ b/airbyte_cdk/sources/source.py @@ -3,10 +3,9 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod from collections.abc import Iterable, Mapping -from typing import Any, Generic, TypeVar +from typing import TYPE_CHECKING, Any, Generic, TypeVar from airbyte_cdk.connector import BaseConnector, DefaultConnectorMixin, TConfig from airbyte_cdk.models import ( @@ -19,6 +18,10 @@ ) +if TYPE_CHECKING: + import logging + + TState = TypeVar("TState") TCatalog = TypeVar("TCatalog") diff --git a/airbyte_cdk/sources/streams/availability_strategy.py b/airbyte_cdk/sources/streams/availability_strategy.py index 04284fcf..202bfcca 100644 --- a/airbyte_cdk/sources/streams/availability_strategy.py +++ b/airbyte_cdk/sources/streams/availability_strategy.py @@ -3,18 +3,19 @@ # from __future__ import annotations -import logging import typing from abc import ABC, abstractmethod -from collections.abc import Mapping from typing import Any from airbyte_cdk.models import SyncMode -from airbyte_cdk.sources.streams.core import Stream, StreamData if typing.TYPE_CHECKING: + import logging + from collections.abc import Mapping + from airbyte_cdk.sources import Source + from airbyte_cdk.sources.streams.core import Stream, StreamData class AvailabilityStrategy(ABC): diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index 47fb013a..96394b6a 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -8,7 +8,6 @@ import datetime import logging import time -from collections.abc import Mapping from datetime import timedelta from threading import RLock from typing import TYPE_CHECKING, Any @@ -23,6 +22,7 @@ # prevents mypy from complaining about missing session attributes in LimiterMixin if TYPE_CHECKING: + from collections.abc import Mapping MIXIN_BASE = requests.Session else: MIXIN_BASE = object @@ -39,7 +39,14 @@ class Rate: class CallRateLimitHit(Exception): - def __init__(self, error: str, item: Any, weight: int, rate: str, time_to_wait: timedelta): + def __init__( + self, + error: str, + item: Any, + weight: int, + rate: str, + time_to_wait: timedelta, + ) -> None: """Constructor :param error: error message @@ -105,7 +112,7 @@ def __init__( url: str | None = None, params: Mapping[str, Any] | None = None, headers: Mapping[str, Any] | None = None, - ): + ) -> None: """Constructor :param method: @@ -139,9 +146,8 @@ def __call__(self, request: Any) -> bool: else: return False - if self._method is not None: - if prepared_request.method != self._method: - return False + if self._method is not None and prepared_request.method != self._method: + return False if self._url is not None and prepared_request.url is not None: url_without_params = prepared_request.url.split("?")[0] if url_without_params != self._url: @@ -158,7 +164,7 @@ def __call__(self, request: Any) -> bool: class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC): - def __init__(self, matchers: list[RequestMatcher]): + def __init__(self, matchers: list[RequestMatcher]) -> None: self._matchers = matchers def matches(self, request: Any) -> bool: @@ -208,7 +214,7 @@ def __init__( period: timedelta, call_limit: int, matchers: list[RequestMatcher], - ): + ) -> None: """A policy that allows {call_limit} calls within a {period} time interval :param next_reset_ts: next call rate reset time point @@ -279,7 +285,7 @@ def _update_current_window(self) -> None: now = datetime.datetime.now() if now > self._next_reset_ts: logger.debug("started new window, %s calls available now", self._call_limit) - self._next_reset_ts = self._next_reset_ts + self._offset + self._next_reset_ts += self._offset self._calls_num = 0 @@ -290,7 +296,7 @@ class MovingWindowCallRatePolicy(BaseCallRatePolicy): This strategy requires saving of timestamps of all requests within a window. """ - def __init__(self, rates: list[Rate], matchers: list[RequestMatcher]): + def __init__(self, rates: list[Rate], matchers: list[RequestMatcher]) -> None: """Constructor :param rates: list of rates, the order is important and must be ascending @@ -440,7 +446,7 @@ def _do_acquire( """ last_exception = None # sometimes we spend all budget before a second attempt, so we have few more here - for attempt in range(1, self._maximum_attempts_to_acquire): + for _attempt in range(1, self._maximum_attempts_to_acquire): try: policy.try_acquire(request, weight=1) return @@ -478,7 +484,7 @@ def __init__( ratelimit_remaining_header: str = "ratelimit-remaining", status_codes_for_ratelimit_hit: tuple[int] = (429,), **kwargs: Any, - ): + ) -> None: """Constructor :param ratelimit_reset_header: name of the header that has a timestamp of the next reset of call budget @@ -524,7 +530,7 @@ def __init__( self, api_budget: AbstractAPIBudget, **kwargs: Any, - ): + ) -> None: self._api_budget = api_budget super().__init__(**kwargs) # type: ignore # Base Session doesn't take any kwargs diff --git a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py index b5e72ac4..04f77c14 100644 --- a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +++ b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py @@ -4,12 +4,15 @@ from abc import ABC, abstractmethod from collections.abc import Iterable, Mapping from enum import Enum -from typing import Any +from typing import TYPE_CHECKING, Any -from .cursor import Cursor from airbyte_cdk.sources.types import StreamSlice +if TYPE_CHECKING: + from .cursor import Cursor + + class CheckpointMode(Enum): INCREMENTAL = "incremental" RESUMABLE_FULL_REFRESH = "resumable_full_refresh" @@ -50,7 +53,7 @@ class IncrementalCheckpointReader(CheckpointReader): def __init__( self, stream_state: Mapping[str, Any], stream_slices: Iterable[Mapping[str, Any] | None] - ): + ) -> None: self._state: Mapping[str, Any] | None = stream_state self._stream_slices = iter(stream_slices) self._has_slices = False @@ -88,7 +91,7 @@ def __init__( cursor: Cursor, stream_slices: Iterable[Mapping[str, Any] | None], read_state_from_cursor: bool = False, - ): + ) -> None: self._cursor = cursor self._stream_slices = iter(stream_slices) # read_state_from_cursor is used to delineate that partitions should determine when to stop syncing dynamically according @@ -224,7 +227,7 @@ def __init__( cursor: Cursor, stream_slices: Iterable[Mapping[str, Any] | None], read_state_from_cursor: bool = False, - ): + ) -> None: super().__init__( cursor=cursor, stream_slices=stream_slices, @@ -274,7 +277,7 @@ class ResumableFullRefreshCheckpointReader(CheckpointReader): fetching more pages or stopping the sync. """ - def __init__(self, stream_state: Mapping[str, Any]): + def __init__(self, stream_state: Mapping[str, Any]) -> None: # The first attempt of an RFR stream has an empty {} incoming state, but should still make a first attempt to read records # from the first page in next(). self._first_page = bool(stream_state == {}) @@ -300,7 +303,10 @@ class FullRefreshCheckpointReader(CheckpointReader): is not capable of managing state. At the end of a sync, a final state message is emitted to signal completion. """ - def __init__(self, stream_slices: Iterable[Mapping[str, Any] | None]): + def __init__( + self, + stream_slices: Iterable[Mapping[str, Any] | None], + ) -> None: self._stream_slices = iter(stream_slices) self._final_checkpoint = False diff --git a/airbyte_cdk/sources/streams/checkpoint/cursor.py b/airbyte_cdk/sources/streams/checkpoint/cursor.py index 9a059fee..542f035d 100644 --- a/airbyte_cdk/sources/streams/checkpoint/cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/cursor.py @@ -4,9 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState class Cursor(ABC): diff --git a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py index b79e5bee..a01dbce8 100644 --- a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +++ b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py @@ -2,8 +2,11 @@ from __future__ import annotations import json -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping class PerPartitionKeySerializer: diff --git a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py index 108ef260..c4473336 100644 --- a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py @@ -2,10 +2,13 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.sources.streams.checkpoint import Cursor -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState + + +if TYPE_CHECKING: + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState @dataclass diff --git a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py index 0bcb0adf..d780ab04 100644 --- a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py @@ -1,19 +1,23 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Mapping, MutableMapping from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import FailureType from airbyte_cdk.sources.streams.checkpoint import Cursor from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import ( PerPartitionKeySerializer, ) -from airbyte_cdk.sources.types import Record, StreamSlice, StreamState from airbyte_cdk.utils import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + from airbyte_cdk.sources.types import Record, StreamSlice, StreamState + + FULL_REFRESH_COMPLETE_STATE: Mapping[str, Any] = {"__ab_full_refresh_sync_complete": True} diff --git a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py index 9f113cf8..ad372e54 100644 --- a/airbyte_cdk/sources/streams/concurrent/abstract_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/abstract_stream.py @@ -4,16 +4,20 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from deprecated.classic import deprecated -from airbyte_cdk.models import AirbyteStream from airbyte_cdk.sources.source import ExperimentalClassWarning -from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability -from airbyte_cdk.sources.streams.concurrent.cursor import Cursor -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk.models import AirbyteStream + from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability + from airbyte_cdk.sources.streams.concurrent.cursor import Cursor + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning) diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index 0b15be5e..23ec6aaf 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -5,10 +5,9 @@ import copy import json -import logging from collections.abc import Iterable, Mapping, MutableMapping from functools import cache -from typing import Any +from typing import TYPE_CHECKING, Any from deprecated.classic import deprecated @@ -21,9 +20,6 @@ SyncMode, Type, ) -from airbyte_cdk.sources import AbstractSource, Source -from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.source import ExperimentalClassWarning from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy @@ -42,13 +38,21 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator from airbyte_cdk.sources.streams.concurrent.partitions.record import Record -from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( - DateTimeStreamStateConverter, -) -from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.types import StreamSlice -from airbyte_cdk.sources.utils.schema_helpers import InternalConfig -from airbyte_cdk.sources.utils.slice_logger import SliceLogger + + +if TYPE_CHECKING: + import logging + + from airbyte_cdk.sources import AbstractSource, Source + from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import ( + DateTimeStreamStateConverter, + ) + from airbyte_cdk.sources.streams.core import StreamData + from airbyte_cdk.sources.utils.schema_helpers import InternalConfig + from airbyte_cdk.sources.utils.slice_logger import SliceLogger """ @@ -133,7 +137,7 @@ def __init__( cursor: Cursor, slice_logger: SliceLogger, logger: logging.Logger, - ): + ) -> None: """:param stream: The underlying AbstractStream""" self._abstract_stream = stream self._legacy_stream = legacy_stream @@ -161,7 +165,7 @@ def read_records( ) -> Iterable[StreamData]: try: yield from self._read_records() - except Exception as exc: + except Exception: if hasattr(self._cursor, "state"): state = str(self._cursor.state) else: @@ -173,7 +177,7 @@ def read_records( level=Level.ERROR, message=f"Cursor State at time of exception: {state}" ), ) - raise exc + raise def _read_records(self) -> Iterable[StreamData]: for partition in self._abstract_stream.generate_partitions(): @@ -257,7 +261,7 @@ def __init__( cursor_field: list[str] | None, state: MutableMapping[str, Any] | None, cursor: Cursor, - ): + ) -> None: """:param stream: The stream to delegate to :param _slice: The partition's stream_slice :param message_repository: The message repository to use to emit non-record messages @@ -302,7 +306,7 @@ def read(self) -> Iterable[Record]: if display_message: raise ExceptionWithDisplayMessage(display_message) from e else: - raise e + raise def to_slice(self) -> Mapping[str, Any] | None: return self._slice @@ -343,7 +347,7 @@ def __init__( cursor_field: list[str] | None, state: MutableMapping[str, Any] | None, cursor: Cursor, - ): + ) -> None: """:param stream: The stream to delegate to :param message_repository: The message repository to use to emit non-record messages """ @@ -387,7 +391,7 @@ def __init__( connector_state_converter: DateTimeStreamStateConverter, cursor_field: list[str] | None, slice_boundary_fields: tuple[str, str] | None, - ): + ) -> None: """Initialize the CursorPartitionGenerator with a stream, sync mode, and cursor. :param stream: The stream to delegate to for partition generation. @@ -448,11 +452,17 @@ def generate(self) -> Iterable[Partition]: category=ExperimentalClassWarning, ) class AvailabilityStrategyFacade(AvailabilityStrategy): - def __init__(self, abstract_availability_strategy: AbstractAvailabilityStrategy): + def __init__( + self, + abstract_availability_strategy: AbstractAvailabilityStrategy, + ) -> None: self._abstract_availability_strategy = abstract_availability_strategy def check_availability( - self, stream: Stream, logger: logging.Logger, source: Source | None = None + self, + stream: Stream, + logger: logging.Logger, + source: Source | None = None, ) -> tuple[bool, str | None]: """Checks stream availability. diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index 22d14fd9..24adb816 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -3,14 +3,18 @@ # from __future__ import annotations -import logging from abc import ABC, abstractmethod +from typing import TYPE_CHECKING from deprecated.classic import deprecated from airbyte_cdk.sources.source import ExperimentalClassWarning +if TYPE_CHECKING: + import logging + + class StreamAvailability(ABC): @abstractmethod def is_available(self) -> bool: @@ -30,7 +34,7 @@ def message(self) -> str | None: class StreamUnavailable(StreamAvailability): - def __init__(self, message: str): + def __init__(self, message: str) -> None: self._message = message def is_available(self) -> bool: diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index 448c78cd..812e1d2c 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -5,17 +5,21 @@ import functools from abc import ABC, abstractmethod -from collections.abc import Callable, Iterable, Mapping, MutableMapping -from typing import Any, Protocol +from typing import TYPE_CHECKING, Any, Protocol from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record -from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( - AbstractStreamStateConverter, -) + + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping, MutableMapping + + from airbyte_cdk.sources.message import MessageRepository + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record + from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( + AbstractStreamStateConverter, + ) def _extract_value(mapping: Mapping[str, Any], path: list[str]) -> Any: diff --git a/airbyte_cdk/sources/streams/concurrent/default_stream.py b/airbyte_cdk/sources/streams/concurrent/default_stream.py index acef018f..bf8a8310 100644 --- a/airbyte_cdk/sources/streams/concurrent/default_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/default_stream.py @@ -3,20 +3,26 @@ # from __future__ import annotations -from collections.abc import Iterable, Mapping from functools import cache -from logging import Logger -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteStream, SyncMode from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream -from airbyte_cdk.sources.streams.concurrent.availability_strategy import ( - AbstractAvailabilityStrategy, - StreamAvailability, -) -from airbyte_cdk.sources.streams.concurrent.cursor import Cursor -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator + + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + from logging import Logger + + from airbyte_cdk.sources.streams.concurrent.availability_strategy import ( + AbstractAvailabilityStrategy, + StreamAvailability, + ) + from airbyte_cdk.sources.streams.concurrent.cursor import Cursor + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import ( + PartitionGenerator, + ) class DefaultStream(AbstractStream): diff --git a/airbyte_cdk/sources/streams/concurrent/exceptions.py b/airbyte_cdk/sources/streams/concurrent/exceptions.py index cc8b1149..96c380fd 100644 --- a/airbyte_cdk/sources/streams/concurrent/exceptions.py +++ b/airbyte_cdk/sources/streams/concurrent/exceptions.py @@ -9,7 +9,11 @@ class ExceptionWithDisplayMessage(Exception): """Exception that can be used to display a custom message to the user.""" - def __init__(self, display_message: str, **kwargs: Any): + def __init__( + self, + display_message: str, + **kwargs: Any, + ) -> None: super().__init__(**kwargs) self.display_message = display_message diff --git a/airbyte_cdk/sources/streams/concurrent/helpers.py b/airbyte_cdk/sources/streams/concurrent/helpers.py index b469c926..b1854e78 100644 --- a/airbyte_cdk/sources/streams/concurrent/helpers.py +++ b/airbyte_cdk/sources/streams/concurrent/helpers.py @@ -1,7 +1,11 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from __future__ import annotations -from airbyte_cdk.sources.streams import Stream +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from airbyte_cdk.sources.streams import Stream def get_primary_key_from_stream( diff --git a/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py b/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py index 6508b8c3..6ec7e399 100644 --- a/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +++ b/airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py @@ -4,15 +4,20 @@ from __future__ import annotations import time -from queue import Queue +from typing import TYPE_CHECKING from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentinel import ( PartitionGenerationCompletedSentinel, ) from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException -from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager -from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream -from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem + + +if TYPE_CHECKING: + from queue import Queue + + from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager + from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream + from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem class PartitionEnqueuer: diff --git a/airbyte_cdk/sources/streams/concurrent/partition_reader.py b/airbyte_cdk/sources/streams/concurrent/partition_reader.py index fa44a1e4..afa0a08f 100644 --- a/airbyte_cdk/sources/streams/concurrent/partition_reader.py +++ b/airbyte_cdk/sources/streams/concurrent/partition_reader.py @@ -3,16 +3,21 @@ # from __future__ import annotations -from queue import Queue +from typing import TYPE_CHECKING from airbyte_cdk.sources.concurrent_source.stream_thread_exception import StreamThreadException -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition from airbyte_cdk.sources.streams.concurrent.partitions.types import ( PartitionCompleteSentinel, QueueItem, ) +if TYPE_CHECKING: + from queue import Queue + + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + + class PartitionReader: """Generates records from a partition and puts them in a queue.""" diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/partition.py b/airbyte_cdk/sources/streams/concurrent/partitions/partition.py index fbf64b4a..1da852a7 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/partition.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/partition.py @@ -4,10 +4,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Iterable, Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.sources.streams.concurrent.partitions.record import Record + +if TYPE_CHECKING: + from collections.abc import Iterable, Mapping + + from airbyte_cdk.sources.streams.concurrent.partitions.record import Record class Partition(ABC): diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py b/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py index 471af4a6..fffa7443 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/partition_generator.py @@ -4,9 +4,13 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Iterable +from typing import TYPE_CHECKING -from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition + +if TYPE_CHECKING: + from collections.abc import Iterable + + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition class PartitionGenerator(ABC): diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/record.py b/airbyte_cdk/sources/streams/concurrent/partitions/record.py index d6877de9..4a995983 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/record.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/record.py @@ -3,11 +3,12 @@ # from __future__ import annotations -from collections.abc import Mapping from typing import TYPE_CHECKING, Any if TYPE_CHECKING: + from collections.abc import Mapping + from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition @@ -18,8 +19,9 @@ def __init__( self, data: Mapping[str, Any], partition: Partition, + *, is_file_transfer_message: bool = False, - ): + ) -> None: self.data = data self.partition = partition self.is_file_transfer_message = is_file_transfer_message diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/types.py b/airbyte_cdk/sources/streams/concurrent/partitions/types.py index 13931984..f3866579 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/types.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/types.py @@ -17,7 +17,12 @@ class PartitionCompleteSentinel: Includes a pointer to the partition that was processed. """ - def __init__(self, partition: Partition, is_successful: bool = True): + def __init__( + self, + partition: Partition, + *, + is_successful: bool = True, + ) -> None: """:param partition: The partition that was processed""" self.partition = partition self.is_successful = is_successful diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py index 7a377594..721873c9 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py @@ -3,13 +3,15 @@ # from __future__ import annotations +import operator from abc import ABC, abstractmethod -from collections.abc import MutableMapping from enum import Enum from typing import TYPE_CHECKING, Any if TYPE_CHECKING: + from collections.abc import MutableMapping + from airbyte_cdk.sources.streams.concurrent.cursor import CursorField @@ -30,7 +32,11 @@ def _from_state_message(self, value: Any) -> Any: def _to_state_message(self, value: Any) -> Any: pass - def __init__(self, is_sequential_state: bool = True): + def __init__( + self, + *, + is_sequential_state: bool = True, + ) -> None: self._is_sequential_state = is_sequential_state def convert_to_state_message( @@ -128,7 +134,11 @@ def merge_intervals( return [] sorted_intervals = sorted( - intervals, key=lambda interval: (interval[self.START_KEY], interval[self.END_KEY]) + intervals, + key=operator.itemgetter( + self.START_KEY, + self.END_KEY, + ), ) merged_intervals = [sorted_intervals[0]] diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index 7cfaaf78..4a878fb5 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -4,9 +4,8 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Callable, MutableMapping from datetime import datetime, timedelta, timezone -from typing import Any +from typing import TYPE_CHECKING, Any import pendulum from pendulum.datetime import DateTime @@ -14,13 +13,18 @@ # FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest. from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser -from airbyte_cdk.sources.streams.concurrent.cursor import CursorField from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( AbstractStreamStateConverter, ConcurrencyCompatibleStateType, ) +if TYPE_CHECKING: + from collections.abc import Callable, MutableMapping + + from airbyte_cdk.sources.streams.concurrent.cursor import CursorField + + class DateTimeStreamStateConverter(AbstractStreamStateConverter): def _from_state_message(self, value: Any) -> Any: return self.parse_timestamp(value) @@ -154,7 +158,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter): def __init__( self, is_sequential_state: bool = True, cursor_granularity: timedelta | None = None - ): + ) -> None: super().__init__(is_sequential_state=is_sequential_state) self._cursor_granularity = cursor_granularity or timedelta(milliseconds=1) @@ -184,7 +188,7 @@ def __init__( input_datetime_formats: list[str] | None = None, is_sequential_state: bool = True, cursor_granularity: timedelta | None = None, - ): + ) -> None: super().__init__( is_sequential_state=is_sequential_state, cursor_granularity=cursor_granularity ) diff --git a/airbyte_cdk/sources/streams/http/availability_strategy.py b/airbyte_cdk/sources/streams/http/availability_strategy.py index 7679eefe..e4f2f40f 100644 --- a/airbyte_cdk/sources/streams/http/availability_strategy.py +++ b/airbyte_cdk/sources/streams/http/availability_strategy.py @@ -3,16 +3,17 @@ # from __future__ import annotations -import logging import typing -from airbyte_cdk.sources.streams import Stream from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy from airbyte_cdk.utils.traced_exception import AirbyteTracedException if typing.TYPE_CHECKING: + import logging + from airbyte_cdk.sources import Source + from airbyte_cdk.sources.streams import Stream class HttpAvailabilityStrategy(AvailabilityStrategy): diff --git a/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py b/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py index e1e743b0..5727fe0d 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/backoff_strategy.py @@ -4,8 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -import requests + +if TYPE_CHECKING: + import requests class BackoffStrategy(ABC): diff --git a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py index db8efc89..4e4f7cdd 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py @@ -1,11 +1,15 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -import requests +from typing import TYPE_CHECKING from .backoff_strategy import BackoffStrategy +if TYPE_CHECKING: + import requests + + class DefaultBackoffStrategy(BackoffStrategy): def backoff_time( self, diff --git a/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py b/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py index 62d033db..8a7ed0fd 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py @@ -3,7 +3,7 @@ # from __future__ import annotations -from collections.abc import Mapping +from typing import TYPE_CHECKING from requests.exceptions import InvalidSchema, InvalidURL, RequestException @@ -14,6 +14,10 @@ ) +if TYPE_CHECKING: + from collections.abc import Mapping + + DEFAULT_ERROR_MAPPING: Mapping[int | str | type[Exception], ErrorResolution] = { InvalidSchema: ErrorResolution( response_action=ResponseAction.FAIL, diff --git a/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py b/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py index 7c22555d..e73370be 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/error_handler.py @@ -2,10 +2,13 @@ from __future__ import annotations from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -import requests -from .response_models import ErrorResolution +if TYPE_CHECKING: + import requests + + from .response_models import ErrorResolution class ErrorHandler(ABC): diff --git a/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py b/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py index e94fe452..54e409c7 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py @@ -4,8 +4,11 @@ from __future__ import annotations from abc import ABC, abstractmethod +from typing import TYPE_CHECKING -import requests + +if TYPE_CHECKING: + import requests class ErrorMessageParser(ABC): diff --git a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py index 0a59c22f..efc601f5 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py @@ -3,9 +3,8 @@ # from __future__ import annotations -import logging -from collections.abc import Mapping from datetime import timedelta +from typing import TYPE_CHECKING import requests @@ -20,6 +19,11 @@ ) +if TYPE_CHECKING: + import logging + from collections.abc import Mapping + + class HttpStatusErrorHandler(ErrorHandler): def __init__( self, diff --git a/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py b/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py index 008fbad0..a83a9f85 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py @@ -3,10 +3,15 @@ # from __future__ import annotations +from typing import TYPE_CHECKING + import requests from airbyte_cdk.sources.streams.http.error_handlers import ErrorMessageParser -from airbyte_cdk.sources.utils.types import JsonType + + +if TYPE_CHECKING: + from airbyte_cdk.sources.utils.types import JsonType class JsonErrorMessageParser(ErrorMessageParser): diff --git a/airbyte_cdk/sources/streams/http/exceptions.py b/airbyte_cdk/sources/streams/http/exceptions.py index c904fe0b..409c40ef 100644 --- a/airbyte_cdk/sources/streams/http/exceptions.py +++ b/airbyte_cdk/sources/streams/http/exceptions.py @@ -12,7 +12,7 @@ def __init__( request: requests.PreparedRequest, response: requests.Response | Exception | None, error_message: str = "", - ): + ) -> None: if isinstance(response, requests.Response): error_message = ( error_message @@ -37,7 +37,7 @@ def __init__( request: requests.PreparedRequest, response: requests.Response | Exception | None, error_message: str = "", - ): + ) -> None: """:param backoff: how long to backoff in seconds :param request: the request that triggered this backoff exception :param response: the response that triggered the backoff exception diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index c3498706..638e8304 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -5,20 +5,17 @@ import logging from abc import ABC, abstractmethod -from collections.abc import Callable, Iterable, Mapping, MutableMapping from datetime import timedelta -from typing import Any +from typing import TYPE_CHECKING, Any from urllib.parse import urljoin import requests from deprecated import deprecated -from requests.auth import AuthBase from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode from airbyte_cdk.models import Type as MessageType from airbyte_cdk.sources.message.repository import InMemoryMessageRepository from airbyte_cdk.sources.streams.call_rate import APIBudget -from airbyte_cdk.sources.streams.checkpoint.cursor import Cursor from airbyte_cdk.sources.streams.checkpoint.resumable_full_refresh_cursor import ( ResumableFullRefreshCursor, ) @@ -37,14 +34,22 @@ ) from airbyte_cdk.sources.streams.http.http_client import HttpClient from airbyte_cdk.sources.types import Record, StreamSlice -from airbyte_cdk.sources.utils.types import JsonType + + +if TYPE_CHECKING: + from collections.abc import Callable, Iterable, Mapping, MutableMapping + + from requests.auth import AuthBase + + from airbyte_cdk.sources.streams.checkpoint.cursor import Cursor + from airbyte_cdk.sources.utils.types import JsonType # list of all possible HTTP methods which can be used for sending of request bodies BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH") -class HttpStream(Stream, CheckpointMixin, ABC): +class HttpStream(Stream, CheckpointMixin, ABC): # noqa: PLR0904 # Too many public methods """Base abstract class for an Airbyte Stream using the HTTP protocol. Basic building block for users building an Airbyte source for a HTTP API.""" source_defined_cursor = True # Most HTTP streams use a source defined cursor (i.e: the user can't configure it like on a SQL table) @@ -52,7 +57,9 @@ class HttpStream(Stream, CheckpointMixin, ABC): None # Use this variable to define page size for API http requests with pagination support ) - def __init__(self, authenticator: AuthBase | None = None, api_budget: APIBudget | None = None): + def __init__( + self, authenticator: AuthBase | None = None, api_budget: APIBudget | None = None + ) -> None: self._exit_on_rate_limit: bool = False self._http_client = HttpClient( name=self.name, @@ -455,7 +462,7 @@ def _extract_slice_fields( if isinstance(stream_slice, StreamSlice): partition = stream_slice.partition cursor_slice = stream_slice.cursor_slice - remaining = {k: v for k, v in stream_slice.items()} + remaining = dict(stream_slice.items()) else: # RFR streams that implement stream_slices() to generate stream slices in the legacy mapping format are converted into a # structured stream slice mapping by the LegacyCursorBasedCheckpointReader. The structured mapping object has separate @@ -465,7 +472,7 @@ def _extract_slice_fields( remaining = { key: val for key, val in stream_slice.items() - if key != "partition" and key != "cursor_slice" + if key not in {"partition", "cursor_slice"} } return partition, cursor_slice, remaining @@ -523,7 +530,7 @@ def get_log_formatter(self) -> Callable[[requests.Response], Any] | None: class HttpSubStream(HttpStream, ABC): - def __init__(self, parent: HttpStream, **kwargs: Any): + def __init__(self, parent: HttpStream, **kwargs: Any) -> None: """:param parent: should be the instance of HttpStream class""" super().__init__(**kwargs) self.parent = parent @@ -568,7 +575,7 @@ def stream_slices( reason="You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead.", ) class HttpStreamAdapterBackoffStrategy(BackoffStrategy): - def __init__(self, stream: HttpStream): + def __init__(self, stream: HttpStream) -> None: self.stream = stream def backoff_time( diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index 89375196..cdde891d 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -6,9 +6,8 @@ import logging import os import urllib -from collections.abc import Callable, Mapping from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any import orjson import requests @@ -24,7 +23,6 @@ StreamDescriptor, ) from airbyte_cdk.sources.http_config import MAX_CONNECTION_POOL_SIZE -from airbyte_cdk.sources.message import MessageRepository from airbyte_cdk.sources.streams.call_rate import APIBudget, CachedLimiterSession, LimiterSession from airbyte_cdk.sources.streams.http.error_handlers import ( BackoffStrategy, @@ -54,6 +52,12 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + from airbyte_cdk.sources.message import MessageRepository + + BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH") @@ -90,7 +94,7 @@ def __init__( error_message_parser: ErrorMessageParser | None = None, disable_retries: bool = False, message_repository: MessageRepository | None = None, - ): + ) -> None: self._name = name self._api_budget: APIBudget = api_budget or APIBudget(policies=[]) if session: @@ -199,10 +203,7 @@ def _create_prepared_request( def _max_retries(self) -> int: """Determines the max retries based on the provided error handler.""" max_retries = None - if self._disable_retries: - max_retries = 0 - else: - max_retries = self._error_handler.max_retries + max_retries = 0 if self._disable_retries else self._error_handler.max_retries return max_retries if max_retries is not None else self._DEFAULT_MAX_RETRY @property @@ -242,15 +243,13 @@ def _send_with_retry( max_tries=max_tries, max_time=max_time ) # backoff handlers wrap _send, so it will always return a response - response = backoff_handler(rate_limit_backoff_handler(user_backoff_handler))( + return backoff_handler(rate_limit_backoff_handler(user_backoff_handler))( request, request_kwargs, log_formatter=log_formatter, exit_on_rate_limit=exit_on_rate_limit, ) # type: ignore # mypy can't infer that backoff_handler wraps _send - return response - def _send( self, request: requests.PreparedRequest, @@ -371,10 +370,11 @@ def _handle_error_resolution( self._logger.info(error_resolution.error_message or log_message) # TODO: Consider dynamic retry count depending on subsequent error codes - elif ( - error_resolution.response_action == ResponseAction.RETRY - or error_resolution.response_action == ResponseAction.RATE_LIMITED - ): + + elif error_resolution.response_action in { + ResponseAction.RETRY, + ResponseAction.RATE_LIMITED, + }: user_defined_backoff_time = None for backoff_strategy in self._backoff_strategies: backoff_time = backoff_strategy.backoff_time( @@ -416,9 +416,9 @@ def _handle_error_resolution( elif response: try: response.raise_for_status() - except requests.HTTPError as e: + except requests.HTTPError: self._logger.error(response.text) - raise e + raise @property def name(self) -> str: diff --git a/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte_cdk/sources/streams/http/rate_limiting.py index e34555ba..6fdbfd51 100644 --- a/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -48,16 +48,15 @@ def log_retry_attempt(details: Mapping[str, Any]) -> None: def should_give_up(exc: Exception) -> bool: # If a non-rate-limiting related 4XX error makes it this far, it means it was unexpected and probably consistent, so we shouldn't back off - if isinstance(exc, RequestException): - if exc.response is not None: - give_up: bool = ( - exc.response is not None - and exc.response.status_code != codes.too_many_requests - and 400 <= exc.response.status_code < 500 - ) - if give_up: - logger.info(f"Giving up for returned HTTP status: {exc.response.status_code!r}") - return give_up + if isinstance(exc, RequestException) and exc.response is not None: + give_up: bool = ( + exc.response is not None + and exc.response.status_code != codes.too_many_requests + and 400 <= exc.response.status_code < 500 + ) + if give_up: + logger.info(f"Giving up for returned HTTP status: {exc.response.status_code!r}") + return give_up # Only RequestExceptions are retryable, so if we get here, it's not retryable return False diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py index 37dc0266..a1f95aa0 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py @@ -5,28 +5,31 @@ import logging from abc import abstractmethod -from collections.abc import Mapping, MutableMapping from json import JSONDecodeError -from typing import Any +from typing import TYPE_CHECKING, Any import backoff import pendulum import requests from requests.auth import AuthBase -from ..exceptions import DefaultBackoffException from airbyte_cdk.models import FailureType, Level from airbyte_cdk.sources.http_logger import format_http_message from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository +from airbyte_cdk.sources.streams.http.exceptions import DefaultBackoffException from airbyte_cdk.utils import AirbyteTracedException from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets +if TYPE_CHECKING: + from collections.abc import Mapping, MutableMapping + + logger = logging.getLogger("airbyte") _NOOP_MESSAGE_REPOSITORY = NoopMessageRepository() -class AbstractOauth2Authenticator(AuthBase): +class AbstractOauth2Authenticator(AuthBase): # noqa: PLR0904 # Too complex """Abstract class for an OAuth authenticators that implements the OAuth token refresh flow. The authenticator is designed to generically perform the refresh flow without regard to how config fields are get/set by delegating that behavior to the classes implementing the interface. diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py index 1685439b..d463640f 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py @@ -4,16 +4,19 @@ from __future__ import annotations from abc import abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from requests.auth import AuthBase +if TYPE_CHECKING: + from collections.abc import Mapping + + class AbstractHeaderAuthenticator(AuthBase): """Abstract class for an header-based authenticators that add a header to outgoing HTTP requests.""" - def __call__(self, request): + def __call__(self, request: Any) -> Any: """Attach the HTTP headers required to authenticate on the HTTP request""" request.headers.update(self.get_auth_header()) return request diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index c107443c..1f537195 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -3,8 +3,7 @@ # from __future__ import annotations -from collections.abc import Mapping, Sequence -from typing import Any +from typing import TYPE_CHECKING, Any import dpath import pendulum @@ -19,6 +18,10 @@ ) +if TYPE_CHECKING: + from collections.abc import Mapping, Sequence + + class Oauth2Authenticator(AbstractOauth2Authenticator): """Generates OAuth2.0 access tokens from an OAuth2.0 refresh token and client credentials. The generated access token is attached to each request via the Authorization header. @@ -31,18 +34,19 @@ def __init__( client_id: str, client_secret: str, refresh_token: str, - scopes: list[str] = None, - token_expiry_date: pendulum.DateTime = None, - token_expiry_date_format: str = None, + scopes: list[str] | None = None, + token_expiry_date: pendulum.DateTime | None = None, + token_expiry_date_format: str | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", + *, token_expiry_is_time_of_expiration: bool = False, refresh_token_error_status_codes: tuple[int, ...] = (), refresh_token_error_key: str = "", refresh_token_error_values: tuple[str, ...] = (), - ): + ) -> None: self._token_refresh_endpoint = token_refresh_endpoint self._client_secret = client_secret self._client_id = client_id @@ -91,7 +95,7 @@ def get_grant_type(self) -> str: def get_token_expiry_date(self) -> pendulum.DateTime: return self._token_expiry_date - def set_token_expiry_date(self, value: str | int): + def set_token_expiry_date(self, value: str | int) -> None: self._token_expiry_date = self._parse_token_expiration_date(value) @property @@ -107,7 +111,7 @@ def access_token(self) -> str: return self._access_token @access_token.setter - def access_token(self, value: str): + def access_token(self, value: str) -> None: self._access_token = value @@ -124,11 +128,11 @@ def __init__( self, connector_config: Mapping[str, Any], token_refresh_endpoint: str, - scopes: list[str] = None, + scopes: list[str] | None = None, access_token_name: str = "access_token", expires_in_name: str = "expires_in", refresh_token_name: str = "refresh_token", - refresh_request_body: Mapping[str, Any] = None, + refresh_request_body: Mapping[str, Any] | None = None, grant_type: str = "refresh_token", client_id: str | None = None, client_secret: str | None = None, @@ -137,11 +141,12 @@ def __init__( token_expiry_date_config_path: Sequence[str] = ("credentials", "token_expiry_date"), token_expiry_date_format: str | None = None, message_repository: MessageRepository = NoopMessageRepository(), + *, token_expiry_is_time_of_expiration: bool = False, refresh_token_error_status_codes: tuple[int, ...] = (), refresh_token_error_key: str = "", refresh_token_error_values: tuple[str, ...] = (), - ): + ) -> None: """Args: connector_config (Mapping[str, Any]): The full connector configuration token_refresh_endpoint (str): Full URL to the token refresh endpoint @@ -209,13 +214,13 @@ def access_token(self) -> str: return dpath.get(self._connector_config, self._access_token_config_path, default="") @access_token.setter - def access_token(self, new_access_token: str): + def access_token(self, new_access_token: str) -> None: dpath.new(self._connector_config, self._access_token_config_path, new_access_token) def get_refresh_token(self) -> str: return dpath.get(self._connector_config, self._refresh_token_config_path, default="") - def set_refresh_token(self, new_refresh_token: str): + def set_refresh_token(self, new_refresh_token: str) -> None: dpath.new(self._connector_config, self._refresh_token_config_path, new_refresh_token) def get_token_expiry_date(self) -> pendulum.DateTime: @@ -224,9 +229,11 @@ def get_token_expiry_date(self) -> pendulum.DateTime: ) return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) - def set_token_expiry_date(self, new_token_expiry_date): + def set_token_expiry_date(self, new_token_expiry_date) -> None: dpath.new( - self._connector_config, self._token_expiry_date_config_path, str(new_token_expiry_date) + self._connector_config, + self._token_expiry_date_config_path, + str(new_token_expiry_date), ) def token_has_expired(self) -> bool: @@ -235,7 +242,7 @@ def token_has_expired(self) -> bool: @staticmethod def get_new_token_expiry_date( - access_token_expires_in: str, token_expiry_date_format: str = None + access_token_expires_in: str, token_expiry_date_format: str | None = None ) -> pendulum.DateTime: if token_expiry_date_format: return pendulum.from_format(access_token_expires_in, token_expiry_date_format) diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py index 5b03e458..652508bd 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py @@ -27,7 +27,7 @@ def token(self) -> str: def __init__( self, tokens: list[str], auth_method: str = "Bearer", auth_header: str = "Authorization" - ): + ) -> None: self._auth_method = auth_method self._auth_header = auth_header self._tokens = tokens @@ -47,7 +47,12 @@ def auth_header(self) -> str: def token(self) -> str: return f"{self._auth_method} {self._token}" - def __init__(self, token: str, auth_method: str = "Bearer", auth_header: str = "Authorization"): + def __init__( + self, + token: str, + auth_method: str = "Bearer", + auth_header: str = "Authorization", + ) -> None: self._auth_header = auth_header self._auth_method = auth_method self._token = token @@ -72,7 +77,7 @@ def __init__( password: str = "", auth_method: str = "Basic", auth_header: str = "Authorization", - ): + ) -> None: auth_string = f"{username}:{password}".encode() b64_encoded = base64.b64encode(auth_string).decode("utf8") self._auth_header = auth_header diff --git a/airbyte_cdk/sources/types.py b/airbyte_cdk/sources/types.py index c72fd8e4..6ef81260 100644 --- a/airbyte_cdk/sources/types.py +++ b/airbyte_cdk/sources/types.py @@ -17,7 +17,11 @@ class Record(Mapping[str, Any]): - def __init__(self, data: Mapping[str, Any], associated_slice: StreamSlice | None): + def __init__( + self, + data: Mapping[str, Any], + associated_slice: StreamSlice | None, + ) -> None: self._data = data self._associated_slice = associated_slice diff --git a/airbyte_cdk/sources/utils/record_helper.py b/airbyte_cdk/sources/utils/record_helper.py index 4f64095a..7c8e0ce1 100644 --- a/airbyte_cdk/sources/utils/record_helper.py +++ b/airbyte_cdk/sources/utils/record_helper.py @@ -6,7 +6,7 @@ import time from collections.abc import Mapping from collections.abc import Mapping as ABCMapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import ( AirbyteLogMessage, @@ -16,10 +16,13 @@ ) from airbyte_cdk.models import Type as MessageType from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage -from airbyte_cdk.sources.streams.core import StreamData from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer +if TYPE_CHECKING: + from airbyte_cdk.sources.streams.core import StreamData + + def stream_data_to_airbyte_message( stream_name: str, data_or_message: StreamData, diff --git a/airbyte_cdk/sources/utils/schema_helpers.py b/airbyte_cdk/sources/utils/schema_helpers.py index 0a72cc5d..78efcd2d 100644 --- a/airbyte_cdk/sources/utils/schema_helpers.py +++ b/airbyte_cdk/sources/utils/schema_helpers.py @@ -25,13 +25,13 @@ class JsonFileLoader: pointing to shared_schema.json file instead of shared/shared_schema.json """ - def __init__(self, uri_base: str, shared: str): + def __init__(self, uri_base: str, shared: str) -> None: self.shared = shared self.uri_base = uri_base def __call__(self, uri: str) -> dict[str, Any]: uri = uri.replace(self.uri_base, f"{self.uri_base}/{self.shared}/") - with open(uri) as f: + with open(uri, encoding="utf-8") as f: data = json.load(f) if isinstance(data, dict): return data @@ -76,7 +76,7 @@ def _expand_refs(schema: Any, ref_resolver: RefResolver | None = None) -> None: ) # expand refs in definitions as well schema.update(definition) else: - for key, value in schema.items(): + for value in schema.values(): _expand_refs(value, ref_resolver=ref_resolver) elif isinstance(schema, list): for value in schema: @@ -93,7 +93,9 @@ def expand_refs(schema: Any) -> None: def rename_key(schema: Any, old_key: str, new_key: str) -> None: - """Iterate over nested dictionary and replace one key with another. Used to replace anyOf with oneOf. Recursive." + """Iterate over nested dictionary and replace one key with another. + + Used to replace anyOf with oneOf. Recursive. :param schema: schema that will be patched :param old_key: name of the key to replace @@ -102,7 +104,7 @@ def rename_key(schema: Any, old_key: str, new_key: str) -> None: if not isinstance(schema, MutableMapping): return - for key, value in schema.items(): + for value in schema.values(): rename_key(value, old_key, new_key) if old_key in schema: schema[new_key] = schema.pop(old_key) @@ -111,7 +113,7 @@ def rename_key(schema: Any, old_key: str, new_key: str) -> None: class ResourceSchemaLoader: """JSONSchema loader from package resources""" - def __init__(self, package_name: str): + def __init__(self, package_name: str) -> None: self.package_name = package_name def get_schema(self, name: str) -> dict[str, Any]: @@ -190,10 +192,7 @@ def is_limit_reached(self, records_counter: int) -> bool: :param records_counter - number of records already red :return True if limit reached, False otherwise """ - if self.limit: - if records_counter >= self.limit: - return True - return False + return bool(self.limit and records_counter >= self.limit) def split_config(config: Mapping[str, Any]) -> tuple[dict[str, Any], InternalConfig]: diff --git a/airbyte_cdk/sources/utils/slice_logger.py b/airbyte_cdk/sources/utils/slice_logger.py index 28fc6aa0..2154931c 100644 --- a/airbyte_cdk/sources/utils/slice_logger.py +++ b/airbyte_cdk/sources/utils/slice_logger.py @@ -6,13 +6,16 @@ import json import logging from abc import ABC, abstractmethod -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level from airbyte_cdk.models import Type as MessageType +if TYPE_CHECKING: + from collections.abc import Mapping + + class SliceLogger(ABC): """SliceLogger is an interface that allows us to log slices of data in a uniform way. It is responsible for determining whether or not a slice should be logged and for creating the log message. diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index d3d7e1d4..8ea8069b 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -4,14 +4,17 @@ from __future__ import annotations import logging -from collections.abc import Callable, Mapping from distutils.util import strtobool from enum import Flag, auto -from typing import Any +from typing import TYPE_CHECKING, Any from jsonschema import Draft7Validator, ValidationError, validators +if TYPE_CHECKING: + from collections.abc import Callable, Mapping + + json_to_python_simple = { "string": str, "number": float, @@ -48,7 +51,7 @@ class TypeTransformer: _custom_normalizer: Callable[[Any, dict[str, Any]], Any] | None = None - def __init__(self, config: TransformConfig): + def __init__(self, config: TransformConfig) -> None: """Initialize TypeTransformer instance. :param config Transform config that would be applied to object """ @@ -59,7 +62,7 @@ def __init__(self, config: TransformConfig): key: self.__get_normalizer(key, orig_validator) for key, orig_validator in Draft7Validator.VALIDATORS.items() # Do not validate field we do not transform for maximum performance. - if key in ["type", "array", "$ref", "properties", "items"] + if key in {"type", "array", "$ref", "properties", "items"} } self._normalizer = validators.create( meta_schema=Draft7Validator.META_SCHEMA, validators=all_validators @@ -183,7 +186,7 @@ def resolve(subschema): return normalizator - def transform(self, record: dict[str, Any], schema: Mapping[str, Any]): + def transform(self, record: dict[str, Any], schema: Mapping[str, Any]) -> None: """Normalize and validate according to config. :param record: record instance for normalization/transformation. All modification are done by modifying existent object. :param schema: object's jsonschema for normalization. diff --git a/airbyte_cdk/sql/shared/catalog_providers.py b/airbyte_cdk/sql/shared/catalog_providers.py index 64bac21a..da66d1a7 100644 --- a/airbyte_cdk/sql/shared/catalog_providers.py +++ b/airbyte_cdk/sql/shared/catalog_providers.py @@ -10,13 +10,12 @@ from typing import TYPE_CHECKING, Any, cast, final -from airbyte_cdk.models import ConfiguredAirbyteCatalog from airbyte_cdk.sql import exceptions as exc from airbyte_cdk.sql._util.name_normalizers import LowerCaseNormalizer if TYPE_CHECKING: - from airbyte_cdk.models import ConfiguredAirbyteStream + from airbyte_cdk.models import ConfiguredAirbyteCatalog, ConfiguredAirbyteStream class CatalogProvider: diff --git a/airbyte_cdk/sql/shared/sql_processor.py b/airbyte_cdk/sql/shared/sql_processor.py index d7a39863..654633ff 100644 --- a/airbyte_cdk/sql/shared/sql_processor.py +++ b/airbyte_cdk/sql/shared/sql_processor.py @@ -7,7 +7,6 @@ from collections import defaultdict from contextlib import contextmanager from functools import cached_property -from pathlib import Path from typing import TYPE_CHECKING, Any, final import pandas as pd @@ -18,8 +17,6 @@ from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update from sqlalchemy.exc import ProgrammingError, SQLAlchemyError -from airbyte_protocol_dataclasses.models import AirbyteStateMessage - from airbyte_cdk.sql import exceptions as exc from airbyte_cdk.sql._util.hashing import one_way_hash from airbyte_cdk.sql._util.name_normalizers import LowerCaseNormalizer @@ -35,6 +32,7 @@ if TYPE_CHECKING: from collections.abc import Generator + from pathlib import Path from sqlalchemy.engine import Connection, Engine from sqlalchemy.engine.cursor import CursorResult @@ -43,6 +41,8 @@ from sqlalchemy.sql.elements import TextClause from sqlalchemy.sql.type_api import TypeEngine + from airbyte_protocol_dataclasses.models import AirbyteStateMessage + from airbyte_cdk.sql.shared.catalog_providers import CatalogProvider diff --git a/airbyte_cdk/test/catalog_builder.py b/airbyte_cdk/test/catalog_builder.py index 23162e1a..8fa088dd 100644 --- a/airbyte_cdk/test/catalog_builder.py +++ b/airbyte_cdk/test/catalog_builder.py @@ -78,5 +78,5 @@ def with_stream( def build(self) -> ConfiguredAirbyteCatalog: return ConfiguredAirbyteCatalog( - streams=list(map(lambda builder: builder.build(), self._streams)) + streams=[builder.build() for builder in self._streams] ) diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index 37d4dbae..e6dd6d6c 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -20,10 +20,9 @@ import re import tempfile import traceback -from collections.abc import Mapping from io import StringIO from pathlib import Path -from typing import Any +from typing import TYPE_CHECKING, Any from orjson import orjson from pydantic import ValidationError as V2ValidationError @@ -45,11 +44,20 @@ TraceType, Type, ) -from airbyte_cdk.sources import Source + + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.sources import Source class EntrypointOutput: - def __init__(self, messages: list[str], uncaught_exception: BaseException | None = None): + def __init__( + self, + messages: list[str], + uncaught_exception: BaseException | None = None, + ) -> None: try: self._messages = [self._parse_message(message) for message in messages] except V2ValidationError as exception: @@ -115,12 +123,12 @@ def catalog(self) -> AirbyteMessage: return catalog[0] def get_stream_statuses(self, stream_name: str) -> list[AirbyteStreamStatus]: - status_messages = map( - lambda message: message.trace.stream_status.status, # type: ignore - filter( + status_messages = ( + message.trace.stream_status.status + for message in filter( lambda message: message.trace.stream_status.stream_descriptor.name == stream_name, # type: ignore # callable; trace has `stream_status` self._get_trace_message_by_trace_type(TraceType.STREAM_STATUS), - ), + ) ) return list(status_messages) diff --git a/airbyte_cdk/test/mock_http/matcher.py b/airbyte_cdk/test/mock_http/matcher.py index 4221e00e..6ce64efb 100644 --- a/airbyte_cdk/test/mock_http/matcher.py +++ b/airbyte_cdk/test/mock_http/matcher.py @@ -1,11 +1,15 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from __future__ import annotations -from airbyte_cdk.test.mock_http.request import HttpRequest +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from airbyte_cdk.test.mock_http.request import HttpRequest class HttpRequestMatcher: - def __init__(self, request: HttpRequest, minimum_number_of_expected_match: int): + def __init__(self, request: HttpRequest, minimum_number_of_expected_match: int) -> None: self._request_to_match = request self._minimum_number_of_expected_match = minimum_number_of_expected_match self._actual_number_of_matches = 0 diff --git a/airbyte_cdk/test/mock_http/mocker.py b/airbyte_cdk/test/mock_http/mocker.py index 106f5f9f..6d759a46 100644 --- a/airbyte_cdk/test/mock_http/mocker.py +++ b/airbyte_cdk/test/mock_http/mocker.py @@ -3,15 +3,19 @@ import contextlib import functools -from collections.abc import Callable from enum import Enum -from types import TracebackType +from typing import TYPE_CHECKING import requests_mock from airbyte_cdk.test.mock_http import HttpRequest, HttpRequestMatcher, HttpResponse +if TYPE_CHECKING: + from collections.abc import Callable + from types import TracebackType + + class SupportedHttpMethods(str, Enum): GET = "get" PATCH = "patch" @@ -134,7 +138,7 @@ def wrapper(*args, **kwargs): # type: ignore # this is a very generic wrapper result = f(*args, **kwargs) except requests_mock.NoMockAddress as no_mock_exception: matchers_as_string = "\n\t".join( - map(lambda matcher: str(matcher.request), self._matchers) + str(matcher.request) for matcher in self._matchers ) raise ValueError( f"No matcher matches {no_mock_exception.args[0]} with headers `{no_mock_exception.request.headers}` " diff --git a/airbyte_cdk/test/mock_http/response.py b/airbyte_cdk/test/mock_http/response.py index 0ce900ab..67468c79 100644 --- a/airbyte_cdk/test/mock_http/response.py +++ b/airbyte_cdk/test/mock_http/response.py @@ -1,14 +1,18 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Mapping from types import MappingProxyType +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from collections.abc import Mapping class HttpResponse: def __init__( self, body: str, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({}) - ): + ) -> None: self._body = body self._status_code = status_code self._headers = headers diff --git a/airbyte_cdk/test/mock_http/response_builder.py b/airbyte_cdk/test/mock_http/response_builder.py index 8089adf4..6ab9e578 100644 --- a/airbyte_cdk/test/mock_http/response_builder.py +++ b/airbyte_cdk/test/mock_http/response_builder.py @@ -4,13 +4,16 @@ import functools import json from abc import ABC, abstractmethod -from pathlib import Path as FilePath -from typing import Any +from typing import TYPE_CHECKING, Any from airbyte_cdk.test.mock_http import HttpResponse from airbyte_cdk.test.utils.data import get_unit_test_folder +if TYPE_CHECKING: + from pathlib import Path as FilePath + + def _extract(path: list[str], response_template: dict[str, Any]) -> Any: return functools.reduce(lambda a, b: a[b], path, response_template) @@ -43,7 +46,7 @@ def extract(self, template: dict[str, Any]) -> Any: class FieldPath(Path): - def __init__(self, field: str): + def __init__(self, field: str) -> None: self._path = [field] def write(self, template: dict[str, Any], value: Any) -> None: @@ -60,7 +63,7 @@ def __str__(self) -> str: class NestedPath(Path): - def __init__(self, path: list[str]): + def __init__(self, path: list[str]) -> None: self._path = path def write(self, template: dict[str, Any], value: Any) -> None: @@ -83,7 +86,7 @@ def update(self, response: dict[str, Any]) -> None: class FieldUpdatePaginationStrategy(PaginationStrategy): - def __init__(self, path: Path, value: Any): + def __init__(self, path: Path, value: Any) -> None: self._path = path self._value = value @@ -97,7 +100,7 @@ def __init__( template: dict[str, Any], id_path: Path | None, cursor_path: FieldPath | NestedPath | None, - ): + ) -> None: self._record = template self._id_path = id_path self._cursor_path = cursor_path @@ -153,7 +156,7 @@ def __init__( template: dict[str, Any], records_path: FieldPath | NestedPath, pagination_strategy: PaginationStrategy | None, - ): + ) -> None: self._response = template self._records: list[RecordBuilder] = [] self._records_path = records_path @@ -195,7 +198,7 @@ def find_template(resource: str, execution_folder: str) -> dict[str, Any]: / "response" / f"{resource}.json" ) - with open(response_template_filepath) as template_file: + with open(response_template_filepath, encoding="utf-8") as template_file: return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file diff --git a/airbyte_cdk/test/utils/data.py b/airbyte_cdk/test/utils/data.py index a8d96996..a72674b9 100644 --- a/airbyte_cdk/test/utils/data.py +++ b/airbyte_cdk/test/utils/data.py @@ -7,7 +7,7 @@ def get_unit_test_folder(execution_folder: str) -> FilePath: path = FilePath(execution_folder) while path.name != "unit_tests": - if path.name == path.root or path.name == path.drive: + if path.name in {path.root, path.drive}: raise ValueError( f"Could not find `unit_tests` folder as a parent of {execution_folder}" ) @@ -20,6 +20,5 @@ def read_resource_file_contents(resource: str, test_location: str) -> str: file_path = str( get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}" ) - with open(file_path) as f: - response = f.read() - return response + with open(file_path, encoding="utf-8") as f: + return f.read() diff --git a/airbyte_cdk/test/utils/http_mocking.py b/airbyte_cdk/test/utils/http_mocking.py index d76c92d0..a5383f30 100644 --- a/airbyte_cdk/test/utils/http_mocking.py +++ b/airbyte_cdk/test/utils/http_mocking.py @@ -2,10 +2,13 @@ from __future__ import annotations import re -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from requests_mock import Mocker + +if TYPE_CHECKING: + from collections.abc import Mapping + + from requests_mock import Mocker def register_mock_responses( diff --git a/airbyte_cdk/test/utils/reading.py b/airbyte_cdk/test/utils/reading.py index 3ba35c2c..4b771de6 100644 --- a/airbyte_cdk/test/utils/reading.py +++ b/airbyte_cdk/test/utils/reading.py @@ -1,15 +1,19 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. from __future__ import annotations -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk import AbstractSource -from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk import AbstractSource + from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog, SyncMode + + def catalog(stream_name: str, sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: """Create a catalog with a single stream.""" return CatalogBuilder().with_stream(stream_name, sync_mode).build() diff --git a/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte_cdk/utils/airbyte_secrets_utils.py index ac9d6ba5..fb5558b8 100644 --- a/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -3,12 +3,15 @@ # from __future__ import annotations -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any import dpath +if TYPE_CHECKING: + from collections.abc import Mapping + + def get_secret_paths(spec: Mapping[str, Any]) -> list[list[str]]: paths = [] @@ -29,7 +32,7 @@ def traverse_schema(schema_item: Any, path: list[str]) -> None: for i in schema_item: traverse_schema(i, path) elif path[-1] == "airbyte_secret" and schema_item is True: - filtered_path = [p for p in path[:-1] if p not in ["properties", "oneOf"]] + filtered_path = [p for p in path[:-1] if p not in {"properties", "oneOf"}] paths.append(filtered_path) traverse_schema(spec, []) diff --git a/airbyte_cdk/utils/datetime_format_inferrer.py b/airbyte_cdk/utils/datetime_format_inferrer.py index b69424c6..2240a7de 100644 --- a/airbyte_cdk/utils/datetime_format_inferrer.py +++ b/airbyte_cdk/utils/datetime_format_inferrer.py @@ -3,12 +3,15 @@ # from __future__ import annotations -from typing import Any +from typing import TYPE_CHECKING, Any -from airbyte_cdk.models import AirbyteRecordMessage from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser +if TYPE_CHECKING: + from airbyte_cdk.models import AirbyteRecordMessage + + class DatetimeFormatInferrer: """This class is used to detect toplevel fields in records that might be datetime values, along with the used format.""" @@ -39,7 +42,7 @@ def _can_be_datetime(self, value: Any) -> bool: or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds. This is separate from the format check for performance reasons """ - if isinstance(value, (str, int)): + if isinstance(value, str | int): try: value_as_int = int(value) for timestamp_range in self._timestamp_heuristic_ranges: diff --git a/airbyte_cdk/utils/event_timing.py b/airbyte_cdk/utils/event_timing.py index 56f6ba79..fb3c6069 100644 --- a/airbyte_cdk/utils/event_timing.py +++ b/airbyte_cdk/utils/event_timing.py @@ -19,19 +19,19 @@ class EventTimer: Event nesting follows a LIFO pattern, so finish will apply to the last started event. """ - def __init__(self, name): + def __init__(self, name: str) -> None: self.name = name self.events = {} self.count = 0 self.stack = [] - def start_event(self, name): + def start_event(self, name: str) -> None: """Start a new event and push it to the stack.""" self.events[name] = Event(name=name) self.count += 1 self.stack.insert(0, self.events[name]) - def finish_event(self): + def finish_event(self) -> None: """Finish the current event and pop it from the stack.""" if self.stack: event = self.stack.pop(0) @@ -39,7 +39,7 @@ def finish_event(self): else: logger.warning(f"{self.name} finish_event called without start_event") - def report(self, order_by="name"): + def report(self, order_by="name") -> str: """:param order_by: 'name' or 'duration'""" if order_by == "name": events = sorted(self.events.values(), key=lambda event: event.name) @@ -63,10 +63,10 @@ def duration(self) -> float: return (self.end - self.start) / 1e9 return float("+inf") - def __str__(self): + def __str__(self) -> str: return f"{self.name} {datetime.timedelta(seconds=self.duration)}" - def finish(self): + def finish(self) -> None: self.end = time.perf_counter_ns() diff --git a/airbyte_cdk/utils/mapping_helpers.py b/airbyte_cdk/utils/mapping_helpers.py index f534ec8b..a3ab6cd0 100644 --- a/airbyte_cdk/utils/mapping_helpers.py +++ b/airbyte_cdk/utils/mapping_helpers.py @@ -3,8 +3,11 @@ # from __future__ import annotations -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Mapping def combine_mappings( diff --git a/airbyte_cdk/utils/print_buffer.py b/airbyte_cdk/utils/print_buffer.py index f90611ca..8320f00b 100644 --- a/airbyte_cdk/utils/print_buffer.py +++ b/airbyte_cdk/utils/print_buffer.py @@ -5,7 +5,11 @@ import time from io import StringIO from threading import RLock -from types import TracebackType +from typing import TYPE_CHECKING + + +if TYPE_CHECKING: + from types import TracebackType class PrintBuffer: @@ -36,7 +40,7 @@ class PrintBuffer: Exits the runtime context and restores the original stdout and stderr. """ - def __init__(self, flush_interval: float = 0.1): + def __init__(self, flush_interval: float = 0.1) -> None: self.buffer = StringIO() self.flush_interval = flush_interval self.last_flush_time = time.monotonic() diff --git a/airbyte_cdk/utils/schema_inferrer.py b/airbyte_cdk/utils/schema_inferrer.py index 7f5c0ebd..da6da635 100644 --- a/airbyte_cdk/utils/schema_inferrer.py +++ b/airbyte_cdk/utils/schema_inferrer.py @@ -4,14 +4,17 @@ from __future__ import annotations from collections import defaultdict -from collections.abc import Mapping -from typing import Any +from typing import TYPE_CHECKING, Any from genson import SchemaBuilder, SchemaNode from genson.schema.strategies.object import Object from genson.schema.strategies.scalar import Number -from airbyte_cdk.models import AirbyteRecordMessage + +if TYPE_CHECKING: + from collections.abc import Mapping + + from airbyte_cdk.models import AirbyteRecordMessage # schema keywords @@ -33,7 +36,7 @@ class NoRequiredObj(Object): """ def to_schema(self) -> Mapping[str, Any]: - schema: dict[str, Any] = super(NoRequiredObj, self).to_schema() + schema: dict[str, Any] = super().to_schema() schema.pop("required", None) return schema @@ -41,7 +44,7 @@ def to_schema(self) -> Mapping[str, Any]: class IntegerToNumber(Number): """This class has the regular Number behaviour, but it will never emit an integer type.""" - def __init__(self, node_class: SchemaNode): + def __init__(self, node_class: SchemaNode) -> None: super().__init__(node_class) self._type = "number" @@ -65,7 +68,11 @@ def merge_exceptions( [x for exception in exceptions for x in exception._validation_errors], ) - def __init__(self, schema: InferredSchema, validation_errors: list[Exception]): + def __init__( + self, + schema: InferredSchema, + validation_errors: list[Exception], + ) -> None: self._schema = schema self._validation_errors = validation_errors @@ -75,7 +82,7 @@ def schema(self) -> InferredSchema: @property def validation_errors(self) -> list[str]: - return list(map(lambda error: str(error), self._validation_errors)) + return [str(error) for error in self._validation_errors] class SchemaInferrer: diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index ec1e3de2..502da8e8 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -33,7 +33,7 @@ def __init__( failure_type: FailureType = FailureType.system_error, exception: BaseException | None = None, stream_descriptor: StreamDescriptor | None = None, - ): + ) -> None: """:param internal_message: the internal error that caused the failure :param message: a user-friendly message that indicates the cause of the error :param failure_type: the type of error diff --git a/pyproject.toml b/pyproject.toml index 1a20a0b3..88fa4124 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -190,18 +190,23 @@ ignore = [ "COM812", # Because it conflicts with ruff auto-format "DJ", # Django linting "EM", # flake8-errmsgs (may reconsider later) + "FURB189", # Subclassing safety at the cost if isinstance() instability "G", # flake8-logging-format "INP001", # Dir 'examples' is part of an implicit namespace package. Add an __init__.py. "ISC001", # Conflicts with ruff auto-format + "N818", # Custom exception names should use the suffix "Error" "NPY", # NumPy-specific rules "PERF203", # exception handling in loop "PIE790", # Allow unnecssary 'pass' (sometimes useful for readability) + "PLR6201", # Allow membership checks in lists (set-based check is unsafe when values are unhashable) "PLR6301", # Allow class methods that don't use 'self' (otherwise noisy) + "RET504", # Ignore unnecessary assign before return "RUF022", # Allow unsorted __all__ (sometimes useful for grouping by type with pdoc) "S", # flake8-bandit (noisy, security related) "SIM910", # Allow "None" as second argument to Dict.get(). "Explicit is better than implicit." "TD002", # Require author for TODOs "TRY003", # Allow string passing to exception constructor. + "TRY400", # Ignore for now: prefer logging.exception over logging.error ] fixable = ["ALL"] unfixable = [ diff --git a/unit_tests/sources/test_http_logger.py b/unit_tests/sources/test_http_logger.py index 115b419e..bc3654b9 100644 --- a/unit_tests/sources/test_http_logger.py +++ b/unit_tests/sources/test_http_logger.py @@ -18,7 +18,7 @@ class ResponseBuilder: - def __init__(self): + def __init__(self) -> None: self._body_content = "" self._headers = {} self._request = ANY_REQUEST @@ -225,9 +225,21 @@ def build(self): ], ) def test_prepared_request_to_airbyte_message( - test_name, http_method, url, headers, params, body_json, body_data, expected_airbyte_message -): - request = requests.Request(method=http_method, url=url, headers=headers, params=params) + test_name, + http_method, + url, + headers, + params, + body_json, + body_data, + expected_airbyte_message, +) -> None: + request = requests.Request( + method=http_method, + url=url, + headers=headers, + params=params, + ) if body_json: request.json = body_json if body_data: @@ -235,7 +247,10 @@ def test_prepared_request_to_airbyte_message( prepared_request = request.prepare() actual_airbyte_message = format_http_message( - ResponseBuilder().request(prepared_request).build(), A_TITLE, A_DESCRIPTION, A_STREAM_NAME + ResponseBuilder().request(prepared_request).build(), + A_TITLE, + A_DESCRIPTION, + A_STREAM_NAME, ) assert actual_airbyte_message == expected_airbyte_message @@ -279,8 +294,12 @@ def test_prepared_request_to_airbyte_message( ], ) def test_response_to_airbyte_message( - test_name, response_body, response_headers, status_code, expected_airbyte_message -): + test_name, + response_body, + response_headers, + status_code, + expected_airbyte_message, +) -> None: response = ( ResponseBuilder() .body_content(response_body) @@ -289,6 +308,11 @@ def test_response_to_airbyte_message( .build() ) - actual_airbyte_message = format_http_message(response, A_TITLE, A_DESCRIPTION, A_STREAM_NAME) + actual_airbyte_message = format_http_message( + response, + A_TITLE, + A_DESCRIPTION, + A_STREAM_NAME, + ) assert actual_airbyte_message["http"]["response"] == expected_airbyte_message