diff --git a/airbyte_cdk/config_observation.py b/airbyte_cdk/config_observation.py
index 40d6a89a..52504600 100644
--- a/airbyte_cdk/config_observation.py
+++ b/airbyte_cdk/config_observation.py
@@ -28,6 +28,7 @@ def __init__(
         self,
         non_observed_mapping: MutableMapping[Any, Any],
         observer: ConfigObserver,
+        *,
         update_on_unchanged_value: bool = True,
     ) -> None:
         non_observed_mapping = copy(non_observed_mapping)
@@ -45,7 +46,7 @@ def __init__(
                     value[i] = ObservedDict(sub_value, observer)
         super().__init__(non_observed_mapping)
 
-    def __setitem__(self, item: Any, value: Any) -> None:
+    def __setitem__(self, item: Any, value: Any) -> None:  # noqa: ANN401 (any-type)
        """Override dict.__setitem__ by:
        1. Observing the new value if it is a dict
        2. Call observer update if the new value is different from the previous one
@@ -78,7 +79,7 @@ def observe_connector_config(
     non_observed_connector_config: MutableMapping[str, Any],
 ) -> ObservedDict:
     if isinstance(non_observed_connector_config, ObservedDict):
-        raise ValueError("This connector configuration is already observed")
+        raise ValueError("This connector configuration is already observed")  # noqa: TRY004 (expected TypeError)
     connector_config_observer = ConfigObserver()
     observed_connector_config = ObservedDict(
         non_observed_connector_config, connector_config_observer
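
For readers unfamiliar with the observer pattern behind `ObservedDict`, here is a minimal, runnable sketch of the same idea; the class and observer names are illustrative, not the CDK's API. It also shows why the new `*,` marker matters: the boolean flag can no longer be passed positionally.

from typing import Any


class Observer:
    """Toy observer that just counts how many updates it saw."""

    def __init__(self) -> None:
        self.updates = 0

    def update(self) -> None:
        self.updates += 1


class WatchedDict(dict):
    """Dict that pings an observer whenever a key is set."""

    def __init__(self, mapping: dict, observer: Observer, *, update_on_unchanged_value: bool = True) -> None:
        super().__init__(mapping)
        self._observer = observer
        self._update_on_unchanged_value = update_on_unchanged_value

    def __setitem__(self, key: Any, value: Any) -> None:
        previous = self.get(key)
        super().__setitem__(key, value)
        if value != previous or self._update_on_unchanged_value:
            self._observer.update()


observer = Observer()
config = WatchedDict({"token": "abc"}, observer, update_on_unchanged_value=False)
config["token"] = "xyz"  # changed value -> observer notified
config["token"] = "xyz"  # unchanged value -> skipped because of the flag
assert observer.updates == 1
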
diff --git a/airbyte_cdk/connector.py b/airbyte_cdk/connector.py
index da42bcac..9c22372e 100644
--- a/airbyte_cdk/connector.py
+++ b/airbyte_cdk/connector.py
@@ -52,8 +52,8 @@ def read_config(config_path: str) -> Mapping[str, Any]:
         )
 
     @staticmethod
-    def _read_json_file(file_path: str) -> Any:
-        with open(file_path, encoding="utf-8") as file:
+    def _read_json_file(file_path: str) -> Any:  # noqa: ANN401 (any-type)
+        with open(file_path, encoding="utf-8") as file:  # noqa: PTH123, FURB101 (prefer pathlib)
             contents = file.read()
 
         try:
@@ -61,17 +61,18 @@ def _read_json_file(file_path: str) -> Any:
         except json.JSONDecodeError as error:
             raise ValueError(
                 f"Could not read json file {file_path}: {error}. Please ensure that it is a valid JSON."
-            )
+            ) from None
 
     @staticmethod
     def write_config(config: TConfig, config_path: str) -> None:
-        with open(config_path, "w", encoding="utf-8") as fh:
+        with open(config_path, "w", encoding="utf-8") as fh:  # noqa: PTH123, FURB103 (replace with pathlib)
             fh.write(json.dumps(config))
 
     def spec(self, logger: logging.Logger) -> ConnectorSpecification:
         """Returns the spec for this integration. The spec is a JSON-Schema object describing the required
         configurations (e.g: username and password) required to run this integration. By default, this will be
         loaded from a "spec.yaml" or a "spec.json" in the package root.
         """
+        _ = logger  # unused
         package = self.__class__.__module__.split(".")[0]
 
         yaml_spec = load_optional_package_file(package, "spec.yaml")
@@ -90,7 +91,7 @@ def spec(self, logger: logging.Logger) -> ConnectorSpecification:
             except json.JSONDecodeError as error:
                 raise ValueError(
                     f"Could not read json spec file: {error}. Please ensure that it is a valid JSON."
-                )
+                ) from None
         else:
             raise FileNotFoundError("Unable to find spec.yaml or spec.json in the package.")
 
@@ -113,7 +114,7 @@ class DefaultConnectorMixin:
     def configure(
         self: _WriteConfigProtocol, config: Mapping[str, Any], temp_dir: str
     ) -> Mapping[str, Any]:
-        config_path = os.path.join(temp_dir, "config.json")
+        config_path = os.path.join(temp_dir, "config.json")  # noqa: PTH118 (should use pathlib)
         self.write_config(config, config_path)
         return config
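
The `PTH123`/`FURB101`/`FURB103` suppressions above mark spots where Ruff would rather see `pathlib`. A hedged sketch of what that rewrite could look like (this is the linter's suggestion, not part of this diff), including the `from None` chaining the diff does adopt:

import json
from pathlib import Path
from typing import Any


def read_json_file(file_path: str) -> Any:
    # Path.read_text replaces open(...).read() and satisfies PTH123/FURB101
    contents = Path(file_path).read_text(encoding="utf-8")
    try:
        return json.loads(contents)
    except json.JSONDecodeError as error:
        # `from None` drops the JSONDecodeError from the chained traceback,
        # matching the `raise ... from None` additions in this diff
        raise ValueError(f"Could not read json file {file_path}: {error}.") from None


def write_config(config: dict[str, Any], config_path: str) -> None:
    Path(config_path).write_text(json.dumps(config), encoding="utf-8")
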
"config"): return parsed_args.config @@ -333,13 +337,13 @@ def _init_internal_request_filter() -> None: wrapped_fn = Session.send @wraps(wrapped_fn) - def filtered_send(self: Any, request: PreparedRequest, **kwargs: Any) -> Response: + def filtered_send(self: Any, request: PreparedRequest, **kwargs: Any) -> Response: # noqa: ANN401 (any-type) parsed_url = urlparse(request.url) if parsed_url.scheme not in VALID_URL_SCHEMES: raise requests.exceptions.InvalidSchema( "Invalid Protocol Scheme: The endpoint that data is being requested from is using an invalid or insecure " - + f"protocol {parsed_url.scheme!r}. Valid protocol schemes: {','.join(VALID_URL_SCHEMES)}" + f"protocol {parsed_url.scheme!r}. Valid protocol schemes: {','.join(VALID_URL_SCHEMES)}" ) if not parsed_url.hostname: @@ -359,7 +363,7 @@ def filtered_send(self: Any, request: PreparedRequest, **kwargs: Any) -> Respons # This is a special case where the developer specifies an IP address string that is not formatted correctly like trailing # whitespace which will fail the socket IP lookup. This only happens when using IP addresses and not text hostnames. # Knowing that this is a request using the requests library, we will mock the exception without calling the lib - raise requests.exceptions.InvalidURL(f"Invalid URL {parsed_url}: {exception}") + raise requests.exceptions.InvalidURL(f"Invalid URL {parsed_url}: {exception}") from None return wrapped_fn(self, request, **kwargs) @@ -389,6 +393,6 @@ def main() -> None: source = impl() if not isinstance(source, Source): - raise Exception("Source implementation provided does not implement Source class!") + raise Exception("Source implementation provided does not implement Source class!") # noqa: TRY002, TRY004 (should raise TypeError) launch(source, sys.argv[1:]) diff --git a/airbyte_cdk/exception_handler.py b/airbyte_cdk/exception_handler.py index 5c493816..09a4cf86 100644 --- a/airbyte_cdk/exception_handler.py +++ b/airbyte_cdk/exception_handler.py @@ -33,7 +33,7 @@ def hook_fn( exception_type: type[BaseException], exception_value: BaseException, traceback_: TracebackType | None, - ) -> Any: + ) -> Any: # noqa: ANN401 (any-type) # For developer ergonomics, we want to see the stack trace in the logs when we do a ctrl-c if issubclass(exception_type, KeyboardInterrupt): sys.__excepthook__(exception_type, exception_value, traceback_) diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py index 0687c6c2..d94f63d3 100644 --- a/airbyte_cdk/logger.py +++ b/airbyte_cdk/logger.py @@ -6,7 +6,7 @@ import json import logging import logging.config -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar from orjson import orjson @@ -61,7 +61,7 @@ class AirbyteLogFormatter(logging.Formatter): """Output log records using AirbyteMessage""" # Transforming Python log levels to Airbyte protocol log levels - level_mapping = { + level_mapping: ClassVar[dict[int, Level]] = { logging.FATAL: Level.FATAL, logging.ERROR: Level.ERROR, logging.WARNING: Level.WARN, diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py index 9c6dc81d..b2c9d794 100644 --- a/airbyte_cdk/sources/abstract_source.py +++ b/airbyte_cdk/sources/abstract_source.py @@ -75,10 +75,15 @@ def streams(self, config: Mapping[str, Any]) -> list[Stream]: """ # Stream name to instance map for applying output object transformation - _stream_to_instance_map: dict[str, Stream] = {} + _stream_to_instance_map: dict[str, Stream] = {} # noqa: RUF012 (mutable class member can leak across 
diff --git a/airbyte_cdk/exception_handler.py b/airbyte_cdk/exception_handler.py
index 5c493816..09a4cf86 100644
--- a/airbyte_cdk/exception_handler.py
+++ b/airbyte_cdk/exception_handler.py
@@ -33,7 +33,7 @@ def hook_fn(
         exception_type: type[BaseException],
         exception_value: BaseException,
         traceback_: TracebackType | None,
-    ) -> Any:
+    ) -> Any:  # noqa: ANN401 (any-type)
         # For developer ergonomics, we want to see the stack trace in the logs when we do a ctrl-c
         if issubclass(exception_type, KeyboardInterrupt):
             sys.__excepthook__(exception_type, exception_value, traceback_)
diff --git a/airbyte_cdk/logger.py b/airbyte_cdk/logger.py
index 0687c6c2..d94f63d3 100644
--- a/airbyte_cdk/logger.py
+++ b/airbyte_cdk/logger.py
@@ -6,7 +6,7 @@
 import json
 import logging
 import logging.config
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, ClassVar
 
 from orjson import orjson
 
@@ -61,7 +61,7 @@ class AirbyteLogFormatter(logging.Formatter):
     """Output log records using AirbyteMessage"""
 
     # Transforming Python log levels to Airbyte protocol log levels
-    level_mapping = {
+    level_mapping: ClassVar[dict[int, Level]] = {
         logging.FATAL: Level.FATAL,
         logging.ERROR: Level.ERROR,
         logging.WARNING: Level.WARN,
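
The `ClassVar` annotation is how RUF012 wants intentionally shared mutable class attributes declared: it tells type checkers the mapping belongs to the class, not to instances. A minimal illustration:

import logging
from typing import ClassVar


class Formatter:
    # Shared class-level constant: annotating with ClassVar makes the intent
    # explicit and keeps RUF012 (mutable class attribute) quiet.
    level_mapping: ClassVar[dict[int, str]] = {
        logging.ERROR: "ERROR",
        logging.WARNING: "WARN",
    }


assert Formatter.level_mapping[logging.ERROR] == "ERROR"
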
diff --git a/airbyte_cdk/sources/abstract_source.py b/airbyte_cdk/sources/abstract_source.py
index 9c6dc81d..b2c9d794 100644
--- a/airbyte_cdk/sources/abstract_source.py
+++ b/airbyte_cdk/sources/abstract_source.py
@@ -75,10 +75,15 @@ def streams(self, config: Mapping[str, Any]) -> list[Stream]:
     """
 
     # Stream name to instance map for applying output object transformation
-    _stream_to_instance_map: dict[str, Stream] = {}
+    _stream_to_instance_map: dict[str, Stream] = {}  # noqa: RUF012 (mutable class member can leak across instances)
     _slice_logger: SliceLogger = DebugSliceLogger()
 
-    def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
+    def discover(
+        self,
+        logger: logging.Logger,
+        config: Mapping[str, Any],
+    ) -> AirbyteCatalog:
+        _ = logger  # unused
         """Implements the Discover operation from the Airbyte Specification.
         See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/#discover.
         """
@@ -94,7 +99,7 @@ def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
             return AirbyteConnectionStatus(status=Status.FAILED, message=repr(error))
         return AirbyteConnectionStatus(status=Status.SUCCEEDED)
 
-    def read(
+    def read(  # noqa: PLR0915
         self,
         logger: logging.Logger,
         config: Mapping[str, Any],
@@ -104,7 +109,7 @@ def read(
         """Implements the Read operation from the Airbyte Specification. See https://docs.airbyte.com/understanding-airbyte/airbyte-protocol/."""
         logger.info(f"Starting syncing {self.name}")
         config, internal_config = split_config(config)
-        # TODO assert all streams exist in the connector
+        # TODO assert all streams exist in the connector  # noqa: TD004
         # get the streams once in case the connector needs to make any queries to generate them
         stream_instances = {s.name: s for s in self.streams(config)}
         state_manager = ConnectorStateManager(state=state)
@@ -133,7 +138,7 @@ def read(
                     # Use configured_stream as stream_instance to support references in error handling.
                     stream_instance = configured_stream.stream
-                    raise AirbyteTracedException(
+                    raise AirbyteTracedException(  # noqa: TRY301
                         message="A stream listed in your configuration was not found in the source. Please check the logs for more "
                         "details.",
                         internal_message=error_message,
diff --git a/airbyte_cdk/sources/config.py b/airbyte_cdk/sources/config.py
index 209ed17f..243bd3c0 100644
--- a/airbyte_cdk/sources/config.py
+++ b/airbyte_cdk/sources/config.py
@@ -19,7 +19,7 @@ class BaseConfig(BaseModel):
     """
 
     @classmethod
-    def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]:
+    def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]:  # noqa: ANN401 (any-type)
         """We're overriding the schema classmethod to enable some post-processing"""
         schema = super().schema(*args, **kwargs)
         rename_key(schema, old_key="anyOf", new_key="oneOf")  # UI supports only oneOf
diff --git a/airbyte_cdk/sources/declarative/async_job/status.py b/airbyte_cdk/sources/declarative/async_job/status.py
index 9081b87f..6be0e84a 100644
--- a/airbyte_cdk/sources/declarative/async_job/status.py
+++ b/airbyte_cdk/sources/declarative/async_job/status.py
@@ -13,7 +13,7 @@ class AsyncJobStatus(Enum):
     FAILED = ("FAILED", _TERMINAL)
     TIMED_OUT = ("TIMED_OUT", _TERMINAL)
 
-    def __init__(self, value: str, is_terminal: bool) -> None:  # noqa: FBT001 (positional bool arg)
+    def __init__(self, value: str, is_terminal: bool) -> None:  # noqa: FBT001 (positional bool)
         self._value = value
         self._is_terminal = is_terminal
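
For context on `AsyncJobStatus`: an `Enum` whose member tuples feed `__init__` can attach a per-member terminal flag without a separate lookup table. An illustrative sketch (not the CDK's exact class):

from enum import Enum

_TERMINAL = True


class JobStatus(Enum):
    RUNNING = ("RUNNING", not _TERMINAL)
    COMPLETED = ("COMPLETED", _TERMINAL)
    FAILED = ("FAILED", _TERMINAL)

    def __init__(self, value: str, is_terminal: bool) -> None:
        # Enum calls __init__ with each member's tuple, so the flag
        # becomes per-member state
        self._value = value
        self._is_terminal = is_terminal

    def is_terminal(self) -> bool:
        return self._is_terminal


assert JobStatus.FAILED.is_terminal()
assert not JobStatus.RUNNING.is_terminal()
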
diff --git a/airbyte_cdk/sources/declarative/auth/token.py b/airbyte_cdk/sources/declarative/auth/token.py
index b4d80912..586d31f8 100644
--- a/airbyte_cdk/sources/declarative/auth/token.py
+++ b/airbyte_cdk/sources/declarative/auth/token.py
@@ -1,7 +1,7 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-# ruff: noqa: A005 (shadows built-in 'token' module)
+# ruff: noqa: A005  # Shadows built-in 'token' module
 
 from __future__ import annotations
diff --git a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py
index 0eb8da9d..fe109b73 100644
--- a/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py
+++ b/airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py
@@ -45,7 +45,7 @@ def get_concurrency_level(self) -> int:
         if isinstance(self._default_concurrency, InterpolatedString):
             evaluated_default_concurrency = self._default_concurrency.eval(config=self.config)
             if not isinstance(evaluated_default_concurrency, int):
-                raise ValueError("default_concurrency did not evaluate to an integer")
+                raise ValueError("default_concurrency did not evaluate to an integer")  # noqa: TRY004 (expected TypeError)
             return (
                 min(evaluated_default_concurrency, self.max_concurrency)
                 if self.max_concurrency
diff --git a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
index c8d5aca8..08ea53fe 100644
--- a/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/concurrent_declarative_source.py
@@ -55,14 +55,15 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
     def __init__(
         self,
-        catalog: ConfiguredAirbyteCatalog | None,
+        catalog: ConfiguredAirbyteCatalog | None,  # noqa: ARG002 (unused)
         config: Mapping[str, Any] | None,
         state: TState,
         source_config: ConnectionDefinition,
+        *,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
         component_factory: ModelToComponentFactory | None = None,
-        **kwargs: Any,
+        **kwargs: Any,  # noqa: ANN401, ARG002 (any-type, unused)
     ) -> None:
         super().__init__(
             source_config=source_config,
@@ -149,7 +150,7 @@ def read(
 
         yield from super().read(logger, config, filtered_catalog, state)
 
-    def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
+    def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:  # noqa: ARG002 (unused)
         concurrent_streams = self._concurrent_streams or []
         synchronous_streams = self._synchronous_streams or []
         return AirbyteCatalog(
@@ -277,13 +278,13 @@ def _stream_supports_concurrent_partition_processing(
             declarative_stream.retriever.requester, HttpRequester
         ):
             http_requester = declarative_stream.retriever.requester
-            if "stream_state" in http_requester._path.string:
+            if "stream_state" in http_requester._path.string:  # noqa: SLF001 (private member)
                 self.logger.warning(
                     f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
                 )
                 return False
 
-            request_options_provider = http_requester._request_options_provider
+            request_options_provider = http_requester._request_options_provider  # noqa: SLF001 (private member)
             if request_options_provider.request_options_contain_stream_state():
                 self.logger.warning(
                     f"Low-code stream '{declarative_stream.name}' uses interpolation of stream_state in the HttpRequester which is not thread-safe. Defaulting to synchronous processing"
                 )
Got {stream_slice}" ) yield from self.retriever.read_records(self.get_json_schema(), stream_slice) # type: ignore # records are of the correct type @@ -161,9 +163,9 @@ def get_json_schema(self) -> Mapping[str, Any]: # type: ignore def stream_slices( self, *, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, - stream_state: Mapping[str, Any] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) + stream_state: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Iterable[StreamSlice | None]: """Override to define the slices for this stream. See the stream slicing section of the docs for more information. diff --git a/airbyte_cdk/sources/declarative/decoders/json_decoder.py b/airbyte_cdk/sources/declarative/decoders/json_decoder.py index 5627097c..269be0da 100644 --- a/airbyte_cdk/sources/declarative/decoders/json_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/json_decoder.py @@ -69,8 +69,11 @@ class JsonlDecoder(Decoder): def is_stream_response(self) -> bool: return True - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: - # TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional? + def decode( # type: ignore[override] # Base class returns MutableMapping + self, + response: requests.Response, + ) -> Generator[Mapping[str, Any], None, None]: + # TODO: (?) set delimiter? usually it is `\n` but maybe it would be useful to set optional? # https://github.com/airbytehq/airbyte-internal-issues/issues/8436 for record in response.iter_lines(): yield orjson.loads(record) diff --git a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py index 197f168e..428a12bd 100644 --- a/airbyte_cdk/sources/declarative/decoders/noop_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/noop_decoder.py @@ -20,5 +20,8 @@ class NoopDecoder(Decoder): def is_stream_response(self) -> bool: return False - def decode(self, response: requests.Response) -> Generator[Mapping[str, Any], None, None]: + def decode( # type: ignore[override] # Base class returns MutableMapping + self, + response: requests.Response, # noqa: ARG002 (unused) + ) -> Generator[Mapping[str, Any], None, None]: yield from [{}] diff --git a/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte_cdk/sources/declarative/extractors/record_filter.py index cc0601a1..e269469f 100644 --- a/airbyte_cdk/sources/declarative/extractors/record_filter.py +++ b/airbyte_cdk/sources/declarative/extractors/record_filter.py @@ -67,7 +67,7 @@ def __init__( self, date_time_based_cursor: DatetimeBasedCursor, substream_cursor: PerPartitionWithGlobalCursor | GlobalSubstreamCursor | None, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> None: super().__init__(**kwargs) self._date_time_based_cursor = date_time_based_cursor @@ -79,7 +79,7 @@ def _cursor_field(self) -> str: @property def _start_date_from_config(self) -> datetime.datetime: - return self._date_time_based_cursor._start_datetime.get_datetime( + return self._date_time_based_cursor._start_datetime.get_datetime( # noqa: SLF001 (private member) self._date_time_based_cursor.config ) @@ -114,7 +114,7 @@ def filter_records( ) yield from records - def _get_state_value(self, stream_state: StreamState, stream_slice: StreamSlice) -> str | None: + def _get_state_value(self, stream_state: StreamState, stream_slice: StreamSlice) -> str | None: # noqa: ARG002 
(unused) """Return cursor_value or None in case it was not found. Cursor_value may be empty if: 1. It is an initial sync => no stream_state exist at all. diff --git a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py index 7acbd1e7..6d96cb4c 100644 --- a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +++ b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py @@ -95,7 +95,7 @@ def _save_to_file(self, response: requests.Response) -> tuple[str, str]: needs_decompression = True # we will assume at first that the response is compressed and change the flag if not tmp_file = str(uuid.uuid4()) - with closing(response) as response, open(tmp_file, "wb") as data_file: + with closing(response) as response, open(tmp_file, "wb") as data_file: # noqa: PTH123, PLR1704 (prefer pathlib) response_encoding = self._get_response_encoding(dict(response.headers or {})) for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE): try: @@ -109,7 +109,7 @@ def _save_to_file(self, response: requests.Response) -> tuple[str, str]: needs_decompression = False # check the file exists - if os.path.isfile(tmp_file): + if os.path.isfile(tmp_file): # noqa: PTH113 (prefer pathlib) return tmp_file, response_encoding raise ValueError( f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist." @@ -132,12 +132,12 @@ def _read_with_chunks( ValueError: If an IO/Error occurs while reading the temporary data. """ try: - with open(path, encoding=file_encoding) as data: + with open(path, encoding=file_encoding) as data: # noqa: PTH123 (prefer pathlib) chunks = pd.read_csv( data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object ) for chunk in chunks: - chunk = chunk.replace({nan: None}).to_dict(orient="records") + chunk = chunk.replace({nan: None}).to_dict(orient="records") # noqa: PLW2901 (redefined loop var) yield from chunk # Yield rows from chunks except pd.errors.EmptyDataError as e: self.logger.info(f"Empty data received. {e}") @@ -149,7 +149,7 @@ def _read_with_chunks( ) from None finally: # remove binary tmp file, after data is read - os.remove(path) + os.remove(path) # noqa: PTH107 (prefer pathlib) def extract_records( self, response: requests.Response | None = None diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py index c3b38eec..694bf2f1 100644 --- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py @@ -167,7 +167,11 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: ): self._highest_observed_cursor_field_value = record_cursor_value - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice( + self, + stream_slice: StreamSlice, + *args: Any, # noqa: ANN401, ARG002 (any-type, unused) + ) -> None: if stream_slice.partition: raise ValueError( f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}." 
diff --git a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py
index 7acbd1e7..6d96cb4c 100644
--- a/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py
+++ b/airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py
@@ -95,7 +95,7 @@ def _save_to_file(self, response: requests.Response) -> tuple[str, str]:
         needs_decompression = True  # we will assume at first that the response is compressed and change the flag if not
         tmp_file = str(uuid.uuid4())
-        with closing(response) as response, open(tmp_file, "wb") as data_file:
+        with closing(response) as response, open(tmp_file, "wb") as data_file:  # noqa: PTH123, PLR1704 (prefer pathlib)
             response_encoding = self._get_response_encoding(dict(response.headers or {}))
             for chunk in response.iter_content(chunk_size=DOWNLOAD_CHUNK_SIZE):
                 try:
@@ -109,7 +109,7 @@ def _save_to_file(self, response: requests.Response) -> tuple[str, str]:
                     needs_decompression = False
 
         # check the file exists
-        if os.path.isfile(tmp_file):
+        if os.path.isfile(tmp_file):  # noqa: PTH113 (prefer pathlib)
             return tmp_file, response_encoding
         raise ValueError(
             f"The IO/Error occurred while verifying binary data. Tmp file {tmp_file} doesn't exist."
         )
@@ -132,12 +132,12 @@ def _read_with_chunks(
             ValueError: If an IO/Error occurs while reading the temporary data.
         """
         try:
-            with open(path, encoding=file_encoding) as data:
+            with open(path, encoding=file_encoding) as data:  # noqa: PTH123 (prefer pathlib)
                 chunks = pd.read_csv(
                     data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
                 )
                 for chunk in chunks:
-                    chunk = chunk.replace({nan: None}).to_dict(orient="records")
+                    chunk = chunk.replace({nan: None}).to_dict(orient="records")  # noqa: PLW2901 (redefined loop var)
                     yield from chunk  # Yield rows from chunks
         except pd.errors.EmptyDataError as e:
             self.logger.info(f"Empty data received. {e}")
@@ -149,7 +149,7 @@ def _read_with_chunks(
             ) from None
         finally:
             # remove binary tmp file, after data is read
-            os.remove(path)
+            os.remove(path)  # noqa: PTH107 (prefer pathlib)
 
     def extract_records(
         self, response: requests.Response | None = None
diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
index c3b38eec..694bf2f1 100644
--- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -167,7 +167,11 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
         ):
             self._highest_observed_cursor_field_value = record_cursor_value
 
-    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
+    def close_slice(
+        self,
+        stream_slice: StreamSlice,
+        *args: Any,  # noqa: ANN401, ARG002 (any-type, unused)
+    ) -> None:
         if stream_slice.partition:
             raise ValueError(
                 f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}."
@@ -205,7 +209,7 @@ def stream_slices(self) -> Iterable[StreamSlice]:
         start_datetime = self._calculate_earliest_possible_value(self.select_best_end_datetime())
         return self._partition_daterange(start_datetime, end_datetime, self._step)
 
-    def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None:
+    def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None:  # noqa: ARG002 (unused)
         # Datetime based cursors operate over slices made up of datetime ranges. Stream state is based on the progress
         # through each slice and does not belong to a specific slice. We just return stream state as it is.
         return self.get_stream_state()
@@ -321,36 +325,36 @@ def _parse_timedelta(cls, time_str: str | None) -> datetime.timedelta | Duration:
     def get_request_params(
         self,
         *,
-        stream_state: StreamState | None = None,
+        stream_state: StreamState | None = None,  # noqa: ARG002 (unused)
         stream_slice: StreamSlice | None = None,
-        next_page_token: Mapping[str, Any] | None = None,
+        next_page_token: Mapping[str, Any] | None = None,  # noqa: ARG002 (unused)
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.request_parameter, stream_slice)
 
     def get_request_headers(
         self,
         *,
-        stream_state: StreamState | None = None,
+        stream_state: StreamState | None = None,  # noqa: ARG002 (unused)
         stream_slice: StreamSlice | None = None,
-        next_page_token: Mapping[str, Any] | None = None,
+        next_page_token: Mapping[str, Any] | None = None,  # noqa: ARG002 (unused)
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.header, stream_slice)
 
     def get_request_body_data(
         self,
         *,
-        stream_state: StreamState | None = None,
+        stream_state: StreamState | None = None,  # noqa: ARG002 (unused)
         stream_slice: StreamSlice | None = None,
-        next_page_token: Mapping[str, Any] | None = None,
+        next_page_token: Mapping[str, Any] | None = None,  # noqa: ARG002 (unused)
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.body_data, stream_slice)
 
     def get_request_body_json(
         self,
         *,
-        stream_state: StreamState | None = None,
+        stream_state: StreamState | None = None,  # noqa: ARG002 (unused)
         stream_slice: StreamSlice | None = None,
-        next_page_token: Mapping[str, Any] | None = None,
+        next_page_token: Mapping[str, Any] | None = None,  # noqa: ARG002 (unused)
     ) -> Mapping[str, Any]:
         return self._get_request_options(RequestOptionType.body_json, stream_slice)
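
The `_read_with_chunks` idiom above streams a large CSV through pandas while keeping memory flat and normalizing `NaN` to `None`. A self-contained sketch, assuming pandas is installed (the function name is illustrative):

import tempfile
from collections.abc import Iterator
from typing import Any

import pandas as pd
from numpy import nan


def iter_csv_records(path: str, chunk_size: int = 2) -> Iterator[dict[str, Any]]:
    with open(path, encoding="utf-8") as data:
        # chunksize keeps only `chunk_size` rows in memory at a time
        for frame in pd.read_csv(data, chunksize=chunk_size, dtype=object):
            # NaN is pandas' missing-value marker; records should carry None
            yield from frame.replace({nan: None}).to_dict(orient="records")


with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as f:
    f.write("id,name\n1,foo\n2,\n3,bar\n")

records = list(iter_csv_records(f.name))
assert records[1] == {"id": "2", "name": None}
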
raise RuntimeError("Global substream cursor timer not started") @@ -124,7 +124,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: ) self.start_slices_generation() - for slice, last, state in iterate_with_last_flag_and_state( + for slice, last, state in iterate_with_last_flag_and_state( # noqa: A001 (shadowed built-in) slice_generator, self._partition_router.get_stream_state ): self._parent_state = state @@ -140,7 +140,7 @@ def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[Str yield from slice_generator - def register_slice(self, last: bool) -> None: + def register_slice(self, last: bool) -> None: # noqa: FBT001 (positional bool) """Tracks the processing of a stream slice. Releases the semaphore for each slice. If it's the last slice (`last=True`), @@ -216,7 +216,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record ) - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401 (any-type) """Close the current stream slice. This method is called when a stream slice is completed. For the global parent cursor, we close the child cursor @@ -229,7 +229,7 @@ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: """ with self._lock: self._slice_semaphore.acquire() - if self._all_slices_yielded and self._slice_semaphore._value == 0: + if self._all_slices_yielded and self._slice_semaphore._value == 0: # noqa: SLF001 (private member) self._lookback_window = self._timer.finish() self._stream_cursor.close_slice( StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args @@ -246,7 +246,7 @@ def get_stream_state(self) -> StreamState: return state - def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # noqa: ARG002 (unused) # stream_slice is ignored as cursor is global return self._stream_cursor.get_stream_state() diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py index 2e09196b..928d2c52 100644 --- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py @@ -157,7 +157,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record ) - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401 (any-type) try: self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].close_slice( StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args @@ -166,7 +166,7 @@ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: raise ValueError( f"Partition {exception!s} could not be found in current state based on the record. 
diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
index 2e09196b..928d2c52 100644
--- a/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
@@ -157,7 +157,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
             StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
         )
 
-    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
+    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:  # noqa: ANN401 (any-type)
         try:
             self._cursor_per_partition[self._to_partition_key(stream_slice.partition)].close_slice(
                 StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args
@@ -166,7 +166,7 @@ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
             raise ValueError(
                 f"Partition {exception!s} could not be found in current state based on the record. This is unexpected because "
                 f"we should only update state for partitions that were emitted during `stream_slices`"
-            )
+            ) from None
 
     def get_stream_state(self) -> StreamState:
         states = []
@@ -212,7 +212,7 @@ def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None:
 
         return self._get_state_for_partition(stream_slice.partition)
 
-    def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:
+    def _create_cursor(self, cursor_state: Any) -> DeclarativeCursor:  # noqa: ANN401 (any-type)
         cursor = self._cursor_factory.create()
         cursor.set_initial_state(cursor_state)
         return cursor
diff --git a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py
index 26f97ed1..cdc18c98 100644
--- a/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py
+++ b/airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py
@@ -100,7 +100,7 @@ def stream_slices(self) -> Iterable[StreamSlice]:
         ):
             # Generate slices for the current cursor and handle the last slice using the flag
             self._parent_state = parent_state
-            for slice, is_last_slice, _ in iterate_with_last_flag_and_state(
+            for slice, is_last_slice, _ in iterate_with_last_flag_and_state(  # noqa: A001 (shadowed built-in)
                 self._get_active_cursor().generate_slices_from_partition(partition=partition),
                 lambda: None,
             ):
@@ -126,7 +126,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None:
             self._per_partition_cursor.observe(stream_slice, record)
         self._global_cursor.observe(stream_slice, record)
 
-    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
+    def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:  # noqa: ANN401 (any-type)
         if not self._use_global_cursor:
             self._per_partition_cursor.close_slice(stream_slice, *args)
         self._global_cursor.close_slice(stream_slice, *args)
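
`iterate_with_last_flag_and_state` appears in both cursors above; judging by how the call sites unpack it, it yields `(item, is_last, state)` triples. A hedged sketch of that lookahead-iteration semantics (an assumption, not the CDK's actual implementation):

from collections.abc import Callable, Iterable, Iterator
from typing import Any


def iterate_with_last_flag_and_state(
    generator: Iterable[Any], get_state: Callable[[], Any]
) -> Iterator[tuple[Any, bool, Any]]:
    iterator = iter(generator)
    try:
        current = next(iterator)
    except StopIteration:
        return  # empty input: nothing to yield
    for upcoming in iterator:
        # the lookahead item proves `current` is not the last one
        yield current, False, get_state()
        current = upcoming
    yield current, True, get_state()


items = list(iterate_with_last_flag_and_state(["a", "b"], lambda: {"cursor": 1}))
assert items == [("a", False, {"cursor": 1}), ("b", True, {"cursor": 1})]
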
""" return True - def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: + def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: # noqa: ARG002 (unused) """RFR record don't have ordering to be compared between one another.""" return False - def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # noqa: ARG002 (unused) # A top-level RFR cursor only manages the state of a single partition return self._cursor @@ -64,36 +65,36 @@ def stream_slices(self) -> Iterable[StreamSlice]: def get_request_params( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_body_data( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_body_json( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} @@ -107,7 +108,7 @@ class ChildPartitionResumableFullRefreshCursor(ResumableFullRefreshCursor): Check the `close_slice` method overide for more info about the actual behaviour of this cursor. """ - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401 (any-type) """Once the current slice has finished syncing: - paginator returns None - no more slices to process @@ -116,4 +117,5 @@ def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: thus we have to set the cursor to ` __ab_full_refresh_sync_complete: true `, otherwise there is a risk of Inf. Loop processing the same slice. 
""" + _ = stream_slice, args self._cursor = FULL_REFRESH_COMPLETE_STATE diff --git a/airbyte_cdk/sources/declarative/interpolation/filters.py b/airbyte_cdk/sources/declarative/interpolation/filters.py index c9ca75a8..0bf6d922 100644 --- a/airbyte_cdk/sources/declarative/interpolation/filters.py +++ b/airbyte_cdk/sources/declarative/interpolation/filters.py @@ -10,7 +10,7 @@ from typing import Any -def hash(value: Any, hash_type: str = "md5", salt: str | None = None) -> str: +def hash(value: Any, hash_type: str = "md5", salt: str | None = None) -> str: # noqa: ANN401, A001 (any-type, shadowed-builtin) """Implementation of a custom Jinja2 hash filter Hash type defaults to 'md5' if one is not specified. @@ -89,7 +89,7 @@ def base64decode(value: str) -> str: return base64.b64decode(value.encode("utf-8")).decode() -def string(value: Any) -> str: +def string(value: Any) -> str: # noqa: ANN401 (any-type) """Converts the input value to a string. If the value is already a string, it is returned as is. Otherwise, the value is interpreted as a json object and wrapped in triple-quotes so it's evalued as a string by the JinjaInterpolation diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py index 484658a4..0dfb8f48 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py @@ -34,8 +34,8 @@ @dataclass class InterpolatedBoolean: - f""" - Wrapper around a string to be evaluated to a boolean value. + """Wrapper around a string to be evaluated to a boolean value. + The string will be evaluated as False if it interpolates to a value in {FALSE_VALUES} Attributes: @@ -49,7 +49,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._interpolation = JinjaInterpolation() self._parameters = parameters - def eval(self, config: Config, **additional_parameters: Any) -> bool: + def eval(self, config: Config, **additional_parameters: Any) -> bool: # noqa: ANN401 (any-type) """Interpolates the predicate condition string using the config and other optional arguments passed as parameter. :param config: The user-provided configuration as specified by the source's spec @@ -65,7 +65,7 @@ def eval(self, config: Config, **additional_parameters: Any) -> bool: parameters=self._parameters, **additional_parameters, ) - if evaluated in FALSE_VALUES: + if evaluated in FALSE_VALUES: # noqa: SIM103 (unnecessary bool) return False # The presence of a value is generally regarded as truthy, so we treat it as such return True diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py index a41280fc..139e309e 100644 --- a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py @@ -30,7 +30,7 @@ def __post_init__(self, parameters: Mapping[str, Any] | None) -> None: self._interpolation = JinjaInterpolation() self._parameters = parameters - def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]: + def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]: # noqa: ANN401 (any-type) """Wrapper around a Mapping[str, str] that allows for both keys and values to be interpolated. 
diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py
index a41280fc..139e309e 100644
--- a/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py
+++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py
@@ -30,7 +30,7 @@ def __post_init__(self, parameters: Mapping[str, Any] | None) -> None:
         self._interpolation = JinjaInterpolation()
         self._parameters = parameters
 
-    def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]:
+    def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]:  # noqa: ANN401 (any-type)
         """Wrapper around a Mapping[str, str] that allows for both keys and values to be interpolated.
 
         :param config: The user-provided configuration as specified by the source's spec
@@ -50,7 +50,7 @@ def eval(self, config: Config, **additional_parameters: Any) -> dict[str, Any]:
             for name, value in self.mapping.items()
         }
 
-    def _eval(self, value: str, config: Config, **kwargs: Any) -> Any:
+    def _eval(self, value: str, config: Config, **kwargs: Any) -> Any:  # noqa: ANN401 (any-type)
         # The values in self._mapping can be of Any type
         # We only want to interpolate them if they are strings
         if isinstance(value, str):
diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py
index c46dc0d7..8dd80643 100644
--- a/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py
+++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py
@@ -36,12 +36,15 @@ def __post_init__(self, parameters: Mapping[str, Any] | None) -> None:
         self._interpolation = JinjaInterpolation()
         self._parameters = parameters
 
-    def eval(self, config: Config, **additional_parameters: Any) -> Any:
+    def eval(self, config: Config, **additional_parameters: Any) -> Any:  # noqa: ANN401 (any-type)
         return self._eval(self.mapping, config, **additional_parameters)
 
     def _eval(
-        self, value: NestedMapping | NestedMappingEntry, config: Config, **kwargs: Any
-    ) -> Any:
+        self,
+        value: NestedMapping | NestedMappingEntry,
+        config: Config,
+        **kwargs: Any,  # noqa: ANN401 (any-type)
+    ) -> Any:  # noqa: ANN401 (any-type)
         # Recursively interpolate dictionaries and lists
         if isinstance(value, str):
             return self._interpolation.eval(value, config, parameters=self._parameters, **kwargs)
diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py
index 7e56afaf..6e8c6ec3 100644
--- a/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py
+++ b/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py
@@ -37,7 +37,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         # This allows for optimization, but we do not know it yet at this stage
         self._is_plain_string = None
 
-    def eval(self, config: Config, **kwargs: Any) -> Any:
+    def eval(self, config: Config, **kwargs: Any) -> Any:  # noqa: ANN401 (any-type)
         """Interpolates the input string using the config and other optional arguments passed as parameter.
 
         :param config: The user-provided configuration as specified by the source's spec
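
The recursive walk in `InterpolatedNestedMapping._eval` has a common shape: render strings, recurse into dicts (both keys and values) and lists, and pass everything else through. A standalone sketch with `str.format` standing in for Jinja rendering:

from typing import Any


def interpolate(value: Any, config: dict[str, Any]) -> Any:
    if isinstance(value, str):
        return value.format(config=config)  # Jinja rendering in the real code
    if isinstance(value, dict):
        return {interpolate(k, config): interpolate(v, config) for k, v in value.items()}
    if isinstance(value, list):
        return [interpolate(v, config) for v in value]
    return value  # ints, None, etc. pass through untouched


result = interpolate({"params": [{"{config[key]}": "{config[value]}"}]}, {"key": "a", "value": "b"})
assert result == {"params": [{"a": "b"}]}
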
diff --git a/airbyte_cdk/sources/declarative/interpolation/interpolation.py b/airbyte_cdk/sources/declarative/interpolation/interpolation.py
index 30cf5072..ca89beac 100644
--- a/airbyte_cdk/sources/declarative/interpolation/interpolation.py
+++ b/airbyte_cdk/sources/declarative/interpolation/interpolation.py
@@ -20,8 +20,8 @@ def eval(
         input_str: str,
         config: Config,
         default: str | None = None,
-        **additional_options: Any,
-    ) -> Any:
+        **additional_options: Any,  # noqa: ANN401 (any-type)
+    ) -> Any:  # noqa: ANN401 (any-type)
         """Interpolates the input string using the config, and additional options passed as parameter.
 
         :param input_str: The string to interpolate
diff --git a/airbyte_cdk/sources/declarative/interpolation/jinja.py b/airbyte_cdk/sources/declarative/interpolation/jinja.py
index 536f482a..5b148dd7 100644
--- a/airbyte_cdk/sources/declarative/interpolation/jinja.py
+++ b/airbyte_cdk/sources/declarative/interpolation/jinja.py
@@ -30,7 +30,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
     parameter
     """
 
-    def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
+    def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:  # noqa: ANN401 (any-type)
         if attr == "_partition":
             return True
         return super().is_safe_attribute(obj, attr, value)  # type: ignore  # for some reason, mypy says 'Returning Any from function declared to return "bool"'
@@ -54,7 +54,7 @@ class JinjaInterpolation(Interpolation):
     """
 
     # These aliases are used to deprecate existing keywords without breaking all existing connectors.
-    ALIASES = {
+    ALIASES = {  # noqa: RUF012
         "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
         "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
     }
@@ -62,12 +62,14 @@
     # These extensions are not installed so they're not currently a problem,
     # but we're still explicitly removing them from the jinja context.
     # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
-    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
+    RESTRICTED_EXTENSIONS = [  # noqa: RUF012 (mutable class attribute)
+        "jinja2.ext.loopcontrols"
+    ]  # Adds support for break continue in loops
 
     # By default, these Python builtin functions are available in the Jinja context.
     # We explicitly remove them because of the potential security risk.
     # Please add a unit test to test_jinja.py when adding a restriction.
-    RESTRICTED_BUILTIN_FUNCTIONS = [
+    RESTRICTED_BUILTIN_FUNCTIONS = [  # noqa: RUF012
         "range"
     ]  # The range function can cause very expensive computations
 
@@ -87,8 +89,8 @@ def eval(
         config: Config,
         default: str | None = None,
         valid_types: tuple[type[Any]] | None = None,
-        **additional_parameters: Any,
-    ) -> Any:
+        **additional_parameters: Any,  # noqa: ANN401 (any-type)
+    ) -> Any:  # noqa: ANN401 (any-type)
         context = {"config": config, **additional_parameters}
 
         for alias, equivalent in self.ALIASES.items():
@@ -107,13 +109,13 @@ def eval(
                 return self._literal_eval(result, valid_types)
             else:
                 # If input is not a string, return it as is
-                raise Exception(f"Expected a string, got {input_str}")
+                raise Exception(f"Expected a string, got {input_str}")  # noqa: TRY002, TRY004 (should raise TypeError)
         except UndefinedError:
             pass
         # If result is empty or resulted in an undefined error, evaluate and return the default string
         return self._literal_eval(self._eval(default, context), valid_types)
 
-    def _literal_eval(self, result: str | None, valid_types: tuple[type[Any]] | None) -> Any:
+    def _literal_eval(self, result: str | None, valid_types: tuple[type[Any]] | None) -> Any:  # noqa: ANN401 (any-type)
         try:
             evaluated = ast.literal_eval(result)  # type: ignore  # literal_eval is able to handle None
         except (ValueError, SyntaxError):
@@ -136,13 +138,13 @@ def _eval(self, s: str | None, context: Mapping[str, Any]) -> str | None:
             # It can be returned as is
             return s
 
-    @cache
+    @cache  # noqa: B019 (cached class methods can cause memory leaks)
     def _find_undeclared_variables(self, s: str | None) -> Template:
         """Find undeclared variables and cache them"""
         ast = self._environment.parse(s)  # type: ignore  # parse is able to handle None
         return meta.find_undeclared_variables(ast)
 
-    @cache  # noqa: B019 (cached class methods can cause memory leaks)
+    @cache  # noqa: B019 (cached class methods can cause memory leaks)
     def _compile(self, s: str | None) -> Template:
         """We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader"""
         return self._environment.from_string(s)
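
The two `B019` suppressions deserve a note: `functools.cache` on an instance method keys the cache on `self`, so the class-level cache keeps every instance alive. A minimal demonstration of the leak (CPython reference semantics assumed):

import gc
import weakref
from functools import cache


class Compiler:
    @cache  # cache lives on the class and keys on `self`, so instances never die
    def compile(self, template: str) -> str:
        return template.upper()


instance = Compiler()
instance.compile("hello")
ref = weakref.ref(instance)
del instance
gc.collect()
assert ref() is not None  # still alive: the method cache holds the last reference

Here the suppression is presumably tolerable because a `JinjaInterpolation` instance is long-lived, which would explain silencing the rule rather than restructuring the caching.
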
diff --git a/airbyte_cdk/sources/declarative/interpolation/macros.py b/airbyte_cdk/sources/declarative/interpolation/macros.py
index cbc05003..91e1463b 100644
--- a/airbyte_cdk/sources/declarative/interpolation/macros.py
+++ b/airbyte_cdk/sources/declarative/interpolation/macros.py
@@ -70,7 +70,7 @@ def _str_to_datetime(s: str) -> datetime.datetime:
     return parsed_date.astimezone(pytz.utc)
 
 
-def max(*args: typing.Any) -> typing.Any:
+def max(*args: typing.Any) -> typing.Any:  # noqa: ANN401, A001 (any-type, shadow builtin)
     """Returns biggest object of an iterable, or two or more arguments.
 
     max(iterable, *[, default=obj, key=func]) -> value
@@ -89,7 +89,7 @@ def max(*args: typing.Any) -> typing.Any:
     return builtins.max(*args)
 
 
-def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:
+def day_delta(num_days: int, format: str = "%Y-%m-%dT%H:%M:%S.%f%z") -> str:  # noqa: A002 (shadow builtin)
     """Returns datetime of now() + num_days
 
     Usage:
@@ -113,7 +113,9 @@ def duration(datestring: str) -> datetime.timedelta | isodate.Duration:
 
 def format_datetime(
-    dt: str | datetime.datetime, format: str, input_format: str | None = None
+    dt: str | datetime.datetime,
+    format: str,  # noqa: A002 (shadow builtin)
+    input_format: str | None = None,
 ) -> str:
     """Converts datetime to another format
diff --git a/airbyte_cdk/sources/declarative/manifest_declarative_source.py b/airbyte_cdk/sources/declarative/manifest_declarative_source.py
index 52642e91..aa08ecca 100644
--- a/airbyte_cdk/sources/declarative/manifest_declarative_source.py
+++ b/airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -61,6 +61,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
     def __init__(
         self,
         source_config: ConnectionDefinition,
+        *,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
         component_factory: ModelToComponentFactory | None = None,
@@ -229,7 +230,7 @@ def _validate_source(self) -> None:
         except FileNotFoundError as e:
             raise FileNotFoundError(
                 f"Failed to read manifest component json schema required for validation: {e}"
-            )
+            ) from None
 
         streams = self._source_config.get("streams")
         if not streams:
@@ -263,7 +264,7 @@ def _validate_source(self) -> None:
                 f"The manifest version {manifest_version} is greater than the airbyte-cdk package version ({cdk_version}). Your "
                 f"manifest may contain features that are not in the current CDK version."
             )
-        if manifest_major == 0 and manifest_minor < 29:
+        if manifest_major == 0 and manifest_minor < 29:  # noqa: PLR2004 (magic number)
             raise ValidationError(
                 f"The low-code framework was promoted to Beta in airbyte-cdk version 0.29.0 and contains many breaking changes to the "
                 f"language. The manifest version {manifest_version} is incompatible with the airbyte-cdk package version "
@@ -274,7 +275,7 @@ def _validate_source(self) -> None:
     def _get_version_parts(version: str, version_type: str) -> tuple[int, int, int]:
         """Takes a semantic version represented as a string and splits it into a tuple of its major, minor, and patch versions."""
         version_parts = re.split(r"\.", version)
-        if len(version_parts) != 3 or not all(part.isdigit() for part in version_parts):
+        if len(version_parts) != 3 or not all(part.isdigit() for part in version_parts):  # noqa: PLR2004 (magic number)
             raise ValidationError(
                 f"The {version_type} version {version} specified is not a valid version format (ex. 1.2.3)"
             )
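
`_get_version_parts` is a split-and-validate helper; a standalone sketch of the same check (note that pre-release tags such as `1.2.3-rc1` fail the `isdigit` test and are rejected):

import re


def get_version_parts(version: str) -> tuple[int, int, int]:
    parts = re.split(r"\.", version)
    if len(parts) != 3 or not all(part.isdigit() for part in parts):
        raise ValueError(f"{version!r} is not a valid version format (ex. 1.2.3)")
    major, minor, patch = (int(part) for part in parts)
    return major, minor, patch


assert get_version_parts("0.29.0") == (0, 29, 0)
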
diff --git a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
index a98a275e..81239d13 100644
--- a/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
+++ b/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -1,5 +1,9 @@
+#
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+#
 # generated by datamodel-codegen:
 #   filename:  declarative_component_schema.yaml
+# ruff: noqa: FBT003 (generated file)
 
 from __future__ import annotations
diff --git a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py
index 0ccffb50..15d99ad9 100644
--- a/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py
+++ b/airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py
@@ -110,7 +110,7 @@ def preprocess_manifest(self, manifest: Mapping[str, Any]) -> Mapping[str, Any]:
         """
         return self._evaluate_node(manifest, manifest, set())  # type: ignore[no-any-return]
 
-    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: set[Any]) -> Any:
+    def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: set[Any]) -> Any:  # noqa: ANN401 (any-type)
         if isinstance(node, dict):
             evaluated_dict = {
                 k: self._evaluate_node(v, manifest, visited)
@@ -136,7 +136,7 @@ def _evaluate_node(self, node: Any, manifest: Mapping[str, Any], visited: set[Any]) -> Any:
                 return ret
         return node
 
-    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
+    def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:  # noqa: ANN401 (any-type)
         ref_match = re.match(r"#/(.*)", ref)
         if not ref_match:
             raise ValueError(f"Invalid reference format {ref}")
@@ -144,10 +144,10 @@ def _lookup_ref_value(self, ref: str, manifest: Mapping[str, Any]) -> Any:
             path = ref_match.groups()[0]
             return self._read_ref_value(path, manifest)
         except (AttributeError, KeyError, IndexError):
-            raise UndefinedReferenceException(path, ref)
+            raise UndefinedReferenceException(path, ref) from None
 
     @staticmethod
-    def _is_ref(node: Any) -> bool:
+    def _is_ref(node: Any) -> bool:  # noqa: ANN401 (any-type)
         return isinstance(node, str) and node.startswith("#/")
 
     @staticmethod
@@ -155,7 +155,7 @@ def _is_ref_key(key: str) -> bool:
         return bool(key == REF_TAG)
 
     @staticmethod
-    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:
+    def _read_ref_value(ref: str, manifest_node: Mapping[str, Any]) -> Any:  # noqa: ANN401 (any-type)
         """Read the value at the referenced location of the manifest.
 
         References are ambiguous because one could define a key containing `/`
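
For context on `_read_ref_value`: manifest references are `#/`-prefixed, `/`-delimited paths into the manifest mapping. A simplified sketch of the lookup (it ignores the ambiguity the real resolver handles for keys that themselves contain `/`):

from typing import Any


def read_ref_value(path: str, manifest: dict[str, Any]) -> Any:
    node: Any = manifest
    for segment in path.split("/"):
        # list indices appear as digit segments, mapping keys as strings
        node = node[int(segment)] if isinstance(node, list) else node[segment]
    return node


manifest = {"definitions": {"requester": {"url_base": "https://api.example.com"}}}
assert read_ref_value("definitions/requester/url_base", manifest) == "https://api.example.com"
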
diff --git a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
index c64d0ab2..4d83e07b 100644
--- a/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
+++ b/airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -365,13 +365,14 @@
 ComponentDefinition = Mapping[str, Any]
 
 
-class ModelToComponentFactory:
+class ModelToComponentFactory:  # noqa: PLR0904 (too many public methods)
     EPOCH_DATETIME_FORMAT = "%s"
 
     def __init__(
         self,
         limit_pages_fetched_per_slice: int | None = None,
         limit_slices_fetched: int | None = None,
+        *,
         emit_connector_builder_messages: bool = False,
         disable_retries: bool = False,
         disable_cache: bool = False,
@@ -461,8 +462,8 @@ def create_component(
         model_type: type[BaseModel],
         component_definition: ComponentDefinition,
         config: Config,
-        **kwargs: Any,
-    ) -> Any:
+        **kwargs: Any,  # noqa: ANN401 (any-type)
+    ) -> Any:  # noqa: ANN401 (any-type)
        """Takes a given Pydantic model type and Mapping representing a component definition and creates a declarative component and
        subcomponents which will be used at runtime. This is done by first parsing the mapping into a Pydantic model and then creating
        declarative components from that model.
@@ -481,7 +482,7 @@ def create_component(
         declarative_component_model = model_type.parse_obj(component_definition)
 
         if not isinstance(declarative_component_model, model_type):
-            raise ValueError(
+            raise ValueError(  # noqa: TRY004 (expected TypeError)
                 f"Expected {model_type.__name__} component, but received {declarative_component_model.__class__.__name__}"
             )
 
@@ -489,7 +490,12 @@ def create_component(
             model=declarative_component_model, config=config, **kwargs
         )
 
-    def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
+    def _create_component_from_model(
+        self,
+        model: BaseModel,
+        config: Config,
+        **kwargs: Any,  # noqa: ANN401 (any-type)
+    ) -> Any:  # noqa: ANN401 (any-type)
         if model.__class__ not in self.PYDANTIC_MODEL_TO_CONSTRUCTOR:
             raise ValueError(
                 f"{model.__class__} with attributes {model} is not a valid component type"
@@ -501,7 +507,9 @@ def _create_component_from_model(self, model: BaseModel, config: Config, **kwargs: Any) -> Any:
 
     @staticmethod
     def create_added_field_definition(
-        model: AddedFieldDefinitionModel, config: Config, **kwargs: Any
+        model: AddedFieldDefinitionModel,
+        config: Config,  # noqa: ARG004 (unused)
+        **kwargs: Any,  # noqa: ANN401 (any-type)
     ) -> AddedFieldDefinition:
         interpolated_value = InterpolatedString.create(
             model.value, parameters=model.parameters or {}
@@ -513,7 +521,12 @@ def create_added_field_definition(
             parameters=model.parameters or {},
         )
 
-    def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
+    def create_add_fields(
+        self,
+        model: AddFieldsModel,
+        config: Config,
+        **kwargs: Any,  # noqa: ANN401, ARG002 (any-type, unused)
+    ) -> AddFields:
         added_field_definitions = [
             self._create_component_from_model(
                 model=added_field_definition_model,
@@ -527,7 +540,10 @@ def create_add_fields(self, model: AddFieldsModel, config: Config, **kwargs: Any) -> AddFields:
         return AddFields(fields=added_field_definitions, parameters=model.parameters or {})
 
     def create_keys_to_lower_transformation(
-        self, model: KeysToLowerModel, config: Config, **kwargs: Any
+        self,
+        model: KeysToLowerModel,  # noqa: ARG002 (unused)
+        config: Config,  # noqa: ARG002 (unused)
+        **kwargs: Any,  # noqa: ANN401, ARG002 (any-type, unused)
     ) -> KeysToLowerTransformation:
         return KeysToLowerTransformation()
 
@@ -548,7 +564,7 @@ def create_api_key_authenticator(
         model: ApiKeyAuthenticatorModel,
         config: Config,
         token_provider: TokenProvider | None = None,
-        **kwargs: Any,
+        **kwargs: Any,  # noqa: ANN401, ARG004 (any-type, unused)
     ) -> ApiKeyAuthenticator:
         if model.inject_into is None and model.header is None:
             raise ValueError(
@@ -560,7 +576,7 @@ def create_api_key_authenticator(
                 "inject_into and header cannot be set both for ApiKeyAuthenticator - remove the deprecated header option"
             )
 
-        if token_provider is not None and model.api_token != "":
+        if token_provider is not None and model.api_token != "":  # noqa: PLC1901 (compare to empty string)
             raise ValueError(
                 "If token_provider is set, api_token is ignored and has to be set to empty string."
) @@ -595,20 +611,20 @@ def create_api_key_authenticator( def create_legacy_to_per_partition_state_migration( self, - model: LegacyToPerPartitionStateMigrationModel, + model: LegacyToPerPartitionStateMigrationModel, # noqa: ARG002 (unused) config: Mapping[str, Any], declarative_stream: DeclarativeStreamModel, ) -> LegacyToPerPartitionStateMigration: retriever = declarative_stream.retriever if not isinstance(retriever, SimpleRetrieverModel): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"LegacyToPerPartitionStateMigrations can only be applied on a DeclarativeStream with a SimpleRetriever. Got {type(retriever)}" ) partition_router = retriever.partition_router if not isinstance( partition_router, SubstreamPartitionRouterModel | CustomPartitionRouterModel ): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"LegacyToPerPartitionStateMigrations can only be applied on a SimpleRetriever with a Substream partition router. Got {type(partition_router)}" ) if not hasattr(partition_router, "parent_stream_configs"): @@ -624,7 +640,11 @@ def create_legacy_to_per_partition_state_migration( ) # type: ignore # The retriever type was already checked def create_session_token_authenticator( - self, model: SessionTokenAuthenticatorModel, config: Config, name: str, **kwargs: Any + self, + model: SessionTokenAuthenticatorModel, + config: Config, + name: str, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> ApiKeyAuthenticator | BearerAuthenticator: decoder = ( self._create_component_from_model(model=model.decoder, config=config) @@ -665,7 +685,9 @@ def create_session_token_authenticator( @staticmethod def create_basic_http_authenticator( - model: BasicHttpAuthenticatorModel, config: Config, **kwargs: Any + model: BasicHttpAuthenticatorModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> BasicHttpAuthenticator: return BasicHttpAuthenticator( password=model.password or "", @@ -679,9 +701,9 @@ def create_bearer_authenticator( model: BearerAuthenticatorModel, config: Config, token_provider: TokenProvider | None = None, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> BearerAuthenticator: - if token_provider is not None and model.api_token != "": + if token_provider is not None and model.api_token != "": # noqa: PLC1901 (compare to empty string) raise ValueError( "If token_provider is set, api_token is ignored and has to be set to empty string." 
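The recurring `# noqa: TRY004 (expected TypeError)` annotations, here and throughout the factory, record a deliberate trade-off: ruff's TRY004 rule says a branch that rejects a value because of its type should raise `TypeError`, but switching these established `ValueError`s would change the connector-facing failure mode, so the rule is silenced instead. The rule itself, in isolation:

```python
def flagged(retriever: object) -> dict:
    if not isinstance(retriever, dict):
        # TRY004 fires here: a failed isinstance check raising ValueError
        raise ValueError(f"expected a mapping, got {type(retriever).__name__}")
    return retriever


def preferred(retriever: object) -> dict:
    if not isinstance(retriever, dict):
        # what TRY004 wants instead
        raise TypeError(f"expected a mapping, got {type(retriever).__name__}")
    return retriever
```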
) @@ -700,12 +722,20 @@ def create_bearer_authenticator( ) @staticmethod - def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream: + def create_check_stream( + model: CheckStreamModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) + ) -> CheckStream: return CheckStream(stream_names=model.stream_names, parameters={}) def create_composite_error_handler( - self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any + self, + model: CompositeErrorHandlerModel, + config: Config, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> CompositeErrorHandler: + _ = kwargs # unused error_handlers = [ self._create_component_from_model(model=error_handler_model, config=config) for error_handler_model in model.error_handlers @@ -716,8 +746,11 @@ def create_composite_error_handler( @staticmethod def create_concurrency_level( - model: ConcurrencyLevelModel, config: Config, **kwargs: Any + model: ConcurrencyLevelModel, + config: Config, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> ConcurrencyLevel: + _ = kwargs # unused return ConcurrencyLevel( default_concurrency=model.default_concurrency, max_concurrency=model.max_concurrency, @@ -725,7 +758,7 @@ def create_concurrency_level( parameters={}, ) - def create_concurrent_cursor_from_datetime_based_cursor( + def create_concurrent_cursor_from_datetime_based_cursor( # noqa: PLR0914 self, state_manager: ConnectorStateManager, model_type: type[BaseModel], @@ -734,8 +767,9 @@ def create_concurrent_cursor_from_datetime_based_cursor( stream_namespace: str | None, config: Config, stream_state: MutableMapping[str, Any], - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> tuple[ConcurrentCursor, DateTimeStreamStateConverter]: + _ = kwargs # unused component_type = component_definition.get("type") if component_definition.get("type") != model_type.__name__: raise ValueError( @@ -745,7 +779,7 @@ def create_concurrent_cursor_from_datetime_based_cursor( datetime_based_cursor_model = model_type.parse_obj(component_definition) if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}" ) @@ -887,7 +921,9 @@ def create_concurrent_cursor_from_datetime_based_cursor( @staticmethod def create_constant_backoff_strategy( - model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any + model: ConstantBackoffStrategyModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> ConstantBackoffStrategy: return ConstantBackoffStrategy( backoff_time_in_seconds=model.backoff_time_in_seconds, @@ -896,17 +932,21 @@ def create_constant_backoff_strategy( ) def create_cursor_pagination( - self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any + self, + model: CursorPaginationModel, + config: Config, + decoder: Decoder, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> CursorPaginationStrategy: if isinstance(decoder, PaginationDecoderDecorator): if not isinstance(decoder.decoder, JsonDecoder | XmlDecoder): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." 
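Note the two coexisting conventions for intentionally-unused parameters in this hunk: `create_composite_error_handler` and `create_concurrency_level` bind `_ = kwargs  # unused` in the body, while most other creators carry `# noqa: ARG002`/`ARG004` on the parameter itself. A toy example of the assignment style, which documents the intent without a lint directive:

```python
from typing import Any


def create_concurrency_level(default_concurrency: int | None = None, **kwargs: Any) -> int:
    _ = kwargs  # unused: accepted only so every factory method shares one call shape
    return default_concurrency or 1


assert create_concurrency_level(4, emit_messages=True) == 4
```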
) decoder_to_use = decoder else: if not isinstance(decoder, JsonDecoder | XmlDecoder): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) decoder_to_use = PaginationDecoderDecorator(decoder=decoder) @@ -920,7 +960,7 @@ def create_cursor_pagination( parameters=model.parameters or {}, ) - def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: + def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> Any: # noqa: ANN401 (any-type) """Generically creates a custom component based on the model type and a class_name reference to the custom Python class being instantiated. Only the model's additional properties that match the custom class definition are passed to the constructor :param model: The Pydantic model of the custom component being created @@ -980,17 +1020,17 @@ def create_custom_component(self, model: Any, config: Config, **kwargs: Any) -> return custom_component_class(**kwargs) @staticmethod - def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any: + def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any: # noqa: ANN401 (any-type) split = full_qualified_class_name.split(".") module = ".".join(split[:-1]) class_name = split[-1] try: return getattr(importlib.import_module(module), class_name) except AttributeError: - raise ValueError(f"Could not load class {full_qualified_class_name}.") + raise ValueError(f"Could not load class {full_qualified_class_name}.") from None @staticmethod - def _derive_component_type_from_type_hints(field_type: Any) -> str | None: + def _derive_component_type_from_type_hints(field_type: Any) -> str | None: # noqa: ANN401 (any-type) interface = field_type while True: origin = get_origin(interface) @@ -1007,7 +1047,7 @@ def _derive_component_type_from_type_hints(field_type: Any) -> str | None: return None @staticmethod - def is_builtin_type(cls: type[Any] | None) -> bool: + def is_builtin_type(cls: type[Any] | None) -> bool: # noqa: PLW0211 (static method should not use cls) if not cls: return False return cls.__module__ == "builtins" @@ -1020,8 +1060,12 @@ def _extract_missing_parameters(error: TypeError) -> list[str]: return [] def _create_nested_component( - self, model: Any, model_field: str, model_value: Any, config: Config - ) -> Any: + self, + model: Any, # noqa: ANN401 (any-type) + model_field: str, # noqa: ARG002 (unused) + model_value: Any, # noqa: ANN401 (any-type) + config: Config, + ) -> Any: # noqa: ANN401 (any-type) type_name = model_value.get("type", None) if not type_name: # If no type is specified, we can assume this is a dictionary object which can be returned instead of a subcomponent @@ -1057,21 +1101,24 @@ def _create_nested_component( f"{type_name}.$parameters.{parameter}" for parameter in missing_parameters ) - ) + ) from None raise TypeError( f"Error creating component '{type_name}' with parent custom component {model.class_name}: {error}" - ) + ) from None else: raise ValueError( f"Error creating custom component {model.class_name}. 
Subcomponent creation has not been implemented for '{type_name}'" ) @staticmethod - def _is_component(model_value: Any) -> bool: + def _is_component(model_value: Any) -> bool: # noqa: ANN401 (any-type) return isinstance(model_value, dict) and model_value.get("type") is not None def create_datetime_based_cursor( - self, model: DatetimeBasedCursorModel, config: Config, **kwargs: Any + self, + model: DatetimeBasedCursorModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> DatetimeBasedCursor: start_datetime: str | MinMaxDatetime = ( model.start_datetime @@ -1131,7 +1178,10 @@ def create_datetime_based_cursor( ) def create_declarative_stream( - self, model: DeclarativeStreamModel, config: Config, **kwargs: Any + self, + model: DeclarativeStreamModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> DeclarativeStream: # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the @@ -1209,7 +1259,7 @@ def create_declarative_stream( transformations = [] if model.transformations: for transformation_model in model.transformations: - transformations.append( + transformations.append( # noqa: PERF401 (consider list comprehension) self._create_component_from_model(model=transformation_model, config=config) ) retriever = self._create_component_from_model( @@ -1326,19 +1376,22 @@ def _merge_stream_slicers( return None def create_default_error_handler( - self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any + self, + model: DefaultErrorHandlerModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> DefaultErrorHandler: backoff_strategies = [] if model.backoff_strategies: for backoff_strategy_model in model.backoff_strategies: - backoff_strategies.append( + backoff_strategies.append( # noqa: PERF401 (consider list comprehension) self._create_component_from_model(model=backoff_strategy_model, config=config) ) response_filters = [] if model.response_filters: for response_filter_model in model.response_filters: - response_filters.append( + response_filters.append( # noqa: PERF401 (consider list comprehension) self._create_component_from_model(model=response_filter_model, config=config) ) response_filters.append( @@ -1405,7 +1458,7 @@ def create_dpath_extractor( model: DpathExtractorModel, config: Config, decoder: Decoder | None = None, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> DpathExtractor: decoder_to_use = decoder or JsonDecoder(parameters={}) model_field_path: list[InterpolatedString | str] = list(model.field_path) @@ -1482,7 +1535,9 @@ def create_http_requester( @staticmethod def create_http_response_filter( - model: HttpResponseFilterModel, config: Config, **kwargs: Any + model: HttpResponseFilterModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> HttpResponseFilter: action = ResponseAction(model.action.value) if model.action else None @@ -1505,33 +1560,49 @@ def create_http_response_filter( @staticmethod def create_inline_schema_loader( - model: InlineSchemaLoaderModel, config: Config, **kwargs: Any + model: InlineSchemaLoaderModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> InlineSchemaLoader: return InlineSchemaLoader(schema=model.schema_ or {}, parameters={}) 
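The `# noqa: PERF401 (consider list comprehension)` markers keep the append-loops above as-is for readability; the loop and the comprehension ruff suggests are equivalent. In this sketch, `build` is a hypothetical stand-in for `self._create_component_from_model`:

```python
transformation_models = [{"type": "AddFields"}, {"type": "RemoveFields"}]


def build(model: dict) -> str:  # stand-in for _create_component_from_model
    return model["type"]


transformations = []
for transformation_model in transformation_models:  # the kept, explicit form
    transformations.append(build(transformation_model))  # noqa: PERF401

assert transformations == [build(m) for m in transformation_models]  # ruff's suggestion
```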
@staticmethod - def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder: + def create_json_decoder( + model: JsonDecoderModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) + ) -> JsonDecoder: return JsonDecoder(parameters={}) @staticmethod def create_jsonl_decoder( - model: JsonlDecoderModel, config: Config, **kwargs: Any + model: JsonlDecoderModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> JsonlDecoder: return JsonlDecoder(parameters={}) @staticmethod def create_iterable_decoder( - model: IterableDecoderModel, config: Config, **kwargs: Any + model: IterableDecoderModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> IterableDecoder: return IterableDecoder(parameters={}) @staticmethod - def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder: + def create_xml_decoder( + model: XmlDecoderModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) + ) -> XmlDecoder: return XmlDecoder(parameters={}) @staticmethod def create_json_file_schema_loader( - model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any + model: JsonFileSchemaLoaderModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> JsonFileSchemaLoader: return JsonFileSchemaLoader( file_path=model.file_path or "", config=config, parameters=model.parameters or {} @@ -1539,7 +1610,9 @@ def create_json_file_schema_loader( @staticmethod def create_jwt_authenticator( - model: JwtAuthenticatorModel, config: Config, **kwargs: Any + model: JwtAuthenticatorModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> JwtAuthenticator: jwt_headers = model.jwt_headers or JwtHeadersModel(kid=None, typ="JWT", cty=None) jwt_payload = model.jwt_payload or JwtPayloadModel(iss=None, sub=None, aud=None) @@ -1563,7 +1636,9 @@ def create_jwt_authenticator( @staticmethod def create_list_partition_router( - model: ListPartitionRouterModel, config: Config, **kwargs: Any + model: ListPartitionRouterModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> ListPartitionRouter: request_option = ( RequestOption( @@ -1584,7 +1659,9 @@ def create_list_partition_router( @staticmethod def create_min_max_datetime( - model: MinMaxDatetimeModel, config: Config, **kwargs: Any + model: MinMaxDatetimeModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> MinMaxDatetime: return MinMaxDatetime( datetime=model.datetime, @@ -1595,17 +1672,26 @@ def create_min_max_datetime( ) @staticmethod - def create_no_auth(model: NoAuthModel, config: Config, **kwargs: Any) -> NoAuth: + def create_no_auth( + model: NoAuthModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) + ) -> NoAuth: return NoAuth(parameters=model.parameters or {}) @staticmethod def create_no_pagination( - model: NoPaginationModel, config: Config, **kwargs: Any + model: NoPaginationModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> NoPagination: return NoPagination(parameters={}) def create_oauth_authenticator( - self, model: 
OAuthAuthenticatorModel, config: Config, **kwargs: Any + self, + model: OAuthAuthenticatorModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> DeclarativeOauth2Authenticator: if model.refresh_token_updater: # ignore type error because fixing it would have a lot of dependencies, revisit later @@ -1664,17 +1750,20 @@ def create_oauth_authenticator( @staticmethod def create_offset_increment( - model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any + model: OffsetIncrementModel, + config: Config, + decoder: Decoder, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> OffsetIncrement: if isinstance(decoder, PaginationDecoderDecorator): if not isinstance(decoder.decoder, JsonDecoder | XmlDecoder): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) decoder_to_use = decoder else: if not isinstance(decoder, JsonDecoder | XmlDecoder): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead." ) decoder_to_use = PaginationDecoderDecorator(decoder=decoder) @@ -1688,7 +1777,9 @@ def create_offset_increment( @staticmethod def create_page_increment( - model: PageIncrementModel, config: Config, **kwargs: Any + model: PageIncrementModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> PageIncrement: return PageIncrement( page_size=model.page_size, @@ -1699,7 +1790,10 @@ def create_page_increment( ) def create_parent_stream_config( - self, model: ParentStreamConfigModel, config: Config, **kwargs: Any + self, + model: ParentStreamConfigModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> ParentStreamConfig: declarative_stream = self._create_component_from_model(model.stream, config=config) request_option = ( @@ -1720,19 +1814,27 @@ def create_parent_stream_config( @staticmethod def create_record_filter( - model: RecordFilterModel, config: Config, **kwargs: Any + model: RecordFilterModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> RecordFilter: return RecordFilter( condition=model.condition or "", config=config, parameters=model.parameters or {} ) @staticmethod - def create_request_path(model: RequestPathModel, config: Config, **kwargs: Any) -> RequestPath: + def create_request_path( + model: RequestPathModel, # noqa: ARG004 (unused) + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) + ) -> RequestPath: return RequestPath(parameters={}) @staticmethod def create_request_option( - model: RequestOptionModel, config: Config, **kwargs: Any + model: RequestOptionModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> RequestOption: inject_into = RequestOptionType(model.inject_into.value) return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={}) @@ -1745,7 +1847,7 @@ def create_record_selector( transformations: list[RecordTransformation], decoder: Decoder | None = None, client_side_incremental_sync: dict[str, Any] | None = None, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> RecordSelector: assert model.schema_normalization is not None # for mypy extractor = self._create_component_from_model( @@ -1780,14 +1882,19 @@ 
def create_record_selector( @staticmethod def create_remove_fields( - model: RemoveFieldsModel, config: Config, **kwargs: Any + model: RemoveFieldsModel, + config: Config, # noqa: ARG004 (unused) + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> RemoveFields: return RemoveFields( field_pointers=model.field_pointers, condition=model.condition or "", parameters={} ) def create_selective_authenticator( - self, model: SelectiveAuthenticatorModel, config: Config, **kwargs: Any + self, + model: SelectiveAuthenticatorModel, + config: Config, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> DeclarativeAuthenticator: authenticators = { name: self._create_component_from_model(model=auth, config=config) @@ -1803,7 +1910,11 @@ def create_selective_authenticator( @staticmethod def create_legacy_session_token_authenticator( - model: LegacySessionTokenAuthenticatorModel, config: Config, *, url_base: str, **kwargs: Any + model: LegacySessionTokenAuthenticatorModel, + config: Config, + *, + url_base: str, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> LegacySessionTokenAuthenticator: return LegacySessionTokenAuthenticator( api_url=url_base, @@ -1818,7 +1929,7 @@ def create_legacy_session_token_authenticator( parameters=model.parameters or {}, ) - def create_simple_retriever( + def create_simple_retriever( # noqa: PLR0913 self, model: SimpleRetrieverModel, config: Config, @@ -1916,7 +2027,10 @@ def create_simple_retriever( ) def _create_async_job_status_mapping( - self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any + self, + model: AsyncJobStatusMapModel, + config: Config, # noqa: ARG002 (unused) + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> Mapping[str, AsyncJobStatus]: api_status_to_cdk_status = {} for cdk_status, api_statuses in model.dict().items(): @@ -1951,14 +2065,14 @@ def create_async_retriever( config: Config, *, name: str, - primary_key: str + primary_key: str # noqa: ARG002 (unused) | list[str] | list[list[str]] | None, # this seems to be needed to match create_simple_retriever stream_slicer: StreamSlicer | None, client_side_incremental_sync: dict[str, Any] | None = None, transformations: list[RecordTransformation], - **kwargs: Any, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> AsyncRetriever: decoder = ( self._create_component_from_model(model=model.decoder, config=config) @@ -2057,9 +2171,9 @@ def create_async_retriever( stream_slices, JobTracker( 1 - ), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1 + ), # TODO: eventually make the number of concurrent jobs in the API configurable. 
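Stepping back, the sheer volume of ARG002/ARG004 suppressions in this file follows from the factory's design: every `create_*` method is registered in a single model-to-constructor mapping (`PYDANTIC_MODEL_TO_CONSTRUCTOR`), so each must accept `(model, config, **kwargs)` even when it reads none of them. A toy sketch of that registry shape, with made-up names:

```python
from typing import Any, Callable


class JsonDecoderModel: ...


class XmlDecoderModel: ...


def create_json_decoder(model: Any, config: dict, **kwargs: Any) -> str:
    return "JsonDecoder"  # model/config unused, but the shared signature is kept


def create_xml_decoder(model: Any, config: dict, **kwargs: Any) -> str:
    return "XmlDecoder"


MODEL_TO_CONSTRUCTOR: dict[type, Callable[..., str]] = {
    JsonDecoderModel: create_json_decoder,
    XmlDecoderModel: create_xml_decoder,
}

print(MODEL_TO_CONSTRUCTOR[XmlDecoderModel](XmlDecoderModel(), config={}))
```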
Until then, we limit to 1 # noqa: FIX001, TD001, TD004 self._message_repository, - has_bulk_parent=False, # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk + has_bulk_parent=False, # TODO: work would need to be done here in order to detect if a stream has a parent stream that is bulk # noqa: FIX001, TD001, TD004 ), record_selector=record_selector, stream_slicer=stream_slicer, @@ -2068,7 +2182,7 @@ def create_async_retriever( ) @staticmethod - def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: + def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: # noqa: ANN401, ARG004 (any-type, unused) return Spec( connection_specification=model.connection_specification, documentation_url=model.documentation_url, @@ -2077,7 +2191,10 @@ def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec: ) def create_substream_partition_router( - self, model: SubstreamPartitionRouterModel, config: Config, **kwargs: Any + self, + model: SubstreamPartitionRouterModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> SubstreamPartitionRouter: parent_stream_configs = [] if model.parent_stream_configs: @@ -2098,7 +2215,7 @@ def create_substream_partition_router( def _create_message_repository_substream_wrapper( self, model: ParentStreamConfigModel, config: Config - ) -> Any: + ) -> Any: # noqa: ANN401 (any-type) substream_factory = ModelToComponentFactory( limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice, limit_slices_fetched=self._limit_slices_fetched, @@ -2111,11 +2228,15 @@ def _create_message_repository_substream_wrapper( self._evaluate_log_level(self._emit_connector_builder_messages), ), ) - return substream_factory._create_component_from_model(model=model, config=config) + return substream_factory._create_component_from_model( # noqa: SLF001 (private member) + model=model, config=config + ) @staticmethod def create_wait_time_from_header( - model: WaitTimeFromHeaderModel, config: Config, **kwargs: Any + model: WaitTimeFromHeaderModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> WaitTimeFromHeaderBackoffStrategy: return WaitTimeFromHeaderBackoffStrategy( header=model.header, @@ -2129,7 +2250,9 @@ def create_wait_time_from_header( @staticmethod def create_wait_until_time_from_header( - model: WaitUntilTimeFromHeaderModel, config: Config, **kwargs: Any + model: WaitUntilTimeFromHeaderModel, + config: Config, + **kwargs: Any, # noqa: ANN401, ARG004 (any-type, unused) ) -> WaitUntilTimeFromHeaderBackoffStrategy: return WaitUntilTimeFromHeaderBackoffStrategy( header=model.header, @@ -2142,5 +2265,5 @@ def create_wait_until_time_from_header( def get_message_repository(self) -> MessageRepository: return self._message_repository - def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: + def _evaluate_log_level(self, emit_connector_builder_messages: bool) -> Level: # noqa: FBT001 (positional bool) return Level.DEBUG if emit_connector_builder_messages else Level.INFO diff --git a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py index 5f9b2e07..a7299c5e 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py @@ -52,36 +52,36 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def
get_request_params( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.header, stream_slice) def get_request_body_data( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_json, stream_slice) diff --git a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py index 8b01497b..a1955b69 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py @@ -22,33 +22,33 @@ class SinglePartitionRouter(PartitionRouter): def get_request_params( self, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_headers( self, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_body_data( self, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + 
stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_body_json( self, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} diff --git a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py index e4be83b7..73b50953 100644 --- a/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +++ b/airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py @@ -83,36 +83,36 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.header, stream_slice) def get_request_body_data( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: # Pass the stream_slice from the argument, not the cursor because the cursor is updated after processing the response return self._get_request_option(RequestOptionType.body_json, stream_slice) @@ -177,7 +177,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: f"Parent stream {parent_stream.name} returns records of type AirbyteMessage. This SubstreamPartitionRouter is not able to checkpoint incremental parent state." 
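The `# noqa: PLW2901 (redefined loop var)` markers just below acknowledge that `parent_record` is reassigned inside the loop on purpose: the code normalizes each record (AirbyteMessage, `Record`, or plain mapping) down to its data payload under a single name. The pattern in miniature:

```python
records = [{"data": {"id": 1}}, {"id": 2}]  # mix of wrapped and plain records

for record in records:
    if "data" in record:
        record = record["data"]  # noqa: PLW2901 -- deliberate narrowing to the payload
    print(record["id"])  # 1, then 2
```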
) if parent_record.type == MessageType.RECORD: - parent_record = parent_record.record.data # type: ignore[union-attr, assignment] # record is always a Record + parent_record = parent_record.record.data # type: ignore[union-attr, assignment] # record is always a Record # noqa: PLW2901 (redefined loop var) else: continue elif isinstance(parent_record, Record): @@ -186,7 +186,7 @@ def stream_slices(self) -> Iterable[StreamSlice]: if parent_record.associated_slice else {} ) - parent_record = parent_record.data + parent_record = parent_record.data # noqa: PLW2901 (redefined loop var) elif not isinstance(parent_record, Mapping): # The parent_record should only take the form of a Record, AirbyteMessage, or Mapping. Anything else is invalid raise AirbyteTracedException( diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py index bb9a87c8..29dcfe7e 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py @@ -44,7 +44,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, - response_or_exception: requests.Response | requests.RequestException | None, - attempt_count: int, + response_or_exception: requests.Response | requests.RequestException | None, # noqa: ARG002 (unused) + attempt_count: int, # noqa: ARG002 (unused) ) -> float | None: return self.backoff_time_in_seconds.eval(self.config) # type: ignore # backoff_time_in_seconds is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py index b5d68dca..90021e11 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py @@ -44,7 +44,7 @@ def _retry_factor(self) -> float: def backoff_time( self, - response_or_exception: requests.Response | requests.RequestException | None, + response_or_exception: requests.Response | requests.RequestException | None, # noqa: ARG002 (unused) attempt_count: int, ) -> float | None: return self._retry_factor * 2**attempt_count # type: ignore # factor is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py index 1bd40133..01156455 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py @@ -51,7 +51,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, response_or_exception: requests.Response | requests.RequestException | None, - attempt_count: int, + attempt_count: int, # noqa: ARG002 (unused) ) -> float | None: header = self.header.eval(config=self.config) # type: ignore # header is always 
cast to an interpolated string if self.regex: diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py index 5b86e8d5..0d6df2f8 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py @@ -54,7 +54,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def backoff_time( self, response_or_exception: requests.Response | requests.RequestException | None, - attempt_count: int, + attempt_count: int, # noqa: ARG002 (unused) ) -> float | None: now = time.time() header = self.header.eval(self.config) # type: ignore # header is always cast to an interpolated string diff --git a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py index b004098f..3fa17733 100644 --- a/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +++ b/airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py @@ -123,7 +123,7 @@ def interpret_response( ) if matched_error_resolution: return matched_error_resolution - if isinstance(response_or_exception, requests.Response): + if isinstance(response_or_exception, requests.Response): # noqa: SIM102 (collapsible-if) if response_or_exception.ok: return SUCCESS_RESOLUTION diff --git a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py index 465634b0..0d664f9a 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_job_repository.py +++ b/airbyte_cdk/sources/declarative/requesters/http_job_repository.py @@ -160,13 +160,13 @@ def update_jobs_status(self, jobs: Iterable[AsyncJob]) -> None: lazy_log( LOGGER, logging.DEBUG, - lambda: f"Status of job {job.api_job_id()} changed from {job.status()} to {job_status}", + lambda: f"Status of job {job.api_job_id()} changed from {job.status()} to {job_status}", # noqa: B023 (function uses loop var) ) else: lazy_log( LOGGER, logging.DEBUG, - lambda: f"Status of job {job.api_job_id()} is still {job.status()}", + lambda: f"Status of job {job.api_job_id()} is still {job.status()}", # noqa: B023 (function uses loop var) ) job.update_status(job_status) diff --git a/airbyte_cdk/sources/declarative/requesters/http_requester.py b/airbyte_cdk/sources/declarative/requesters/http_requester.py index d1443a14..112eb8d8 100644 --- a/airbyte_cdk/sources/declarative/requesters/http_requester.py +++ b/airbyte_cdk/sources/declarative/requesters/http_requester.py @@ -61,7 +61,7 @@ class HttpRequester(Requester): request_options_provider: InterpolatedRequestOptionsProvider | None = None error_handler: ErrorHandler | None = None disable_retries: bool = False - message_repository: MessageRepository = NoopMessageRepository() + message_repository: MessageRepository = NoopMessageRepository() # noqa: RUF009 (function in default arg) use_cache: bool = False _exit_on_rate_limit: bool = False stream_response: bool = False @@ -115,7 +115,7 @@ def get_authenticator(self) -> DeclarativeAuthenticator: return self._authenticator def get_url_base(self) -> str: - return
os.path.join(self._url_base.eval(self.config), "") + return os.path.join(self._url_base.eval(self.config), "") # noqa: PTH118 (prefer pathlib) def get_path( self, @@ -234,7 +234,7 @@ def _request_headers( extra_headers, ) if isinstance(headers, str): - raise ValueError("Request headers cannot be a string") + raise ValueError("Request headers cannot be a string") # noqa: TRY004 (expected TypeError) return {str(k): str(v) for k, v in headers.items()} def _request_params( @@ -257,11 +257,11 @@ def _request_params( extra_params, ) if isinstance(options, str): - raise ValueError("Request params cannot be a string") + raise ValueError("Request params cannot be a string") # noqa: TRY004 (expected TypeError) for k, v in options.items(): if isinstance(v, dict): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Invalid value for `{k}` parameter. The values of request params cannot be an object." ) @@ -313,14 +313,14 @@ def _request_body_json( extra_body_json, ) if isinstance(options, str): - raise ValueError("Request body json cannot be a string") + raise ValueError("Request body json cannot be a string") # noqa: TRY004 (expected TypeError) return options @classmethod def _join_url(cls, url_base: str, path: str) -> str: return urljoin(url_base, path) - def send_request( + def send_request( # noqa: PLR0913, PLR0917 (too many args) self, stream_state: StreamState | None = None, stream_slice: StreamSlice | None = None, diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py index 66fb4fb4..b8329d28 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py @@ -142,40 +142,40 @@ def path(self) -> str | None: def get_request_params( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> MutableMapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter) def get_request_headers( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, str]: return self._get_request_options(RequestOptionType.header) def get_request_body_data( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_data) def get_request_body_json( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice 
| None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_json) - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) if reset_value: self.pagination_strategy.reset(reset_value=reset_value) else: @@ -275,6 +275,6 @@ def get_request_body_json( stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token ) - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401, ARG002 (any-type, unused-argument) self._decorated.reset() self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py index f5205227..cd94995a 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py @@ -29,44 +29,47 @@ def path(self) -> str | None: def get_request_params( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> MutableMapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, str]: return {} def get_request_body_data( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any] | str: return {} def get_request_body_json( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def next_page_token( - self, response: requests.Response, last_page_size: int, last_record: Record | None + self, + response: requests.Response, # noqa: ARG002 (unused) + last_page_size: int, # noqa: ARG002 (unused) + last_record: Record | None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) # No state to reset pass diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py index e2131aa3..b58f0484 100644 --- 
a/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py @@ -29,7 +29,7 @@ class Paginator(ABC, RequestOptionsProvider): """ @abstractmethod - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) """Reset the pagination's inner state""" @abstractmethod diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py index ff8c2ef9..f4791286 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py @@ -61,12 +61,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._stop_condition = self.stop_condition @property - def initial_token(self) -> Any | None: + def initial_token(self) -> Any | None: # noqa: ANN401 (any-type) return self._initial_cursor def next_page_token( self, response: requests.Response, last_page_size: int, last_record: Record | None - ) -> Any | None: + ) -> Any | None: # noqa: ANN401 (any-type) decoded_response = next(self.decoder.decode(response)) # The default way that link is presented in requests.Response is a string of various links (last, next, etc). This @@ -92,7 +92,7 @@ def next_page_token( ) return token or None - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) self._initial_cursor = reset_value def get_page_size(self) -> int | None: diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py index e38dcaf8..208b9138 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py @@ -67,14 +67,17 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: self._page_size = None @property - def initial_token(self) -> Any | None: + def initial_token(self) -> Any | None: # noqa: ANN401 (any-type) if self.inject_on_first_request: return self._offset return None def next_page_token( - self, response: requests.Response, last_page_size: int, last_record: Record | None - ) -> Any | None: + self, + response: requests.Response, + last_page_size: int, + last_record: Record | None, # noqa: ARG002 (unused) + ) -> Any | None: # noqa: ANN401 (any-type) decoded_response = next(self.decoder.decode(response)) # Stop paginating when there are fewer records than the page size or the current page has no records @@ -86,9 +89,9 @@ def next_page_token( self._offset += last_page_size return self._offset - def reset(self, reset_value: Any | None = 0) -> None: + def reset(self, reset_value: Any | None = 0) -> None: # noqa: ANN401 (any-type) if not isinstance(reset_value, int): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"Reset value {reset_value} for OffsetIncrement pagination strategy was not an integer" ) self._offset = reset_value @@ -97,6 +100,6 @@ def get_page_size(self) -> int | None: if self._page_size: page_size = self._page_size.eval(self.config) if not isinstance(page_size, int): - raise Exception(f"{page_size} is of type 
{type(page_size)}. Expected {int}") + raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}") # noqa: TRY002 (vanilla exception) return page_size return None diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py index e039740c..e2265e40 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py @@ -42,25 +42,28 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: else: page_size = InterpolatedString(self.page_size, parameters=parameters).eval(self.config) if not isinstance(page_size, int): - raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}") + raise Exception(f"{page_size} is of type {type(page_size)}. Expected {int}") # noqa: TRY002 (vanilla exception) self._page_size = page_size @property - def initial_token(self) -> Any | None: + def initial_token(self) -> Any | None: # noqa: ANN401 (any-type) if self.inject_on_first_request: return self._page return None def next_page_token( - self, response: requests.Response, last_page_size: int, last_record: Record | None - ) -> Any | None: + self, + response: requests.Response, # noqa: ARG002 (unused) + last_page_size: int, + last_record: Record | None, # noqa: ARG002 (unused) + ) -> Any | None: # noqa: ANN401 (any-type) # Stop paginating when there are fewer records than the page size or the current page has no records if (self._page_size and last_page_size < self._page_size) or last_page_size == 0: return None self._page += 1 return self._page - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) if reset_value is None: self._page = self.start_from_page elif not isinstance(reset_value, int): diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py index 9b21c717..9002e7bf 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +++ b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py @@ -20,13 +20,13 @@ class PaginationStrategy: @property @abstractmethod - def initial_token(self) -> Any | None: + def initial_token(self) -> Any | None: # noqa: ANN401 (any-type) """Return the initial value of the token""" @abstractmethod def next_page_token( self, response: requests.Response, last_page_size: int, last_record: Record | None - ) -> Any | None: + ) -> Any | None: # noqa: ANN401 (any-type) """:param response: response to process :param last_page_size: the number of records read from the response :param last_record: the last record extracted from the response @@ -35,7 +35,7 @@ def next_page_token( pass @abstractmethod - def reset(self, reset_value: Any | None = None) -> None: + def reset(self, reset_value: Any | None = None) -> None: # noqa: ANN401 (any-type) """Reset the pagination's inner state""" @abstractmethod diff --git a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py index 66a8c027..dc806454 100644 --- a/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +++ 
b/airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py @@ -47,7 +47,7 @@ def __init__( def next_page_token( self, response: requests.Response, last_page_size: int, last_record: Record | None - ) -> Any | None: + ) -> Any | None: # noqa: ANN401 (any-type) # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure will return records in # descending order. In terms of performance/memory, we return the records lazily if last_record and self._stop_condition.is_met(last_record): @@ -61,5 +61,5 @@ def get_page_size(self) -> int | None: return self._delegate.get_page_size() @property - def initial_token(self) -> Any | None: + def initial_token(self) -> Any | None: # noqa: ANN401 (any-type) return self._delegate.initial_token diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py index 26157195..01f715c5 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py @@ -46,36 +46,36 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def get_request_params( self, *, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.request_parameter, stream_slice) def get_request_headers( self, *, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.header, stream_slice) def get_request_body_data( self, *, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any] | str: return self._get_request_options(RequestOptionType.body_data, stream_slice) def get_request_body_json( self, *, - stream_state: StreamState | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return self._get_request_options(RequestOptionType.body_json, stream_slice) diff --git a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py index 96695faa..72e9b519 100644 --- a/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +++ b/airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py @@ -31,35 +31,35 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None: def 
get_request_params( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_headers( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} def get_request_body_data( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any] | str: return {} def get_request_body_json( self, *, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: return {} diff --git a/airbyte_cdk/sources/declarative/requesters/requester.py b/airbyte_cdk/sources/declarative/requesters/requester.py index e17c3204..f9426ebc 100644 --- a/airbyte_cdk/sources/declarative/requesters/requester.py +++ b/airbyte_cdk/sources/declarative/requesters/requester.py @@ -110,7 +110,7 @@ def get_request_body_json( """ @abstractmethod - def send_request( + def send_request( # noqa: PLR0913, PLR0917 (too many args) self, stream_state: StreamState | None = None, stream_slice: StreamSlice | None = None, diff --git a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py index 38a77ec1..01e99983 100644 --- a/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +++ b/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py @@ -116,7 +116,9 @@ def name(self, value: str) -> None: self._name = value def _get_mapping( - self, method: Callable[..., Mapping[str, Any] | str | None], **kwargs: Any + self, + method: Callable[..., Mapping[str, Any] | str | None], + **kwargs: Any, # noqa: ANN401 (any-type) ) -> tuple[Mapping[str, Any] | str, set[str]]: """Get mapping from the provided method, and get the keys of the mapping. If the method returns a string, it will return the string and an empty set. 
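The `_get_request_options` hunk that follows merges the mappings produced by the paginator and the request-option provider, raising on key collisions. As a minimal sketch of that merge pattern, under assumed names (`merge_options` is illustrative, not the CDK's API):

```python
# Minimal sketch (not the CDK implementation): merge option mappings from
# several provider callables, surfacing duplicate keys instead of silently
# overwriting them.
from collections.abc import Callable, Mapping
from typing import Any


def merge_options(
    methods: list[Callable[..., Mapping[str, Any] | str | None]],
    **kwargs: Any,
) -> Mapping[str, Any]:
    merged: dict[str, Any] = {}
    for method in methods:
        options = method(**kwargs) or {}
        if isinstance(options, str):
            # Mirrors the guards in the hunks below: string-valued options
            # cannot be merged key by key.
            raise ValueError("Request options must be mappings, not strings")
        duplicates = merged.keys() & options.keys()
        if duplicates:
            raise ValueError(f"Duplicate keys found: {sorted(duplicates)}")
        merged.update(options)
    return merged
```

Passing the usual `stream_state`/`stream_slice`/`next_page_token` keywords through `**kwargs` keeps the sketch close to the provider methods shown above.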
@@ -138,7 +140,7 @@ def _get_request_options( Raise a ValueError if there's a key collision Returned merged mapping otherwise """ - # FIXME we should eventually remove the usage of stream_state as part of the interpolation + # TODO: we should eventually remove the usage of stream_state as part of the interpolation mappings = [ paginator_method( stream_state=stream_state, @@ -173,7 +175,7 @@ def _request_headers( self.stream_slicer.get_request_headers, ) if isinstance(headers, str): - raise ValueError("Request headers cannot be a string") + raise ValueError("Request headers cannot be a string") # noqa: TRY004 (expected TypeError) return {str(k): str(v) for k, v in headers.items()} def _request_params( @@ -194,7 +196,7 @@ def _request_params( self.request_option_provider.get_request_params, ) if isinstance(params, str): - raise ValueError("Request params cannot be a string") + raise ValueError("Request params cannot be a string") # noqa: TRY004 (expected TypeError) return params def _request_body_data( @@ -237,7 +239,7 @@ def _request_body_json( self.request_option_provider.get_request_body_json, ) if isinstance(body_json, str): - raise ValueError("Request body json cannot be a string") + raise ValueError("Request body json cannot be a string") # noqa: TRY004 (expected TypeError) return body_json def _paginator_path( @@ -425,7 +427,7 @@ def read_records( self.cursor.observe(_slice, current_record) # Latest record read, not necessarily within slice boundaries. - # TODO Remove once all custom components implement `observe` method. + # TODO: Remove once all custom components implement `observe` method. # https://github.com/airbytehq/airbyte-internal-issues/issues/6955 most_recent_record_from_slice = self._get_most_recent_record( most_recent_record_from_slice, current_record, _slice @@ -440,7 +442,7 @@ def _get_most_recent_record( self, current_most_recent: Record | None, current_record: Record | None, - stream_slice: StreamSlice, + stream_slice: StreamSlice, # noqa: ARG002 (unused) ) -> Record | None: if self.cursor and current_record: if not current_most_recent: @@ -505,7 +507,7 @@ def must_deduplicate_query_params(self) -> bool: return True @staticmethod - def _to_partition_key(to_serialize: Any) -> str: + def _to_partition_key(to_serialize: Any) -> str: # noqa: ANN401 (any-type) # separators have changed in Python 3.4. 
To avoid being impacted by further change, we explicitly specify our own value return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True) diff --git a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py index b101b664..52ef3008 100644 --- a/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py +++ b/airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py @@ -71,7 +71,7 @@ def get_json_schema(self) -> Mapping[str, Any]: self.package_name = resource return self._resolve_schema_references(raw_schema) - def _get_json_filepath(self) -> Any: + def _get_json_filepath(self) -> Any: # noqa: ANN401 (any-type) return self.file_path.eval(self.config) # type: ignore # file_path is always cast to an interpolated string @staticmethod @@ -84,7 +84,7 @@ def extract_resource_and_schema_path(json_schema_path: str) -> tuple[str, str]: """ split_path = json_schema_path.split("/") - if split_path[0] == "" or split_path[0] == ".": + if split_path[0] == "" or split_path[0] == ".": # noqa: PLC1901 (compare to empty string) split_path = split_path[1:] if len(split_path) == 0: diff --git a/airbyte_cdk/sources/declarative/transformations/add_fields.py b/airbyte_cdk/sources/declarative/transformations/add_fields.py index d1e69d2a..ccc6bf6e 100644 --- a/airbyte_cdk/sources/declarative/transformations/add_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/add_fields.py @@ -39,7 +39,7 @@ class ParsedAddFieldDefinition: @dataclass -class AddFields(RecordTransformation): +class AddFields(RecordTransformation): # noqa: PLW1641 # missing __hash__ method """Transformation which adds field to an output record. The path of the added field can be nested. Adding nested fields will create all necessary parent objects (like mkdir -p). Adding fields to an array will extend the array to that index (filling intermediate indices with null values). 
So if you add a field at index 5 to the array ["value"], it will become ["value", null, null, null, null, diff --git a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py index 49c3185e..dc10a7fc 100644 --- a/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py @@ -18,9 +18,9 @@ class KeysToLowerTransformation(RecordTransformation): def transform( self, record: dict[str, Any], - config: Config | None = None, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, + config: Config | None = None, # noqa: ARG002 (unused) + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) ) -> None: for key in set(record.keys()): record[key.lower()] = record.pop(key) diff --git a/airbyte_cdk/sources/declarative/transformations/remove_fields.py b/airbyte_cdk/sources/declarative/transformations/remove_fields.py index 67e0ea65..e4902a81 100644 --- a/airbyte_cdk/sources/declarative/transformations/remove_fields.py +++ b/airbyte_cdk/sources/declarative/transformations/remove_fields.py @@ -58,15 +58,15 @@ def transform( self, record: dict[str, Any], config: Config | None = None, - stream_state: StreamState | None = None, - stream_slice: StreamSlice | None = None, + stream_state: StreamState | None = None, # noqa: ARG002 (unused) + stream_slice: StreamSlice | None = None, # noqa: ARG002 (unused) ) -> None: """:param record: The record to be transformed :return: the input record with the requested fields removed """ for pointer in self.field_pointers: # the dpath library by default doesn't delete fields from arrays - try: + try: # noqa: SIM105 (suppressible exception) dpath.delete( record, pointer, diff --git a/airbyte_cdk/sources/declarative/transformations/transformation.py b/airbyte_cdk/sources/declarative/transformations/transformation.py index c8fb7abb..8180059b 100644 --- a/airbyte_cdk/sources/declarative/transformations/transformation.py +++ b/airbyte_cdk/sources/declarative/transformations/transformation.py @@ -13,7 +13,7 @@ @dataclass -class RecordTransformation: +class RecordTransformation: # noqa: PLW1641 # missing __hash__ method """Implementations of this class define transformations that can be applied to records of a stream.""" @abstractmethod diff --git a/airbyte_cdk/sources/declarative/types.py b/airbyte_cdk/sources/declarative/types.py index 57a7f5bb..6b833d2d 100644 --- a/airbyte_cdk/sources/declarative/types.py +++ b/airbyte_cdk/sources/declarative/types.py @@ -1,6 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in 'declarative' module from __future__ import annotations @@ -18,9 +19,9 @@ # We've migrated connectors in the repository to reference the new location, but these assignments are used to retain backwards # compatibility for sources created by OSS customers or on forks. This can be removed when we start bumping major versions. 
-FieldPointer = FieldPointer -Config = Config -ConnectionDefinition = ConnectionDefinition -StreamState = StreamState -Record = Record -StreamSlice = StreamSlice +FieldPointer = FieldPointer # noqa: PLW0127 +Config = Config # noqa: PLW0127 +ConnectionDefinition = ConnectionDefinition # noqa: PLW0127 +StreamState = StreamState # noqa: PLW0127 +Record = Record # noqa: PLW0127 +StreamSlice = StreamSlice # noqa: PLW0127 diff --git a/airbyte_cdk/sources/declarative/yaml_declarative_source.py b/airbyte_cdk/sources/declarative/yaml_declarative_source.py index 28659ee7..92defe3b 100644 --- a/airbyte_cdk/sources/declarative/yaml_declarative_source.py +++ b/airbyte_cdk/sources/declarative/yaml_declarative_source.py @@ -26,7 +26,8 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[list[AirbyteStateMessage def __init__( self, path_to_yaml: str, - debug: bool = False, + *, + debug: bool = False, # noqa: ARG002 (unused) catalog: ConfiguredAirbyteCatalog | None = None, config: Mapping[str, Any] | None = None, state: list[AirbyteStateMessage] | None = None, diff --git a/airbyte_cdk/sources/embedded/base_integration.py b/airbyte_cdk/sources/embedded/base_integration.py index 929ad96a..ff66e69a 100644 --- a/airbyte_cdk/sources/embedded/base_integration.py +++ b/airbyte_cdk/sources/embedded/base_integration.py @@ -36,7 +36,7 @@ def __init__(self, runner: SourceRunner[TConfig], config: TConfig) -> None: self.last_state: AirbyteStateMessage | None = None @abstractmethod - def _handle_record(self, record: AirbyteRecordMessage, id: str | None) -> TOutput | None: + def _handle_record(self, record: AirbyteRecordMessage, id: str | None) -> TOutput | None: # noqa: A002 (shadowed built-in) """Turn an Airbyte record into the appropriate output type for the integration.""" pass diff --git a/airbyte_cdk/sources/embedded/tools.py b/airbyte_cdk/sources/embedded/tools.py index f3b7a608..e87c4337 100644 --- a/airbyte_cdk/sources/embedded/tools.py +++ b/airbyte_cdk/sources/embedded/tools.py @@ -15,8 +15,9 @@ def get_first( - iterable: Iterable[Any], predicate: Callable[[Any], bool] = lambda m: True -) -> Any | None: + iterable: Iterable[Any], + predicate: Callable[[Any], bool] = lambda m: True, # noqa: ARG005 (unused lambda arg) +) -> Any | None: # noqa: ANN401 (any-type) return next(filter(predicate, iterable), None) diff --git a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py index 4f53ffa4..fc236c73 100644 --- a/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +++ b/airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py @@ -34,7 +34,10 @@ def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None: self.stream_reader = stream_reader def check_availability( - self, stream: AbstractFileBasedStream, logger: logging.Logger, _: Source | None + self, + stream: AbstractFileBasedStream, + logger: logging.Logger, # noqa: ARG002 (unused) + source: Source | None, # noqa: ARG002 (unused) ) -> tuple[bool, str | None]: # type: ignore[override] """Perform a connection check for the stream (verify that we can list files from the stream). 
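Before the `_check_list_files` hunk below, a rough sketch of the overall shape of this check may help: list one file, and turn an empty or failing listing into a `(success, reason)` tuple. `_StreamLike` is a stand-in for the real stream interface, and the helper name is made up:

```python
# Rough sketch: report availability as a (success, reason) tuple, turning an
# empty listing or a listing error into a readable failure message.
from collections.abc import Iterable


class _StreamLike:
    """Stand-in for AbstractFileBasedStream: just a name and a file listing."""

    def __init__(self, name: str, files: Iterable[str]) -> None:
        self.name = name
        self._files = files

    def get_files(self) -> Iterable[str]:
        return self._files


def check_availability(stream: _StreamLike) -> tuple[bool, str | None]:
    try:
        next(iter(stream.get_files()))
    except StopIteration:
        return False, f"No files found in stream {stream.name}."
    except Exception as exc:
        return False, f"Unable to list files for stream {stream.name}: {exc}"
    return True, None


print(check_availability(_StreamLike("empty", [])))  # (False, 'No files found in stream empty.')
```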
@@ -93,7 +96,9 @@ def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile: try: file = next(iter(stream.get_files())) except StopIteration: - raise CheckAvailabilityError(FileBasedSourceError.EMPTY_STREAM, stream=stream.name) + raise CheckAvailabilityError( + FileBasedSourceError.EMPTY_STREAM, stream=stream.name + ) from None except CustomFileBasedException as exc: raise CheckAvailabilityError(str(exc), stream=stream.name) from exc except Exception as exc: @@ -129,7 +134,7 @@ def _check_parse_record( ) from exc schema = stream.catalog_schema or stream.config.input_schema - if schema and stream.validation_policy.validate_schema_before_sync: + if schema and stream.validation_policy.validate_schema_before_sync: # noqa: SIM102 (collapsible-if) if not conforms_to_schema(record, schema): # type: ignore raise CheckAvailabilityError( FileBasedSourceError.ERROR_VALIDATING_RECORD, diff --git a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py index 0a8589ec..c56748c0 100644 --- a/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +++ b/airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py @@ -74,7 +74,7 @@ def documentation_url(cls) -> AnyUrl: """:return: link to docs page for this source e.g. "https://docs.airbyte.com/integrations/sources/s3" """ @classmethod - def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]: + def schema(cls, *args: Any, **kwargs: Any) -> dict[str, Any]: # noqa: ANN401 (any-type) """Generates the mapping comprised of the config fields""" schema = super().schema(*args, **kwargs) transformed_schema: dict[str, Any] = copy.deepcopy(schema) @@ -90,7 +90,7 @@ def remove_discriminator(schema: dict[str, Any]) -> None: dpath.delete(schema, "properties/**/discriminator") @staticmethod - def replace_enum_allOf_and_anyOf(schema: dict[str, Any]) -> dict[str, Any]: + def replace_enum_allOf_and_anyOf(schema: dict[str, Any]) -> dict[str, Any]: # noqa: N802 (violates naming convention) """AllOfs are not supported by the UI, but pydantic is automatically writing them for enums. 
Unpacks the enums under allOf and moves them up a level under the enum key. anyOfs are also not supported by the UI, so we replace them with the similar oneOf, with the @@ -99,7 +99,7 @@ def replace_enum_allOf_and_anyOf(schema: dict[str, Any]) -> dict[str, Any]: objects_to_check = schema["properties"]["streams"]["items"]["properties"]["format"] objects_to_check["type"] = "object" objects_to_check["oneOf"] = objects_to_check.pop("anyOf", []) - for format in objects_to_check["oneOf"]: + for format in objects_to_check["oneOf"]: # noqa: A001 (shadowed built-in) for key in format["properties"]: object_property = format["properties"][key] AbstractFileBasedSpec.move_enum_to_root(object_property) @@ -120,7 +120,7 @@ def replace_enum_allOf_and_anyOf(schema: dict[str, Any]) -> dict[str, Any]: csv_format_schemas = list( filter( - lambda format: format["properties"]["filetype"]["default"] == "csv", + lambda format: format["properties"]["filetype"]["default"] == "csv", # noqa: A006 (lambda arg shadowed) schema["properties"]["streams"]["items"]["properties"]["format"]["oneOf"], ) ) diff --git a/airbyte_cdk/sources/file_based/config/csv_format.py b/airbyte_cdk/sources/file_based/config/csv_format.py index 2f99f499..7a2df3e2 100644 --- a/airbyte_cdk/sources/file_based/config/csv_format.py +++ b/airbyte_cdk/sources/file_based/config/csv_format.py @@ -189,7 +189,7 @@ def validate_encoding(cls, v: str) -> str: try: codecs.lookup(v) except LookupError: - raise ValueError(f"invalid encoding format: {v}") + raise ValueError(f"invalid encoding format: {v}") from None return v @root_validator diff --git a/airbyte_cdk/sources/file_based/exceptions.py b/airbyte_cdk/sources/file_based/exceptions.py index b91d48d1..7d7972f9 100644 --- a/airbyte_cdk/sources/file_based/exceptions.py +++ b/airbyte_cdk/sources/file_based/exceptions.py @@ -42,9 +42,9 @@ class FileBasedSourceError(Enum): class FileBasedErrorsCollector: """The placeholder for all errors collected.""" - errors: list[AirbyteMessage] = [] + errors: list[AirbyteMessage] = [] # noqa: RUF012 (mutable class attribute can leak across instances) - def yield_and_raise_collected(self) -> Any: + def yield_and_raise_collected(self) -> Any: # noqa: ANN401 (any-type) if self.errors: # emit collected logged messages yield from self.errors diff --git a/airbyte_cdk/sources/file_based/file_based_source.py b/airbyte_cdk/sources/file_based/file_based_source.py index d8a968db..a6652a7d 100644 --- a/airbyte_cdk/sources/file_based/file_based_source.py +++ b/airbyte_cdk/sources/file_based/file_based_source.py @@ -81,7 +81,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC): # We make each source override the concurrency level to give control over when they are upgraded.
_concurrency_level = None - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too many args) self, stream_reader: AbstractFileBasedStreamReader, spec_class: type[AbstractFileBasedSpec], @@ -89,7 +89,7 @@ def __init__( config: Mapping[str, Any] | None, state: list[AirbyteStateMessage] | None, availability_strategy: AbstractFileBasedAvailabilityStrategy | None = None, - discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(), + discovery_policy: AbstractDiscoveryPolicy = DefaultDiscoveryPolicy(), # noqa: B008 parsers: Mapping[type[Any], FileTypeParser] = default_parsers, validation_policies: Mapping[ ValidationPolicy, AbstractSchemaValidationPolicy @@ -153,7 +153,7 @@ def check_connection( message=FileBasedSourceError.CONFIG_VALIDATION_ERROR.value, exception=AirbyteTracedException(exception=config_exception), failure_type=FailureType.config_error, - ) + ) from None if len(streams) == 0: return ( False, @@ -166,7 +166,7 @@ def check_connection( tracebacks = [] for stream in streams: if not isinstance(stream, AbstractFileBasedStream): - raise ValueError(f"Stream {stream} is not a file-based stream.") + raise ValueError(f"Stream {stream} is not a file-based stream.") # noqa: TRY004 (expected TypeError) try: parsed_config = self._get_parsed_config(config) availability_method = ( @@ -210,7 +210,7 @@ def check_connection( def streams(self, config: Mapping[str, Any]) -> list[Stream]: """Return a list of this source's streams.""" - if self.catalog: + if self.catalog: # noqa: SIM108 (consider ternary if) state_manager = ConnectorStateManager(state=self.state) else: # During `check` operations we don't have a catalog so cannot create a state manager. @@ -294,7 +294,7 @@ def streams(self, config: Mapping[str, Any]) -> list[Stream]: ) streams.append(stream) - return streams + return streams # noqa: TRY300 (consider try-else) except ValidationError as exc: raise ConfigValidationError(FileBasedSourceError.CONFIG_VALIDATION_ERROR) from exc @@ -303,6 +303,7 @@ def _make_default_stream( self, stream_config: FileBasedStreamConfig, cursor: AbstractFileBasedCursor | None, + *, use_file_transfer: bool = False, ) -> AbstractFileBasedStream: return DefaultFileBasedStream( @@ -352,7 +353,7 @@ def read( ).items(): yield create_analytics_message(f"file-cdk-{parser}-stream-count", count) - def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: + def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification: # noqa: ANN401, ARG002 (any-type, unused) """Returns the specification describing what fields can be configured by a user when setting up a file-based source.""" return ConnectorSpecification( documentationUrl=self.spec_class.documentation_url(), diff --git a/airbyte_cdk/sources/file_based/file_based_stream_reader.py b/airbyte_cdk/sources/file_based/file_based_stream_reader.py index 613c53e1..73287c71 100644 --- a/airbyte_cdk/sources/file_based/file_based_stream_reader.py +++ b/airbyte_cdk/sources/file_based/file_based_stream_reader.py @@ -96,7 +96,7 @@ def filter_files_by_globs_and_start_date( seen = set() for file in files: - if self.file_matches_globs(file, globs): + if self.file_matches_globs(file, globs): # noqa: SIM102 (collapsible-if) if file.uri not in seen and (not start_date or file.last_modified >= start_date): seen.add(file.uri) yield file @@ -158,9 +158,9 @@ def get_file( def _get_file_transfer_paths(file: RemoteFile, local_directory: str) -> list[str]: # Remove left slashes from source path format to make relative path for writing locally file_relative_path = 
file.uri.lstrip("/") - local_file_path = path.join(local_directory, file_relative_path) + local_file_path = path.join(local_directory, file_relative_path) # noqa: PTH118 (prefer pathlib) # Ensure the local directory exists - makedirs(path.dirname(local_file_path), exist_ok=True) - absolute_file_path = path.abspath(local_file_path) + makedirs(path.dirname(local_file_path), exist_ok=True) # noqa: PTH103, PTH120 (prefer pathlib) + absolute_file_path = path.abspath(local_file_path) # noqa: PTH100 (prefer pathlib) return [file_relative_path, local_file_path, absolute_file_path] diff --git a/airbyte_cdk/sources/file_based/file_types/avro_parser.py b/airbyte_cdk/sources/file_based/file_types/avro_parser.py index 50603f39..4864f184 100644 --- a/airbyte_cdk/sources/file_based/file_types/avro_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/avro_parser.py @@ -53,7 +53,7 @@ class AvroParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: # noqa: ARG002 (unused) """AvroParser does not require config checks, implicit pydantic validation is enough.""" return True, None @@ -66,7 +66,7 @@ async def infer_schema( ) -> SchemaType: avro_format = config.format if not isinstance(avro_format, AvroFormat): - raise ValueError(f"Expected ParquetFormat, got {avro_format}") + raise ValueError(f"Expected ParquetFormat, got {avro_format}") # noqa: TRY004 (expected TypeError) with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: avro_reader = fastavro.reader(fp) @@ -84,7 +84,7 @@ async def infer_schema( } @classmethod - def _convert_avro_type_to_json( + def _convert_avro_type_to_json( # noqa: PLR0911, PLR0912 (too many returns, branches) cls, avro_format: AvroFormat, field_name: str, avro_field: str ) -> Mapping[str, Any]: if isinstance(avro_field, str) and avro_field in AVRO_TYPE_TO_JSON_TYPE: @@ -173,11 +173,11 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Mapping[str, SchemaType] | None, + discovered_schema: Mapping[str, SchemaType] | None, # noqa: ARG002 (unused) ) -> Iterable[dict[str, Any]]: avro_format = config.format or AvroFormat(filetype="avro") if not isinstance(avro_format, AvroFormat): - raise ValueError(f"Expected ParquetFormat, got {avro_format}") + raise ValueError(f"Expected ParquetFormat, got {avro_format}") # noqa: TRY004 (expected TypeError) line_no = 0 try: @@ -207,9 +207,11 @@ def file_read_mode(self) -> FileReadMode: return FileReadMode.READ_BINARY @staticmethod - def _to_output_value( - avro_format: AvroFormat, record_type: Mapping[str, Any], record_value: Any - ) -> Any: + def _to_output_value( # noqa: PLR0911 (too many returns) + avro_format: AvroFormat, + record_type: Mapping[str, Any], + record_value: Any, # noqa: ANN401 (any-type) + ) -> Any: # noqa: ANN401 (any-type) if isinstance(record_value, bytes): return record_value.decode() if not isinstance(record_type, Mapping): diff --git a/airbyte_cdk/sources/file_based/file_types/csv_parser.py b/airbyte_cdk/sources/file_based/file_types/csv_parser.py index 86fad424..7e888c70 100644 --- a/airbyte_cdk/sources/file_based/file_types/csv_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/csv_parser.py @@ -74,7 +74,7 @@ def read_data( except UnicodeError: raise AirbyteTracedException( message=f"{FileBasedSourceError.ENCODING_ERROR.value} Expected encoding: 
{config_format.encoding}", - ) + ) from None rows_to_skip = ( config_format.skip_rows_before_header @@ -167,7 +167,7 @@ def __init__( csv.field_size_limit(csv_field_max_bytes) self._csv_reader = csv_reader or _CsvReader() - def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: # noqa: ARG002 (unused) """CsvParser does not require config checks, implicit pydantic validation is enough.""" return True, None @@ -267,6 +267,7 @@ def _get_cast_function( deduped_property_types: Mapping[str, str], config_format: CsvFormat, logger: logging.Logger, + *, schemaless: bool, ) -> Callable[[Mapping[str, str]], Mapping[str, str]]: # Only cast values if the schema is provided @@ -285,6 +286,7 @@ def _to_nullable( row: Mapping[str, str], deduped_property_types: Mapping[str, str], null_values: set[str], + *, strings_can_be_null: bool, ) -> dict[str, str | None]: return { @@ -298,9 +300,10 @@ def _to_nullable( @staticmethod def _value_is_none( - value: Any, + value: Any, # noqa: ANN401 (any-type) deduped_property_type: str | None, null_values: set[str], + *, strings_can_be_null: bool, ) -> bool: return value in null_values and (strings_can_be_null or deduped_property_type != "string") @@ -358,7 +361,7 @@ def _cast_types( _, python_type = TYPE_PYTHON_MAPPING[prop_type] if python_type is None: - if value == "": + if value == "": # noqa: PLC1901 (compare to empty string) cast_value = None else: warnings.append(_format_warning(key, value, prop_type)) @@ -401,7 +404,7 @@ def _cast_types( class _TypeInferrer(ABC): @abstractmethod - def add_value(self, value: Any) -> None: + def add_value(self, value: Any) -> None: # noqa: ANN401 (any-type) pass @abstractmethod @@ -410,7 +413,7 @@ def infer(self) -> str: class _DisabledTypeInferrer(_TypeInferrer): - def add_value(self, value: Any) -> None: + def add_value(self, value: Any) -> None: # noqa: ANN401 (any-type) pass def infer(self) -> str: @@ -432,7 +435,7 @@ def __init__( self._null_values = null_values self._values: set[str] = set() - def add_value(self, value: Any) -> None: + def add_value(self, value: Any) -> None: # noqa: ANN401 (any-type) self._values.add(value) def infer(self) -> str: @@ -472,7 +475,7 @@ def _infer_type(self, value: str) -> set[str]: def _is_boolean(self, value: str) -> bool: try: _value_to_bool(value, self._boolean_trues, self._boolean_falses) - return True + return True # noqa: TRY300 except ValueError: return False @@ -480,7 +483,7 @@ def _is_boolean(self, value: str) -> bool: def _is_integer(value: str) -> bool: try: _value_to_python_type(value, int) - return True + return True # noqa: TRY300 except ValueError: return False @@ -488,7 +491,7 @@ def _is_integer(value: str) -> bool: def _is_number(value: str) -> bool: try: _value_to_python_type(value, float) - return True + return True # noqa: TRY300 except ValueError: return False @@ -508,11 +511,11 @@ def _value_to_list(value: str) -> list[Any]: raise ValueError(f"Value {parsed_value} is not a valid list value") -def _value_to_python_type(value: str, python_type: type) -> Any: +def _value_to_python_type(value: str, python_type: type) -> Any: # noqa: ANN401 (any-type) return python_type(value) -def _format_warning(key: str, value: str, expected_type: Any | None) -> str: +def _format_warning(key: str, value: str, expected_type: Any | None) -> str: # noqa: ANN401 (any-type) return f"{key}: value={value},expected_type={expected_type}" @@ -523,5 +526,5 @@ def _no_cast(row: Mapping[str, 
str]) -> Mapping[str, str]: def _extract_format(config: FileBasedStreamConfig) -> CsvFormat: config_format = config.format if not isinstance(config_format, CsvFormat): - raise ValueError(f"Invalid format config: {config_format}") + raise ValueError(f"Invalid format config: {config_format}") # noqa: TRY004 (expected TypeError) return config_format diff --git a/airbyte_cdk/sources/file_based/file_types/excel_parser.py b/airbyte_cdk/sources/file_based/file_types/excel_parser.py index a5407031..5f01d5c4 100644 --- a/airbyte_cdk/sources/file_based/file_types/excel_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/excel_parser.py @@ -41,7 +41,7 @@ class ExcelParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: # noqa: ARG002 (unused) """ExcelParser does not require config checks, implicit pydantic validation is enough.""" return True, None @@ -90,7 +90,7 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Mapping[str, SchemaType] | None = None, + discovered_schema: Mapping[str, SchemaType] | None = None, # noqa: ARG002 (unused) ) -> Iterable[dict[str, Any]]: """Parses records from an Excel file based on the provided configuration. diff --git a/airbyte_cdk/sources/file_based/file_types/file_transfer.py b/airbyte_cdk/sources/file_based/file_types/file_transfer.py index 9170b234..76cb67e3 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_transfer.py +++ b/airbyte_cdk/sources/file_based/file_types/file_transfer.py @@ -26,13 +26,13 @@ class FileTransfer: def __init__(self) -> None: self._local_directory = ( AIRBYTE_STAGING_DIRECTORY - if os.path.exists(AIRBYTE_STAGING_DIRECTORY) + if os.path.exists(AIRBYTE_STAGING_DIRECTORY) # noqa: PTH110 (prefer pathlib) else DEFAULT_LOCAL_DIRECTORY ) def get_file( self, - config: FileBasedStreamConfig, + config: FileBasedStreamConfig, # noqa: ARG002 (unused) file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, diff --git a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py index 65e7170f..b36dd43e 100644 --- a/airbyte_cdk/sources/file_based/file_types/file_type_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/file_type_parser.py @@ -38,7 +38,7 @@ def parser_max_n_files_for_parsability(self) -> int | None: """The availability policy decides how many files are loaded for checking whether parsing works correctly. This method can provide a parser-specific override. If it's defined, the smaller of the two values will be used.""" return None - def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: + def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: # noqa: ARG002 (unused) """The parser can define a primary key. 
If no user-defined primary key is provided, this will be used.""" return None diff --git a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py index 2b2a2ee0..51f0aec1 100644 --- a/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/jsonl_parser.py @@ -33,13 +33,13 @@ class JsonlParser(FileTypeParser): MAX_BYTES_PER_FILE_FOR_SCHEMA_INFERENCE = 1_000_000 ENCODING = "utf8" - def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: # noqa: ARG002 (unused) """JsonlParser does not require config checks, implicit pydantic validation is enough.""" return True, None async def infer_schema( self, - config: FileBasedStreamConfig, + config: FileBasedStreamConfig, # noqa: ARG002 (unused) file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, @@ -57,11 +57,11 @@ async def infer_schema( def parse_records( self, - config: FileBasedStreamConfig, + config: FileBasedStreamConfig, # noqa: ARG002 (unused) file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Mapping[str, SchemaType] | None, + discovered_schema: Mapping[str, SchemaType] | None, # noqa: ARG002 (unused) ) -> Iterable[dict[str, Any]]: """This code supports parsing json objects over multiple lines even though this does not align with the JSONL format. This is for backward compatibility reasons i.e. the previous source-s3 parser did support this. The drawback is: @@ -94,6 +94,7 @@ def _parse_jsonl_entries( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, + *, read_limit: bool = False, ) -> Iterable[dict[str, Any]]: with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: diff --git a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py index e9deaff6..4325e822 100644 --- a/airbyte_cdk/sources/file_based/file_types/parquet_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/parquet_parser.py @@ -39,7 +39,7 @@ class ParquetParser(FileTypeParser): ENCODING = None - def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: + def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None]: # noqa: ARG002 (unused) """ParquetParser does not require config checks, implicit pydantic validation is enough.""" return True, None @@ -52,7 +52,7 @@ async def infer_schema( ) -> SchemaType: parquet_format = config.format if not isinstance(parquet_format, ParquetFormat): - raise ValueError(f"Expected ParquetFormat, got {parquet_format}") + raise ValueError(f"Expected ParquetFormat, got {parquet_format}") # noqa: TRY004 (expected TypeError) with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp: parquet_file = pq.ParquetFile(fp) @@ -78,7 +78,7 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Mapping[str, SchemaType] | None, + discovered_schema: Mapping[str, SchemaType] | None, # noqa: ARG002 (unused) ) -> Iterable[dict[str, Any]]: parquet_format = config.format if not isinstance(parquet_format, ParquetFormat): @@ -114,7 +114,7 @@ def parse_records( @staticmethod def _extract_partitions(filepath: str) -> list[str]: - return [unquote(partition) for partition in 
filepath.split(os.sep) if "=" in partition] + return [unquote(partition) for partition in filepath.split(os.sep) if "=" in partition] # noqa: PTH206 @property def file_read_mode(self) -> FileReadMode: @@ -123,14 +123,17 @@ def file_read_mode(self) -> FileReadMode: @staticmethod def _to_output_value( parquet_value: Scalar | DictionaryArray, parquet_format: ParquetFormat - ) -> Any: + ) -> Any: # noqa: ANN401 (any-type) """Convert an entry in a pyarrow table to a value that can be output by the source.""" if isinstance(parquet_value, DictionaryArray): return ParquetParser._dictionary_array_to_python_value(parquet_value) return ParquetParser._scalar_to_python_value(parquet_value, parquet_format) @staticmethod - def _scalar_to_python_value(parquet_value: Scalar, parquet_format: ParquetFormat) -> Any: + def _scalar_to_python_value( # noqa: PLR0911 (too many returns) + parquet_value: Scalar, + parquet_format: ParquetFormat, + ) -> Any: # noqa: ANN401 (any-type) """Convert a pyarrow scalar to a value that can be output by the source.""" if parquet_value.as_py() is None: return None @@ -190,7 +193,7 @@ def _dictionary_array_to_python_value(parquet_value: DictionaryArray) -> dict[st } @staticmethod - def parquet_type_to_schema_type( + def parquet_type_to_schema_type( # noqa: PLR0911 parquet_type: pa.DataType, parquet_format: ParquetFormat ) -> Mapping[str, str]: """Convert a pyarrow data type to an Airbyte schema type. diff --git a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py index 5d69aded..d19a1c9a 100644 --- a/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +++ b/airbyte_cdk/sources/file_based/file_types/unstructured_parser.py @@ -58,9 +58,9 @@ def optional_decode(contents: str | bytes) -> str: def _import_unstructured() -> None: """Dynamically imported as needed, due to slow import speed.""" global unstructured_partition_pdf, unstructured_partition_docx, unstructured_partition_pptx - from unstructured.partition.docx import partition_docx - from unstructured.partition.pdf import partition_pdf - from unstructured.partition.pptx import partition_pptx + from unstructured.partition.docx import partition_docx # noqa: PLC0415 (not top-level import) + from unstructured.partition.pdf import partition_pdf # noqa: PLC0415 (not top-level import) + from unstructured.partition.pptx import partition_pptx # noqa: PLC0415 (not top-level import) # separate global variables to properly propagate typing unstructured_partition_pdf = partition_pdf @@ -74,7 +74,7 @@ def user_error(e: Exception) -> bool: return False if not isinstance(e, requests.exceptions.RequestException): return False - return bool(e.response and 400 <= e.response.status_code < 500) + return bool(e.response and 400 <= e.response.status_code < 500) # noqa: PLR2004 (magic number) CLOUD_DEPLOYMENT_MODE = "cloud" @@ -91,7 +91,7 @@ def parser_max_n_files_for_parsability(self) -> int | None: """Do not check any files for parsability because it might be an expensive operation and doesn't give much confidence whether the sync will succeed.""" return 0 - def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: + def get_parser_defined_primary_key(self, config: FileBasedStreamConfig) -> str | None: # noqa: ARG002 (unused) """Return the document_key field as the primary key. 
This will pre-select the document key column as the primary key when setting up a connection, making it easier for the user to configure normalization in the destination. @@ -105,7 +105,7 @@ async def infer_schema( stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, ) -> SchemaType: - format = _extract_format(config) + format = _extract_format(config) # noqa: A001 (shadows built-in) with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle: filetype = self._get_filetype(file_handle, file) @@ -133,9 +133,9 @@ def parse_records( file: RemoteFile, stream_reader: AbstractFileBasedStreamReader, logger: logging.Logger, - discovered_schema: Mapping[str, SchemaType] | None, + discovered_schema: Mapping[str, SchemaType] | None, # noqa: ARG002 (unused) ) -> Iterable[dict[str, Any]]: - format = _extract_format(config) + format = _extract_format(config) # noqa: A001 (shadowed built-in) with stream_reader.open_file(file, self.file_read_mode, None, logger) as file_handle: try: markdown = self._read_file(file_handle, file, format, logger) @@ -166,8 +166,8 @@ def _read_file( self, file_handle: IOBase, remote_file: RemoteFile, - format: UnstructuredFormat, - logger: logging.Logger, + format: UnstructuredFormat, # noqa: A002 (shadowed built-in) + logger: logging.Logger, # noqa: ARG002 (unused) ) -> str: _import_unstructured() if ( @@ -176,7 +176,7 @@ def _read_file( or (not unstructured_partition_pptx) ): # check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point) - raise Exception("unstructured library is not available") + raise Exception("unstructured library is not available") # noqa: TRY002 (vanilla exception) filetype = self._get_filetype(file_handle, remote_file) @@ -264,7 +264,7 @@ def check_config(self, config: FileBasedStreamConfig) -> tuple[bool, str | None] def _read_file_remotely_with_retries( self, file_handle: IOBase, - format: APIProcessingConfigModel, + format: APIProcessingConfigModel, # noqa: A002 (shadowed built-in) filetype: FileType, strategy: str, remote_file: RemoteFile, @@ -275,7 +275,7 @@ def _read_file_remotely( self, file_handle: IOBase, - format: APIProcessingConfigModel, + format: APIProcessingConfigModel, # noqa: A002 (shadowed built-in) filetype: FileType, strategy: str, remote_file: RemoteFile, @@ -290,7 +290,7 @@ def _read_file_remotely( f"{format.api_url}/general/v0/general", headers=headers, data=data, files=file_data ) - if response.status_code == 422: + if response.status_code == 422: # noqa: PLR2004 (magic number) # 422 means the file couldn't be processed, but the API is working. Treat this as a parsing error (passing an error record to the destination).
raise self._create_parse_error(remote_file, response.json()) # Other error statuses are raised as requests exceptions (retry everything except user errors) @@ -310,7 +310,7 @@ def _read_file_locally( or (not unstructured_partition_pptx) ): # check whether unstructured library is actually available for better error message and to ensure proper typing (can't be None after this point) - raise Exception("unstructured library is not available") + raise Exception("unstructured library is not available") # noqa: TRY002 (vanilla exception) file: Any = file_handle @@ -331,7 +331,10 @@ def _read_file_locally( elif filetype == FileType.PPTX: elements = unstructured_partition_pptx(file=file) except Exception as e: - raise self._create_parse_error(remote_file, str(e)) + raise self._create_parse_error( + remote_file=remote_file, + message=str(e), + ) from None return self._render_markdown([element.to_dict() for element in elements]) @@ -375,7 +378,7 @@ def _supported_file_types(self) -> list[Any]: return [FileType.MD, FileType.PDF, FileType.DOCX, FileType.PPTX, FileType.TXT] def _get_file_type_error_message(self, file_type: FileType) -> str: - supported_file_types = ", ".join([str(type) for type in self._supported_file_types()]) + supported_file_types = ", ".join([str(type) for type in self._supported_file_types()]) # noqa: A001 (shadowed built-in) return f"File type {file_type} is not supported. Supported file types are {supported_file_types}" def _render_markdown(self, elements: list[Any]) -> str: @@ -399,5 +402,5 @@ def file_read_mode(self) -> FileReadMode: def _extract_format(config: FileBasedStreamConfig) -> UnstructuredFormat: config_format = config.format if not isinstance(config_format, UnstructuredFormat): - raise ValueError(f"Invalid format config: {config_format}") + raise ValueError(f"Invalid format config: {config_format}") # noqa: TRY004 (expected TypeError) return config_format diff --git a/airbyte_cdk/sources/file_based/schema_helpers.py b/airbyte_cdk/sources/file_based/schema_helpers.py index 442a52d7..d311add4 100644 --- a/airbyte_cdk/sources/file_based/schema_helpers.py +++ b/airbyte_cdk/sources/file_based/schema_helpers.py @@ -17,7 +17,7 @@ ) -JsonSchemaSupportedType = Union[list[str], Literal["string"], str] +JsonSchemaSupportedType = Union[list[str], Literal["string"], str] # noqa: PYI051, UP007 (deprecated Union type, redundant Union) SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]] schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}} @@ -36,7 +36,7 @@ class ComparableType(Enum): STRING = 4 OBJECT = 5 - def __lt__(self, other: Any) -> bool: + def __lt__(self, other: Any) -> bool: # noqa: ANN401 (any-type) if self.__class__ is other.__class__: return self.value < other.value # type: ignore return NotImplemented @@ -55,7 +55,7 @@ def __lt__(self, other: Any) -> bool: PYTHON_TYPE_MAPPING = {t: k for k, (_, t) in TYPE_PYTHON_MAPPING.items()} -def get_comparable_type(value: Any) -> ComparableType | None: +def get_comparable_type(value: Any) -> ComparableType | None: # noqa: ANN401, PLR0911 (any-type, too many returns) if value == "null": return ComparableType.NULL if value == "boolean": @@ -71,7 +71,7 @@ def get_comparable_type(value: Any) -> ComparableType | None: return None -def get_inferred_type(value: Any) -> ComparableType | None: +def get_inferred_type(value: Any) -> ComparableType | None: # noqa: ANN401, PLR0911 (any-type, too many returns) if value is None: return ComparableType.NULL if isinstance(value, bool): @@ -163,7 +163,7 @@ 
def _choose_wider_type(key: str, t1: Mapping[str, Any], t2: Mapping[str, Any]) - ) # accessing the type_mapping value -def is_equal_or_narrower_type(value: Any, expected_type: str) -> bool: +def is_equal_or_narrower_type(value: Any, expected_type: str) -> bool: # noqa: ANN401 (any-type) if isinstance(value, list): # We do not compare lists directly; the individual items are compared. # If we hit this condition, it means that the expected type is not @@ -178,7 +178,7 @@ def is_equal_or_narrower_type(value: Any, expected_type: str) -> bool: return ComparableType(inferred_type) <= ComparableType(get_comparable_type(expected_type)) -def conforms_to_schema(record: Mapping[str, Any], schema: Mapping[str, Any]) -> bool: +def conforms_to_schema(record: Mapping[str, Any], schema: Mapping[str, Any]) -> bool: # noqa: PLR0911 (too many returns) """Return true iff the record conforms to the supplied schema. The record conforms to the supplied schema iff: @@ -248,7 +248,7 @@ def type_mapping_to_jsonschema( json_mapping = _parse_json_input(input_schema) or {} for col_name, type_name in json_mapping.items(): - col_name, type_name = col_name.strip(), type_name.strip() + col_name, type_name = col_name.strip(), type_name.strip() # noqa: PLW2901 (redefined loop var) if not (col_name and type_name): raise ConfigValidationError( FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA, diff --git a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py index 01890a2d..7ec87e14 100644 --- a/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +++ b/airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py @@ -22,7 +22,9 @@ class EmitRecordPolicy(AbstractSchemaValidationPolicy): name = "emit_record" def record_passes_validation_policy( - self, record: Mapping[str, Any], schema: Mapping[str, Any] | None + self, + record: Mapping[str, Any], # noqa: ARG002 (unused) + schema: Mapping[str, Any] | None, # noqa: ARG002 (unused) ) -> bool: return True diff --git a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py index 4038ac77..9eb5818b 100644 --- a/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py @@ -59,7 +59,7 @@ class AbstractFileBasedStream(Stream): by the stream. """ - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too many args) self, config: FileBasedStreamConfig, catalog_schema: Mapping[str, Any] | None, @@ -86,7 +86,7 @@ def __init__( @abstractmethod def primary_key(self) -> PrimaryKeyType: ... - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def list_files(self) -> list[RemoteFile]: """List all files that belong to the stream. @@ -103,10 +103,10 @@ def get_files(self) -> Iterable[RemoteFile]: def read_records( self, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) stream_slice: StreamSlice | None = None, - stream_state: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Iterable[Mapping[str, Any] | AirbyteMessage]: """Yield all records from all remote files in `list_files_for_this_sync`. 
This method acts as an adapter between the generic Stream interface and the file-based's @@ -126,9 +126,9 @@ def read_records_from_slice( def stream_slices( self, *, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, - stream_state: Mapping[str, Any] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) + stream_state: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Iterable[Mapping[str, Any] | None]: """This method acts as an adapter between the generic Stream interface and the file-based's stream since file-based streams manage their own states. @@ -143,7 +143,7 @@ def compute_slices(self) -> Iterable[StreamSlice | None]: ... @abstractmethod - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> Mapping[str, Any]: """Return the JSON Schema for a stream.""" ... @@ -161,7 +161,7 @@ def get_parser(self) -> FileTypeParser: FileBasedSourceError.UNDEFINED_PARSER, stream=self.name, format=type(self.config.format), - ) + ) from None def record_passes_validation_policy(self, record: Mapping[str, Any]) -> bool: if self.validation_policy: diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py index b458e3ff..d02bdb37 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/adapters.py @@ -74,7 +74,7 @@ def create_from_stream( """Create a ConcurrentStream from a FileBasedStream object.""" pk = get_primary_key_from_stream(stream.primary_key) cursor_field = get_cursor_field_from_stream(stream) - stream._cursor = cursor + stream._cursor = cursor # noqa: SLF001 (private member) if not source.message_repository: raise ValueError( @@ -106,7 +106,7 @@ def create_from_stream( stream, cursor, logger=logger, - slice_logger=source._slice_logger, + slice_logger=source._slice_logger, # noqa: SLF001 (private member) ) def __init__( @@ -146,7 +146,7 @@ def supports_incremental(self) -> bool: def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy: return self._legacy_stream.availability_strategy - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> Mapping[str, Any]: return self._abstract_stream.get_json_schema() @@ -177,21 +177,21 @@ def get_underlying_stream(self) -> DefaultStream: def read( self, - configured_stream: ConfiguredAirbyteStream, - logger: logging.Logger, - slice_logger: SliceLogger, - stream_state: MutableMapping[str, Any], - state_manager: ConnectorStateManager, - internal_config: InternalConfig, + configured_stream: ConfiguredAirbyteStream, # noqa: ARG002 (unused) + logger: logging.Logger, # noqa: ARG002 (unused) + slice_logger: SliceLogger, # noqa: ARG002 (unused) + stream_state: MutableMapping[str, Any], # noqa: ARG002 (unused) + state_manager: ConnectorStateManager, # noqa: ARG002 (unused) + internal_config: InternalConfig, # noqa: ARG002 (unused) ) -> Iterable[StreamData]: yield from self._read_records() def read_records( self, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, - stream_slice: Mapping[str, Any] | None = None, - stream_state: Mapping[str, Any] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + stream_state: Mapping[str, Any] | None = None, # noqa: 
ARG002 (unused) ) -> Iterable[StreamData]: try: yield from self._read_records() @@ -264,7 +264,7 @@ def read(self) -> Iterable[Record]: else record_data.record.data ) if not record_message_data: - raise ExceptionWithDisplayMessage("A record without data was found") + raise ExceptionWithDisplayMessage("A record without data was found") # noqa: TRY301 (raise within try) else: yield Record( data=record_message_data, @@ -310,7 +310,7 @@ def __hash__(self) -> int: def stream_name(self) -> str: return self._stream.name - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def _use_file_transfer(self) -> bool: return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer @@ -342,7 +342,7 @@ def generate(self) -> Iterable[FileBasedStreamPartition]: ): if _slice is not None: for file in _slice.get("files", []): - pending_partitions.append( + pending_partitions.append( # noqa: PERF401 (consider list comprehension) FileBasedStreamPartition( self._stream, {"files": [copy.deepcopy(file)]}, diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py index 271a18df..3c585232 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py @@ -23,7 +23,7 @@ class AbstractConcurrentFileBasedCursor(Cursor, AbstractFileBasedCursor, ABC): - def __init__(self, *args: Any, **kwargs: Any) -> None: + def __init__(self, *args: Any, **kwargs: Any) -> None: # noqa: ANN401 (any-type) pass @property diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py index 1c08fdd5..9b7c7bdd 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py @@ -77,7 +77,7 @@ def state(self) -> MutableMapping[str, Any]: def observe(self, record: Record) -> None: pass - def close_partition(self, partition: Partition) -> None: + def close_partition(self, partition: Partition) -> None: # noqa: ARG002 (unused) with self._pending_files_lock: if self._pending_files is None: raise RuntimeError( @@ -142,7 +142,7 @@ def add_file(self, file: RemoteFile) -> None: raise RuntimeError( "Expected pending partitions to be set but it was not. This is unexpected. Please contact Support." ) - with self._pending_files_lock: + with self._pending_files_lock: # noqa: SIM117 (consider combining `with` blocks) with self._state_lock: if file.uri not in self._pending_files: self._message_repository.emit_message( @@ -165,7 +165,7 @@ def add_file(self, file: RemoteFile) -> None: if oldest_file: del self._file_to_datetime_history[oldest_file.uri] else: - raise Exception( + raise Exception( # noqa: TRY002 (vanilla exception) "The history is full but there is no files in the history. This should never happen and might be indicative of a bug in the CDK." 
) self.emit_state_message() @@ -184,7 +184,7 @@ def emit_state_message(self) -> None: self._message_repository.emit_message(state_message) def _get_new_cursor_value(self) -> str: - with self._pending_files_lock: + with self._pending_files_lock: # noqa: SIM117 (consider combining `with` blocks) with self._state_lock: if self._pending_files: # If there are partitions that haven't been synced, we don't know whether the files that have been synced @@ -234,7 +234,7 @@ def get_files_to_sync( if self._should_sync_file(f, logger): yield f - def _should_sync_file(self, file: RemoteFile, logger: logging.Logger) -> bool: + def _should_sync_file(self, file: RemoteFile, logger: logging.Logger) -> bool: # noqa: ARG002 (unused) with self._state_lock: if file.uri in self._file_to_datetime_history: # If the file's uri is in the history, we should sync the file if it has been modified since it was synced diff --git a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py index 18c5ebbb..bbb46745 100644 --- a/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py @@ -34,7 +34,7 @@ def __init__( stream_config: FileBasedStreamConfig, message_repository: MessageRepository, stream_namespace: str | None, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401, ARG002 (any-type, unused) ) -> None: self._stream_name = stream_config.name self._stream_namespace = stream_namespace @@ -61,14 +61,16 @@ def add_file(self, file: RemoteFile) -> None: pass def get_files_to_sync( - self, all_files: Iterable[RemoteFile], logger: logging.Logger + self, + all_files: Iterable[RemoteFile], + logger: logging.Logger, # noqa: ARG002 (unused) ) -> Iterable[RemoteFile]: return all_files def get_state(self) -> MutableMapping[str, Any]: return {} - def set_initial_state(self, value: StreamState) -> None: + def set_initial_state(self, value: StreamState) -> None: # noqa: ARG002 (unused) return None def get_start_time(self) -> datetime: diff --git a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py index 14ae7e0d..03ed7e4a 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py @@ -21,7 +21,7 @@ class AbstractFileBasedCursor(ABC): """Abstract base class for cursors used by file-based streams.""" @abstractmethod - def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any) -> None: + def __init__(self, stream_config: FileBasedStreamConfig, **kwargs: Any) -> None: # noqa: ANN401 (any-type) """Common interface for all cursors.""" ... 
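The `DefaultFileBasedCursor` diff that follows centers on a bounded `uri -> last_modified` history. A toy version of that bookkeeping, with illustrative names and a deliberately tiny size limit:

```python
# Toy version of the history bookkeeping: a bounded uri -> last_modified map,
# oldest entry evicted when full, and a file resynced only when it is newer
# than what the history recorded. Names and the size limit are illustrative.
from datetime import datetime

DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
MAX_HISTORY_SIZE = 3


class ToyFileCursor:
    def __init__(self) -> None:
        self._history: dict[str, str] = {}

    def add_file(self, uri: str, last_modified: datetime) -> None:
        if len(self._history) >= MAX_HISTORY_SIZE and uri not in self._history:
            # Evict the oldest entry (ties broken by uri), matching the
            # itemgetter(1, 0) ordering used in the diff below.
            oldest_uri = min(self._history.items(), key=lambda kv: (kv[1], kv[0]))[0]
            del self._history[oldest_uri]
        self._history[uri] = last_modified.strftime(DATE_TIME_FORMAT)

    def should_sync(self, uri: str, last_modified: datetime) -> bool:
        if uri not in self._history:
            return True  # never seen before: sync it
        synced_at = datetime.strptime(self._history[uri], DATE_TIME_FORMAT)
        return last_modified > synced_at  # sync only if modified since
```

Timestamps rendered with this zero-padded format sort lexicographically in chronological order, which is why the eviction key can compare the strings directly.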
diff --git a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py index 903cbf40..341a3d6c 100644 --- a/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +++ b/airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py @@ -27,7 +27,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor): DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ" CURSOR_FIELD = "_ab_source_file_last_modified" - def __init__(self, stream_config: FileBasedStreamConfig, **_: Any) -> None: + def __init__(self, stream_config: FileBasedStreamConfig, **_: Any) -> None: # noqa: ANN401 (any-type) super().__init__(stream_config) self._file_to_datetime_history: MutableMapping[str, str] = {} self._time_window_if_history_is_full = timedelta( @@ -58,7 +58,7 @@ def add_file(self, file: RemoteFile) -> None: if oldest_file: del self._file_to_datetime_history[oldest_file.uri] else: - raise Exception( + raise Exception( # noqa: TRY002 (vanilla exception) "The history is full but there is no files in the history. This should never happen and might be indicative of a bug in the CDK." ) @@ -82,7 +82,7 @@ def _is_history_full(self) -> bool: """Returns true if the state's history is full, meaning new entries will start to replace old entries.""" return len(self._file_to_datetime_history) >= self.DEFAULT_MAX_HISTORY_SIZE - def _should_sync_file(self, file: RemoteFile, logger: logging.Logger) -> bool: + def _should_sync_file(self, file: RemoteFile, logger: logging.Logger) -> bool: # noqa: PLR0911 (too many returns) if file.uri in self._file_to_datetime_history: # If the file's uri is in the history, we should sync the file if it has been modified since it was synced updated_at_from_history = datetime.strptime( @@ -133,7 +133,8 @@ def _compute_earliest_file_in_history(self) -> RemoteFile | None: self._file_to_datetime_history.items(), key=operator.itemgetter(1, 0) ) return RemoteFile( - uri=filename, last_modified=datetime.strptime(last_modified, self.DATE_TIME_FORMAT) + uri=filename, + last_modified=datetime.strptime(last_modified, self.DATE_TIME_FORMAT), ) return None diff --git a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py index 5a74c9c2..c21cdc49 100644 --- a/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +++ b/airbyte_cdk/sources/file_based/stream/default_file_based_stream.py @@ -8,7 +8,7 @@ import traceback from copy import deepcopy from functools import cache -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING, Any, ClassVar from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level from airbyte_cdk.models import Type as MessageType @@ -52,10 +52,10 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin): ab_file_name_col = "_ab_source_file_url" modified = "modified" source_file_url = "source_file_url" - airbyte_columns = [ab_last_mod_col, ab_file_name_col] + airbyte_columns: ClassVar[list[str]] = [ab_last_mod_col, ab_file_name_col] use_file_transfer = False - def __init__(self, **kwargs: Any) -> None: + def __init__(self, **kwargs: Any) -> None: # noqa: ANN401 (any-type) if self.FILE_TRANSFER_KW in kwargs: self.use_file_transfer = kwargs.pop(self.FILE_TRANSFER_KW, False) super().__init__(**kwargs) @@ -156,7 +156,7 @@ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Airbyte if not 
self.record_passes_validation_policy(record): n_skipped += 1 continue - record = self.transform_record_for_file_transfer(record, file) + record = self.transform_record_for_file_transfer(record, file) # noqa: PLW2901 (redefined loop var) yield stream_data_to_airbyte_message( self.name, record, is_file_transfer_message=True ) @@ -166,11 +166,11 @@ def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[Airbyte ): line_no += 1 if self.config.schemaless: - record = {"data": record} + record = {"data": record} # noqa: PLW2901 (redefined loop var) elif not self.record_passes_validation_policy(record): n_skipped += 1 continue - record = self.transform_record(record, file, file_datetime_string) + record = self.transform_record(record, file, file_datetime_string) # noqa: PLW2901 (redefined loop var) yield stream_data_to_airbyte_message(self.name, record) self._cursor.add_file(file) @@ -229,7 +229,7 @@ def cursor_field(self) -> str | list[str]: """ return self.ab_last_mod_col - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> JsonSchema: extra_fields = { self.ab_last_mod_col: {"type": "string"}, diff --git a/airbyte_cdk/sources/file_based/types.py b/airbyte_cdk/sources/file_based/types.py index 11bb5808..460c0010 100644 --- a/airbyte_cdk/sources/file_based/types.py +++ b/airbyte_cdk/sources/file_based/types.py @@ -1,6 +1,7 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in 'types' module from __future__ import annotations diff --git a/airbyte_cdk/sources/source.py b/airbyte_cdk/sources/source.py index b5792f6c..9532f576 100644 --- a/airbyte_cdk/sources/source.py +++ b/airbyte_cdk/sources/source.py @@ -70,7 +70,7 @@ def read_state(cls, state_path: str) -> list[AirbyteStateMessage]: """ parsed_state_messages = [] if state_path: - state_obj = BaseConnector._read_json_file(state_path) + state_obj = BaseConnector._read_json_file(state_path) # noqa: SLF001 (private member) if state_obj: for state in state_obj: # type: ignore # `isinstance(state_obj, List)` ensures that this is a list parsed_message = AirbyteStateMessageSerializer.load(state) diff --git a/airbyte_cdk/sources/streams/call_rate.py b/airbyte_cdk/sources/streams/call_rate.py index 95b5e148..d8e609be 100644 --- a/airbyte_cdk/sources/streams/call_rate.py +++ b/airbyte_cdk/sources/streams/call_rate.py @@ -43,7 +43,7 @@ class CallRateLimitHit(Exception): def __init__( self, error: str, - item: Any, + item: Any, # noqa: ANN401 (any-type) weight: int, rate: str, time_to_wait: timedelta, @@ -69,7 +69,7 @@ class AbstractCallRatePolicy(abc.ABC): """ @abc.abstractmethod - def matches(self, request: Any) -> bool: + def matches(self, request: Any) -> bool: # noqa: ANN401 (any-type) """Tells if this policy matches specific request and should apply to it :param request: @@ -77,7 +77,7 @@ def matches(self, request: Any) -> bool: """ @abc.abstractmethod - def try_acquire(self, request: Any, weight: int) -> None: + def try_acquire(self, request: Any, weight: int) -> None: # noqa: ANN401 (any-type) """Try to acquire request :param request: a request object representing a single call to API @@ -98,7 +98,7 @@ class RequestMatcher(abc.ABC): """Callable that help to match a request object with call rate policies.""" @abc.abstractmethod - def __call__(self, request: Any) -> bool: + def __call__(self, request: Any) -> bool: # noqa: ANN401 (any-type) """:param request: :return: True if matches the provided request object, False - otherwise """ @@ 
-136,7 +136,7 @@ def _match_dict(obj: Mapping[str, Any], pattern: Mapping[str, Any]) -> bool: """ return pattern.items() <= obj.items() - def __call__(self, request: Any) -> bool: + def __call__(self, request: Any) -> bool: # noqa: ANN401 (any-type) """:param request: :return: True if matches the provided request object, False - otherwise """ @@ -158,7 +158,7 @@ def __call__(self, request: Any) -> bool: params = dict(parse.parse_qsl(str(parsed_url.query))) if not self._match_dict(params, self._params): return False - if self._headers is not None: + if self._headers is not None: # noqa: SIM102 (collapsible-if) if not self._match_dict(prepared_request.headers, self._headers): return False return True @@ -168,7 +168,7 @@ class BaseCallRatePolicy(AbstractCallRatePolicy, abc.ABC): def __init__(self, matchers: list[RequestMatcher]) -> None: self._matchers = matchers - def matches(self, request: Any) -> bool: + def matches(self, request: Any) -> bool: # noqa: ANN401 (any-type) """Tell if this policy matches specific request and should apply to it :param request: @@ -201,7 +201,7 @@ class UnlimitedCallRatePolicy(BaseCallRatePolicy): The code above will limit all calls to /some/method except calls that have header sandbox=True """ - def try_acquire(self, request: Any, weight: int) -> None: + def try_acquire(self, request: Any, weight: int) -> None: # noqa: ANN401 (any-type) """Do nothing""" def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: @@ -230,7 +230,7 @@ def __init__( self._lock = RLock() super().__init__(matchers=matchers) - def try_acquire(self, request: Any, weight: int) -> None: + def try_acquire(self, request: Any, weight: int) -> None: # noqa: ANN401 (any-type) if weight > self._call_limit: raise ValueError("Weight can not exceed the call limit") if not self.matches(request): @@ -314,7 +314,7 @@ def __init__(self, rates: list[Rate], matchers: list[RequestMatcher]) -> None: self._limiter = Limiter(self._bucket) super().__init__(matchers=matchers) - def try_acquire(self, request: Any, weight: int) -> None: + def try_acquire(self, request: Any, weight: int) -> None: # noqa: ANN401 (any-type) if not self.matches(request): raise ValueError("Request does not match the policy") @@ -334,7 +334,7 @@ def try_acquire(self, request: Any, weight: int) -> None: weight=int(exc.meta_info["weight"]), rate=str(exc.meta_info["rate"]), time_to_wait=timedelta(milliseconds=time_to_wait), - ) + ) from None def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | None) -> None: """Adjust call bucket to reflect the state of the API server @@ -343,7 +343,7 @@ def update(self, available_calls: int | None, call_reset_ts: datetime.datetime | :param call_reset_ts: :return: """ - if ( + if ( # noqa: SIM102 (collapsible-if) available_calls is not None and call_reset_ts is None ): # we do our best to sync buckets with API if available_calls == 0: @@ -368,7 +368,13 @@ class AbstractAPIBudget(abc.ABC): """ @abc.abstractmethod - def acquire_call(self, request: Any, block: bool = True, timeout: float | None = None) -> None: + def acquire_call( + self, + request: Any, # noqa: ANN401 (any-type) + *, + block: bool = True, + timeout: float | None = None, + ) -> None: """Try to get a call from budget, will block by default :param request: @@ -378,11 +384,11 @@ def acquire_call(self, request: Any, block: bool = True, timeout: float | None = """ @abc.abstractmethod - def get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: + def 
get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: # noqa: ANN401 (any-type) """Find matching call rate policy for specific request""" @abc.abstractmethod - def update_from_response(self, request: Any, response: Any) -> None: + def update_from_response(self, request: Any, response: Any) -> None: # noqa: ANN401 (any-type) """Update budget information based on response from API :param request: the initial request that triggered this response @@ -405,13 +411,19 @@ def __init__( self._policies = policies self._maximum_attempts_to_acquire = maximum_attempts_to_acquire - def get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: + def get_matching_policy(self, request: Any) -> AbstractCallRatePolicy | None: # noqa: ANN401 (any-type) for policy in self._policies: if policy.matches(request): return policy return None - def acquire_call(self, request: Any, block: bool = True, timeout: float | None = None) -> None: + def acquire_call( + self, + request: Any, # noqa: ANN401 (any-type) + *, + block: bool = True, + timeout: float | None = None, + ) -> None: """Try to get a call from budget, will block by default. Matchers will be called sequentially in the same order they were added. The first matcher that returns True will @@ -427,7 +439,7 @@ def acquire_call(self, request: Any, block: bool = True, timeout: float | None = elif self._policies: logger.info("no policies matched with requests, allow call by default") - def update_from_response(self, request: Any, response: Any) -> None: + def update_from_response(self, request: Any, response: Any) -> None: # noqa: ANN401 (any-type) """Update budget information based on response from API :param request: the initial request that triggered this response @@ -436,7 +448,12 @@ def update_from_response(self, request: Any, response: Any) -> None: pass def _do_acquire( - self, request: Any, policy: AbstractCallRatePolicy, block: bool, timeout: float | None + self, + request: Any, # noqa: ANN401 (any-type) + policy: AbstractCallRatePolicy, + *, + block: bool, + timeout: float | None, ) -> None: """Internal method to try to acquire a call credit @@ -450,7 +467,7 @@ def _do_acquire( for _attempt in range(1, self._maximum_attempts_to_acquire): try: policy.try_acquire(request, weight=1) - return + return # noqa: TRY300 except CallRateLimitHit as exc: last_exception = exc if block: @@ -484,7 +501,7 @@ def __init__( ratelimit_reset_header: str = "ratelimit-reset", ratelimit_remaining_header: str = "ratelimit-remaining", status_codes_for_ratelimit_hit: tuple[int] = (429,), - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> None: """Constructor @@ -497,7 +514,7 @@ def __init__( self._status_codes_for_ratelimit_hit = status_codes_for_ratelimit_hit super().__init__(**kwargs) - def update_from_response(self, request: Any, response: Any) -> None: + def update_from_response(self, request: Any, response: Any) -> None: # noqa: ANN401 (any-type) policy = self.get_matching_policy(request) if not policy: return @@ -509,7 +526,7 @@ def update_from_response(self, request: Any, response: Any) -> None: def get_reset_ts_from_response(self, response: requests.Response) -> datetime.datetime | None: if response.headers.get(self._ratelimit_reset_header): - return datetime.datetime.fromtimestamp( + return datetime.datetime.fromtimestamp( # noqa: DTZ006 int(response.headers[self._ratelimit_reset_header]) ) return None @@ -530,12 +547,16 @@ class LimiterMixin(MIXIN_BASE): def __init__( self, api_budget: AbstractAPIBudget, - **kwargs: Any, 
+ **kwargs: Any, # noqa: ANN401 (any-type) ) -> None: self._api_budget = api_budget super().__init__(**kwargs) # type: ignore # Base Session doesn't take any kwargs - def send(self, request: requests.PreparedRequest, **kwargs: Any) -> requests.Response: + def send( + self, + request: requests.PreparedRequest, + **kwargs: Any, # noqa: ANN401 (any-type) + ) -> requests.Response: """Send a request with rate-limiting.""" self._api_budget.acquire_call(request) response = super().send(request, **kwargs) diff --git a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py index 04f77c14..31a3fcde 100644 --- a/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +++ b/airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py @@ -62,7 +62,7 @@ def next(self) -> Mapping[str, Any] | None: try: next_slice = next(self._stream_slices) self._has_slices = True - return next_slice + return next_slice # noqa: TRY300 (consider try-else) except StopIteration: # This is used to avoid sending a duplicate state message at the end of a sync since the stream has already # emitted state at the end of each slice. If we want to avoid this extra complexity, we can also just accept @@ -90,6 +90,7 @@ def __init__( self, cursor: Cursor, stream_slices: Iterable[Mapping[str, Any] | None], + *, read_state_from_cursor: bool = False, ) -> None: self._cursor = cursor @@ -104,7 +105,7 @@ def __init__( def next(self) -> Mapping[str, Any] | None: try: self.current_slice = self._find_next_slice() - return self.current_slice + return self.current_slice # noqa: TRY300 (consider try-else) except StopIteration: self._finished_sync = True return None @@ -196,7 +197,7 @@ def current_slice(self, value: StreamSlice) -> None: def read_and_convert_slice(self) -> StreamSlice: next_slice = next(self._stream_slices) if not isinstance(next_slice, StreamSlice): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"{self.current_slice} should be of type StreamSlice. This is likely a bug in the CDK, please contact Airbyte support" ) return next_slice @@ -226,6 +227,7 @@ def __init__( self, cursor: Cursor, stream_slices: Iterable[Mapping[str, Any] | None], + *, read_state_from_cursor: bool = False, ) -> None: super().__init__( @@ -259,7 +261,7 @@ def next(self) -> Mapping[str, Any] | None: def read_and_convert_slice(self) -> StreamSlice: next_mapping_slice = next(self._stream_slices) if not isinstance(next_mapping_slice, Mapping): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"{self.current_slice} should be of type Mapping. This is likely a bug in the CDK, please contact Airbyte support" ) diff --git a/airbyte_cdk/sources/streams/checkpoint/cursor.py b/airbyte_cdk/sources/streams/checkpoint/cursor.py index 542f035d..65da38c7 100644 --- a/airbyte_cdk/sources/streams/checkpoint/cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/cursor.py @@ -24,7 +24,7 @@ def set_initial_state(self, stream_state: StreamState) -> None: :param stream_state: The state of the stream as returned by get_stream_state """ - def observe(self, stream_slice: StreamSlice, record: Record) -> None: + def observe(self, stream_slice: StreamSlice, record: Record) -> None: # noqa: B027 (intentionally empty, not abstract) """Register a record with the cursor; the cursor instance can then use it to manage the state of the in-progress stream read. 
:param stream_slice: The current slice, which may or may not contain the most recently observed record @@ -34,7 +34,7 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: pass @abstractmethod - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401 (any-type) """Update state based on the stream slice. Note that `stream_slice.cursor_slice` and `most_recent_record.associated_slice` are expected to be the same but we make it explicit here that `stream_slice` should be leveraged to update the state. We do not pass in the latest record, since cursor instances should maintain the relevant internal state on their own. diff --git a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py index a01dbce8..723104f7 100644 --- a/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py +++ b/airbyte_cdk/sources/streams/checkpoint/per_partition_key_serializer.py @@ -17,10 +17,10 @@ class PerPartitionKeySerializer: """ @staticmethod - def to_partition_key(to_serialize: Any) -> str: + def to_partition_key(to_serialize: Any) -> str: # noqa: ANN401 (any-type) # separators have changed in Python 3.4. To avoid being impacted by further change, we explicitly specify our own value return json.dumps(to_serialize, indent=None, separators=(",", ":"), sort_keys=True) @staticmethod - def to_partition(to_deserialize: Any) -> Mapping[str, Any]: + def to_partition(to_deserialize: Any) -> Mapping[str, Any]: # noqa: ANN401 (any-type) return json.loads(to_deserialize) # type: ignore # The partition is known to be a dict, but the type hint is Any diff --git a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py index c4473336..e50f6360 100644 --- a/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/resumable_full_refresh_cursor.py @@ -31,19 +31,19 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: """Resumable full refresh manages state using a page number so it does not need to update state by observing incoming records.""" pass - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401, ARG002 (any-type, unused) self._cursor = stream_slice.cursor_slice - def should_be_synced(self, record: Record) -> bool: + def should_be_synced(self, record: Record) -> bool: # noqa: ARG002 (unused) """Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages that don't have filterable bounds. We should always return them. 
""" return True - def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: + def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: # noqa: ARG002 (unused) """RFR record don't have ordering to be compared between one another.""" return False - def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: + def select_state(self, stream_slice: StreamSlice | None = None) -> StreamState | None: # noqa: ARG002 (unused) # A top-level RFR cursor only manages the state of a single partition return self._cursor diff --git a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py index d780ab04..eded0a1a 100644 --- a/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +++ b/airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py @@ -80,19 +80,19 @@ def observe(self, stream_slice: StreamSlice, record: Record) -> None: """Substream resumable full refresh manages state by closing the slice after syncing a parent so observe is not used.""" pass - def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: + def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None: # noqa: ANN401, ARG002 (any-type, unused) self._per_partition_state[self._to_partition_key(stream_slice.partition)] = { "partition": stream_slice.partition, "cursor": FULL_REFRESH_COMPLETE_STATE, } - def should_be_synced(self, record: Record) -> bool: + def should_be_synced(self, record: Record) -> bool: # noqa: ARG002 (unused) """Unlike date-based cursors which filter out records outside slice boundaries, resumable full refresh records exist within pages that don't have filterable bounds. We should always return them. 
""" return True - def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: + def is_greater_than_or_equal(self, first: Record, second: Record) -> bool: # noqa: ARG002 (unused) """RFR record don't have ordering to be compared between one another.""" return False diff --git a/airbyte_cdk/sources/streams/concurrent/adapters.py b/airbyte_cdk/sources/streams/concurrent/adapters.py index 23ec6aaf..3d80cb1b 100644 --- a/airbyte_cdk/sources/streams/concurrent/adapters.py +++ b/airbyte_cdk/sources/streams/concurrent/adapters.py @@ -115,7 +115,7 @@ def create_from_stream( ), stream, cursor, - slice_logger=source._slice_logger, + slice_logger=source._slice_logger, # noqa: SLF001 (private member) logger=logger, ) @@ -147,21 +147,21 @@ def __init__( def read( self, - configured_stream: ConfiguredAirbyteStream, - logger: logging.Logger, - slice_logger: SliceLogger, - stream_state: MutableMapping[str, Any], - state_manager: ConnectorStateManager, - internal_config: InternalConfig, + configured_stream: ConfiguredAirbyteStream, # noqa: ARG002 (unused) + logger: logging.Logger, # noqa: ARG002 (unused) + slice_logger: SliceLogger, # noqa: ARG002 (unused) + stream_state: MutableMapping[str, Any], # noqa: ARG002 (unused) + state_manager: ConnectorStateManager, # noqa: ARG002 (unused) + internal_config: InternalConfig, # noqa: ARG002 (unused) ) -> Iterable[StreamData]: yield from self._read_records() def read_records( self, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, - stream_slice: Mapping[str, Any] | None = None, - stream_state: Mapping[str, Any] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + stream_state: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Iterable[StreamData]: try: yield from self._read_records() @@ -205,7 +205,7 @@ def cursor_field(self) -> str | list[str]: def cursor(self) -> Cursor | None: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor return self._cursor - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> Mapping[str, Any]: return self._abstract_stream.get_json_schema() @@ -214,7 +214,9 @@ def supports_incremental(self) -> bool: return self._legacy_stream.supports_incremental def check_availability( - self, logger: logging.Logger, source: Source | None = None + self, + logger: logging.Logger, # noqa: ARG002 (unused) + source: Source | None = None, # noqa: ARG002 (unused) ) -> tuple[bool, str | None]: """Verifies the stream is available. Delegates to the underlying AbstractStream and ignores the parameters :param logger: (ignored) @@ -235,7 +237,7 @@ def get_underlying_stream(self) -> DefaultStream: class SliceEncoder(json.JSONEncoder): - def default(self, obj: Any) -> Any: + def default(self, obj: Any) -> Any: # noqa: ANN401 (any-type) if hasattr(obj, "__json_serializable__"): return obj.__json_serializable__() @@ -460,9 +462,9 @@ def __init__( def check_availability( self, - stream: Stream, + stream: Stream, # noqa: ARG002 (unused) logger: logging.Logger, - source: Source | None = None, + source: Source | None = None, # noqa: ARG002 (unused) ) -> tuple[bool, str | None]: """Checks stream availability. 
diff --git a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py index 24adb816..cb317216 100644 --- a/airbyte_cdk/sources/streams/concurrent/availability_strategy.py +++ b/airbyte_cdk/sources/streams/concurrent/availability_strategy.py @@ -75,7 +75,7 @@ class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy): without disrupting existing functionality. """ - def check_availability(self, logger: logging.Logger) -> StreamAvailability: + def check_availability(self, logger: logging.Logger) -> StreamAvailability: # noqa: ARG002 (unused) """Checks stream availability. :param logger: logger object to use diff --git a/airbyte_cdk/sources/streams/concurrent/cursor.py b/airbyte_cdk/sources/streams/concurrent/cursor.py index 812e1d2c..b33be361 100644 --- a/airbyte_cdk/sources/streams/concurrent/cursor.py +++ b/airbyte_cdk/sources/streams/concurrent/cursor.py @@ -22,7 +22,7 @@ ) -def _extract_value(mapping: Mapping[str, Any], path: list[str]) -> Any: +def _extract_value(mapping: Mapping[str, Any], path: list[str]) -> Any: # noqa: ANN401 (any-type) return functools.reduce(lambda a, b: a[b], path, mapping) @@ -138,11 +138,11 @@ class ConcurrentCursor(Cursor): _START_BOUNDARY = 0 _END_BOUNDARY = 1 - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too-many-args) self, stream_name: str, stream_namespace: str | None, - stream_state: Any, + stream_state: Any, # noqa: ANN401 (any-type) message_repository: MessageRepository, connector_state_manager: ConnectorStateManager, connector_state_converter: AbstractStreamStateConverter, @@ -204,7 +204,7 @@ def observe(self, record: Record) -> None: if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value: self._most_recent_cursor_value_per_partition[record.partition] = cursor_value - def _extract_cursor_value(self, record: Record) -> Any: + def _extract_cursor_value(self, record: Record) -> Any: # noqa: ANN401 (any-type) return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record)) def close_partition(self, partition: Partition) -> None: @@ -281,7 +281,7 @@ def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType try: _slice = partition.to_slice() if not _slice: - raise KeyError(f"Could not find key `{key}` in empty slice") + raise KeyError(f"Could not find key `{key}` in empty slice") # noqa: TRY301 (raise within try) return self._connector_state_converter.parse_value(_slice[key]) # type: ignore # we expect the devs to specify a key that would return a CursorValueType except KeyError as exception: raise KeyError( @@ -311,7 +311,7 @@ def generate_slices(self) -> Iterable[tuple[CursorValueType, CursorValueType]]: yield from self._split_per_slice_range( self._start, self.state["slices"][0][self._connector_state_converter.START_KEY], - False, + upper_is_end=False, ) if len(self.state["slices"]) == 1: @@ -320,7 +320,7 @@ def generate_slices(self) -> Iterable[tuple[CursorValueType, CursorValueType]]: self.state["slices"][0][self._connector_state_converter.END_KEY] ), self._end_provider(), - True, + upper_is_end=True, ) elif len(self.state["slices"]) > 1: for i in range(len(self.state["slices"]) - 1): @@ -329,20 +329,20 @@ def generate_slices(self) -> Iterable[tuple[CursorValueType, CursorValueType]]: self.state["slices"][i][self._connector_state_converter.END_KEY] + self._cursor_granularity, self.state["slices"][i + 1][self._connector_state_converter.START_KEY], - False, + upper_is_end=False, ) 
else: yield from self._split_per_slice_range( self.state["slices"][i][self._connector_state_converter.END_KEY], self.state["slices"][i + 1][self._connector_state_converter.START_KEY], - False, + upper_is_end=False, ) yield from self._split_per_slice_range( self._calculate_lower_boundary_of_last_slice( self.state["slices"][-1][self._connector_state_converter.END_KEY] ), self._end_provider(), - True, + upper_is_end=True, ) else: raise ValueError("Expected at least one slice") @@ -361,7 +361,11 @@ def _calculate_lower_boundary_of_last_slice( return lower_boundary def _split_per_slice_range( - self, lower: CursorValueType, upper: CursorValueType, upper_is_end: bool + self, + lower: CursorValueType, + upper: CursorValueType, + *, + upper_is_end: bool, ) -> Iterable[tuple[CursorValueType, CursorValueType]]: if lower >= upper: return diff --git a/airbyte_cdk/sources/streams/concurrent/default_stream.py b/airbyte_cdk/sources/streams/concurrent/default_stream.py index bf8a8310..c3de3b14 100644 --- a/airbyte_cdk/sources/streams/concurrent/default_stream.py +++ b/airbyte_cdk/sources/streams/concurrent/default_stream.py @@ -26,7 +26,7 @@ class DefaultStream(AbstractStream): - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too complex) self, partition_generator: PartitionGenerator, name: str, @@ -66,7 +66,7 @@ def check_availability(self) -> StreamAvailability: def cursor_field(self) -> str | None: return self._cursor_field - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> Mapping[str, Any]: return self._json_schema diff --git a/airbyte_cdk/sources/streams/concurrent/exceptions.py b/airbyte_cdk/sources/streams/concurrent/exceptions.py index 96c380fd..62d9008d 100644 --- a/airbyte_cdk/sources/streams/concurrent/exceptions.py +++ b/airbyte_cdk/sources/streams/concurrent/exceptions.py @@ -12,7 +12,7 @@ class ExceptionWithDisplayMessage(Exception): def __init__( self, display_message: str, - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> None: super().__init__(**kwargs) self.display_message = display_message diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/record.py b/airbyte_cdk/sources/streams/concurrent/partitions/record.py index 4a995983..29e6dee3 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/record.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/record.py @@ -12,7 +12,7 @@ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition -class Record: +class Record: # noqa: PLW1641 # missing __hash__ method """Represents a record read from a stream.""" def __init__( diff --git a/airbyte_cdk/sources/streams/concurrent/partitions/types.py b/airbyte_cdk/sources/streams/concurrent/partitions/types.py index f3866579..7aab43be 100644 --- a/airbyte_cdk/sources/streams/concurrent/partitions/types.py +++ b/airbyte_cdk/sources/streams/concurrent/partitions/types.py @@ -1,6 +1,8 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in 'types' module + from __future__ import annotations from typing import Union @@ -12,7 +14,7 @@ from airbyte_cdk.sources.streams.concurrent.partitions.record import Record -class PartitionCompleteSentinel: +class PartitionCompleteSentinel: # noqa: PLW1641 # missing __hash__ method """A sentinel object indicating all records for a partition were produced. Includes a pointer to the partition that was processed. 
""" @@ -36,6 +38,6 @@ def __eq__(self, other: object) -> bool: """ Typedef representing the items that can be added to the ThreadBasedConcurrentStream """ -QueueItem = Union[ +QueueItem = Union[ # noqa: UP007 (deprecated Union type) Record, Partition, PartitionCompleteSentinel, PartitionGenerationCompletedSentinel, Exception ] diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py index 721873c9..030e4ae4 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py @@ -25,11 +25,11 @@ class AbstractStreamStateConverter(ABC): MOST_RECENT_RECORD_KEY = "most_recent_cursor_value" @abstractmethod - def _from_state_message(self, value: Any) -> Any: + def _from_state_message(self, value: Any) -> Any: # noqa: ANN401 (any-type) pass @abstractmethod - def _to_state_message(self, value: Any) -> Any: + def _to_state_message(self, value: Any) -> Any: # noqa: ANN401 (any-type) pass def __init__( @@ -57,7 +57,7 @@ def convert_to_state_message( return legacy_state or {} return self.serialize(stream_state, ConcurrencyCompatibleStateType.date_range) - def _get_latest_complete_time(self, slices: list[MutableMapping[str, Any]]) -> Any: + def _get_latest_complete_time(self, slices: list[MutableMapping[str, Any]]) -> Any: # noqa: ANN401 (any-type) """Get the latest time before which all records have been processed.""" if not slices: raise RuntimeError( @@ -103,7 +103,7 @@ def convert_from_sequential_state( self, cursor_field: CursorField, # to deprecate as it is only needed for sequential state stream_state: MutableMapping[str, Any], - start: Any | None, + start: Any | None, # noqa: ANN401 (any-type) ) -> tuple[Any, MutableMapping[str, Any]]: """Convert the state message to the format required by the ConcurrentCursor. @@ -118,7 +118,7 @@ def convert_from_sequential_state( ... @abstractmethod - def increment(self, value: Any) -> Any: + def increment(self, value: Any) -> Any: # noqa: ANN401 (any-type) """Increment a timestamp by a single unit.""" ... @@ -164,10 +164,10 @@ def merge_intervals( return merged_intervals @abstractmethod - def parse_value(self, value: Any) -> Any: + def parse_value(self, value: Any) -> Any: # noqa: ANN401 (any-type) """Parse the value of the cursor field into a comparable value.""" ... @property @abstractmethod - def zero_value(self) -> Any: ... + def zero_value(self) -> Any: ... # noqa: ANN401 (any-type) diff --git a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py index 4a878fb5..e06beaff 100644 --- a/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +++ b/airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py @@ -10,7 +10,7 @@ import pendulum from pendulum.datetime import DateTime -# FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and +# FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and # noqa: FIX001, TD001, TD004 # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest. 
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ( @@ -26,15 +26,15 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter): - def _from_state_message(self, value: Any) -> Any: + def _from_state_message(self, value: Any) -> Any: # noqa: ANN401 (any-type) return self.parse_timestamp(value) - def _to_state_message(self, value: Any) -> Any: + def _to_state_message(self, value: Any) -> Any: # noqa: ANN401 (any-type) return self.output_format(value) @property @abstractmethod - def _zero_value(self) -> Any: ... + def _zero_value(self) -> Any: ... # noqa: ANN401 (any-type) @property def zero_value(self) -> datetime: @@ -48,16 +48,16 @@ def get_end_provider(cls) -> Callable[[], datetime]: def increment(self, timestamp: datetime) -> datetime: ... @abstractmethod - def parse_timestamp(self, timestamp: Any) -> datetime: ... + def parse_timestamp(self, timestamp: Any) -> datetime: ... # noqa: ANN401 (any-type) @abstractmethod - def output_format(self, timestamp: datetime) -> Any: ... + def output_format(self, timestamp: datetime) -> Any: ... # noqa: ANN401 (any-type) - def parse_value(self, value: Any) -> Any: + def parse_value(self, value: Any) -> Any: # noqa: ANN401 (any-type) """Parse the value of the cursor field into a comparable value.""" return self.parse_timestamp(value) - def _compare_intervals(self, end_time: Any, start_time: Any) -> bool: + def _compare_intervals(self, end_time: Any, start_time: Any) -> bool: # noqa: ANN401 (any-type) return bool(self.increment(end_time) >= start_time) def convert_from_sequential_state( @@ -135,7 +135,7 @@ def output_format(self, timestamp: datetime) -> int: def parse_timestamp(self, timestamp: int) -> datetime: dt_object = pendulum.from_timestamp(timestamp) if not isinstance(dt_object, DateTime): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types @@ -157,7 +157,10 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter): _zero_value = "0001-01-01T00:00:00.000Z" def __init__( - self, is_sequential_state: bool = True, cursor_granularity: timedelta | None = None + self, + *, + is_sequential_state: bool = True, + cursor_granularity: timedelta | None = None, ) -> None: super().__init__(is_sequential_state=is_sequential_state) self._cursor_granularity = cursor_granularity or timedelta(milliseconds=1) @@ -165,13 +168,13 @@ def __init__( def increment(self, timestamp: datetime) -> datetime: return timestamp + self._cursor_granularity - def output_format(self, timestamp: datetime) -> Any: + def output_format(self, timestamp: datetime) -> Any: # noqa: ANN401 (any-type) return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z" def parse_timestamp(self, timestamp: str) -> datetime: dt_object = pendulum.parse(timestamp) if not isinstance(dt_object, DateTime): - raise ValueError( + raise ValueError( # noqa: TRY004 (expected TypeError) f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})" ) return dt_object # type: ignore # we are manually type checking because pendulum.parse may return different types @@ -186,6 +189,7 @@ def __init__( self, datetime_format: str, input_datetime_formats: list[str] | None = None, + *, is_sequential_state: 
bool = True, cursor_granularity: timedelta | None = None, ) -> None: diff --git a/airbyte_cdk/sources/streams/core.py b/airbyte_cdk/sources/streams/core.py index 68a809fa..0f7aecef 100644 --- a/airbyte_cdk/sources/streams/core.py +++ b/airbyte_cdk/sources/streams/core.py @@ -23,6 +23,7 @@ SyncMode, ) from airbyte_cdk.models import Type as MessageType +from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager from airbyte_cdk.sources.streams.checkpoint import ( CheckpointMode, CheckpointReader, @@ -45,7 +46,7 @@ # A stream's read method can return one of the following types: # Mapping[str, Any]: The content of an AirbyteRecordMessage # AirbyteMessage: An AirbyteMessage. Could be of any type -StreamData = Union[Mapping[str, Any], AirbyteMessage] +StreamData = Union[Mapping[str, Any], AirbyteMessage] # noqa: UP007 JsonSchema = Mapping[str, Any] @@ -119,7 +120,7 @@ class StreamClassification: # Moved to class declaration since get_updated_state is called on every record for incremental syncs, and thus the @deprecated decorator as well. -@deprecated( +@deprecated( # noqa: PLR0904 version="0.1.49", reason="Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.", action="ignore", @@ -147,7 +148,7 @@ def name(self) -> str: """:return: Stream name. By default this is the implementing class name, but it can be overridden as needed.""" return casing.camel_to_snake(self.__class__.__name__) - def get_error_display_message(self, exception: BaseException) -> str | None: + def get_error_display_message(self, exception: BaseException) -> str | None: # noqa: ARG002 (unused) """Retrieves the user-friendly display message that corresponds to an exception. This will be called when encountering an exception while reading records from the stream, and used to build the AirbyteTraceMessage. @@ -164,7 +165,7 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o logger: logging.Logger, slice_logger: SliceLogger, stream_state: MutableMapping[str, Any], - state_manager, + state_manager, # noqa: ANN001 internal_config: InternalConfig, ) -> Iterable[StreamData]: sync_mode = configured_stream.sync_mode @@ -175,7 +176,7 @@ def read( # type: ignore # ignoring typing for ConnectorStateManager because o # opposed to the incoming stream_state value. Because some connectors like ones using the file-based CDK modify # state before setting the value on the Stream attribute, the most up-to-date state is derived from Stream.state # instead of the stream_state parameter. This does not apply to legacy connectors using get_updated_state(). - try: + try: # noqa: SIM105 (suppressible exception) stream_state = self.state # type: ignore # we know the field might not exist... except AttributeError: pass @@ -291,14 +292,14 @@ def read_records( ) -> Iterable[StreamData]: """This method should be overridden by subclasses to read records based on the inputs""" - @cache + @cache # noqa: B019 (cached class methods can cause memory leaks) def get_json_schema(self) -> Mapping[str, Any]: """:return: A dict of the JSON schema representing this stream. The default implementation of this method looks for a JSONSchema file with the same name as this stream's "name" property. Override as needed. 
""" - # TODO show an example of using pydantic to define the JSON schema, or reading an OpenAPI spec + # TODO: show an example of using pydantic to define the JSON schema, or reading an OpenAPI spec return ResourceSchemaLoader(package_name_from_class(self.__class__)).get_schema(self.name) def as_airbyte_stream(self) -> AirbyteStream: @@ -392,9 +393,9 @@ def primary_key(self) -> str | list[str] | list[list[str]] | None: def stream_slices( self, *, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, - stream_state: Mapping[str, Any] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) + stream_state: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Iterable[Mapping[str, Any] | None]: """Override to define the slices for this stream. See the stream slicing section of the docs for more information. @@ -419,7 +420,9 @@ def state_checkpoint_interval(self) -> int | None: return None def get_updated_state( - self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any] + self, + current_stream_state: MutableMapping[str, Any], # noqa: ARG002 (unused) + latest_record: Mapping[str, Any], # noqa: ARG002 (unused) ) -> MutableMapping[str, Any]: """Override to extract state from the latest record. Needed to implement incremental sync. @@ -443,7 +446,7 @@ def get_cursor(self) -> Cursor | None: def _get_checkpoint_reader( self, - logger: logging.Logger, + logger: logging.Logger, # noqa: ARG002 (unused) cursor_field: list[str] | None, sync_mode: SyncMode, stream_state: MutableMapping[str, Any], @@ -590,7 +593,7 @@ def _wrapped_primary_key( elif isinstance(component, list): wrapped_keys.append(component) else: - raise ValueError(f"Element must be either list or str. Got: {type(component)}") + raise ValueError(f"Element must be either list or str. Got: {type(component)}") # noqa: TRY004 (should raise TypeError) return wrapped_keys raise ValueError(f"Element must be either list or str. Got: {type(keys)}") @@ -620,7 +623,7 @@ def _observe_state( def _checkpoint_state( # type: ignore # ignoring typing for ConnectorStateManager because of circular dependencies self, stream_state: Mapping[str, Any], - state_manager, + state_manager: ConnectorStateManager, ) -> AirbyteMessage: # TODO: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want # to reduce changes right now and this would span concurrent as well diff --git a/airbyte_cdk/sources/streams/http/availability_strategy.py b/airbyte_cdk/sources/streams/http/availability_strategy.py index e4f2f40f..e561ff8c 100644 --- a/airbyte_cdk/sources/streams/http/availability_strategy.py +++ b/airbyte_cdk/sources/streams/http/availability_strategy.py @@ -18,7 +18,10 @@ class HttpAvailabilityStrategy(AvailabilityStrategy): def check_availability( - self, stream: Stream, logger: logging.Logger, source: Source | None = None + self, + stream: Stream, + logger: logging.Logger, + source: Source | None = None, # noqa: ARG002 (unused) ) -> tuple[bool, str | None]: """Check stream availability by attempting to read the first record of the stream. 
@@ -47,7 +50,7 @@ def check_availability( try: self.get_first_record_for_slice(stream, stream_slice) - return True, None + return True, None # noqa: TRY300 (consider try-else) except StopIteration: logger.info(f"Successfully connected to stream {stream.name}, but got 0 records.") return True, None diff --git a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py index 4e4f7cdd..8e35940e 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/default_backoff_strategy.py @@ -13,7 +13,7 @@ class DefaultBackoffStrategy(BackoffStrategy): def backoff_time( self, - response_or_exception: requests.Response | requests.RequestException | None, - attempt_count: int, + response_or_exception: requests.Response | requests.RequestException | None, # noqa: ARG002 (unused) + attempt_count: int, # noqa: ARG002 (unused) ) -> float | None: return None diff --git a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py index efc601f5..7894d97b 100644 --- a/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +++ b/airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py @@ -49,7 +49,7 @@ def max_retries(self) -> int | None: def max_time(self) -> int | None: return self._max_time - def interpret_response( + def interpret_response( # noqa: PLR0911 (too many returns) self, response_or_exception: requests.Response | Exception | None = None ) -> ErrorResolution: """Interpret the response and return the corresponding response action, failure type, and error message. diff --git a/airbyte_cdk/sources/streams/http/exceptions.py b/airbyte_cdk/sources/streams/http/exceptions.py index 409c40ef..d224533a 100644 --- a/airbyte_cdk/sources/streams/http/exceptions.py +++ b/airbyte_cdk/sources/streams/http/exceptions.py @@ -33,7 +33,7 @@ class UserDefinedBackoffException(BaseBackoffException): def __init__( self, - backoff: int | float, + backoff: int | float, # noqa: PYI041 (redundant union) request: requests.PreparedRequest, response: requests.Response | Exception | None, error_message: str = "", diff --git a/airbyte_cdk/sources/streams/http/http.py b/airbyte_cdk/sources/streams/http/http.py index 638e8304..a474fdc4 100644 --- a/airbyte_cdk/sources/streams/http/http.py +++ b/airbyte_cdk/sources/streams/http/http.py @@ -1,6 +1,8 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in 'http' module + from __future__ import annotations import logging @@ -174,9 +176,9 @@ def path( def request_params( self, - stream_state: Mapping[str, Any] | None, - stream_slice: Mapping[str, Any] | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> MutableMapping[str, Any]: """Override this method to define the query parameters that should be set on an outgoing HTTP request given the inputs. 
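# --- Editor's aside (not part of the patch): the shape of the refactor that the TRY300
# suppressions above decline. TRY300 asks that a success-path `return` move into an
# `else:` clause so only the risky call sits inside `try`. Minimal sketch, hypothetical name:
def first_or_none(records):
    try:
        record = next(records)
    except StopIteration:
        return None
    else:
        return record
# The patch keeps the inline `return` plus a noqa wherever splitting the block would read worse.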
@@ -186,18 +188,18 @@ def request_params( def request_headers( self, - stream_state: Mapping[str, Any] | None, - stream_slice: Mapping[str, Any] | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: """Override to return any non-auth headers. Authentication headers will overwrite any overlapping headers returned from this method.""" return {} def request_body_data( self, - stream_state: Mapping[str, Any] | None, - stream_slice: Mapping[str, Any] | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any] | str | None: """Override when creating POST/PUT/PATCH requests to populate the body of the request with a non-JSON payload. @@ -211,9 +213,9 @@ def request_body_data( def request_body_json( self, - stream_state: Mapping[str, Any] | None, - stream_slice: Mapping[str, Any] | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any] | None: """Override when creating POST/PUT/PATCH requests to populate the body of the request with a JSON payload. @@ -223,9 +225,9 @@ def request_body_json( def request_kwargs( self, - stream_state: Mapping[str, Any] | None, - stream_slice: Mapping[str, Any] | None = None, - next_page_token: Mapping[str, Any] | None = None, + stream_state: Mapping[str, Any] | None, # noqa: ARG002 (unused) + stream_slice: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) + next_page_token: Mapping[str, Any] | None = None, # noqa: ARG002 (unused) ) -> Mapping[str, Any]: """Override to return a mapping of keyword arguments to be used when creating the HTTP request. 
Any option listed in https://docs.python-requests.org/en/latest/api/#requests.adapters.BaseAdapter.send for can be returned from @@ -334,15 +336,15 @@ def get_error_display_message(self, exception: BaseException) -> str | None: def read_records( self, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) stream_slice: Mapping[str, Any] | None = None, stream_state: Mapping[str, Any] | None = None, ) -> Iterable[StreamData]: # A cursor_field indicates this is an incremental stream which offers better checkpointing than RFR enabled via the cursor if self.cursor_field or not isinstance(self.get_cursor(), ResumableFullRefreshCursor): yield from self._read_pages( - lambda req, res, state, _slice: self.parse_response( + lambda req, res, state, _slice: self.parse_response( # noqa: ARG005 (unused) res, stream_slice=_slice, stream_state=state ), stream_slice, @@ -350,7 +352,7 @@ def read_records( ) else: yield from self._read_single_page( - lambda req, res, state, _slice: self.parse_response( + lambda req, res, state, _slice: self.parse_response( # noqa: ARG005 (unused) res, stream_slice=_slice, stream_state=state ), stream_slice, @@ -530,7 +532,7 @@ def get_log_formatter(self) -> Callable[[requests.Response], Any] | None: class HttpSubStream(HttpStream, ABC): - def __init__(self, parent: HttpStream, **kwargs: Any) -> None: + def __init__(self, parent: HttpStream, **kwargs: Any) -> None: # noqa: ANN401 (any-type) """:param parent: should be the instance of HttpStream class""" super().__init__(**kwargs) self.parent = parent @@ -552,8 +554,8 @@ def __init__(self, parent: HttpStream, **kwargs: Any) -> None: def stream_slices( self, - sync_mode: SyncMode, - cursor_field: list[str] | None = None, + sync_mode: SyncMode, # noqa: ARG002 (unused) + cursor_field: list[str] | None = None, # noqa: ARG002 (unused) stream_state: Mapping[str, Any] | None = None, ) -> Iterable[Mapping[str, Any] | None]: # read_stateless() assumes the parent is not concurrent. 
This is currently okay since the concurrent CDK does @@ -562,11 +564,11 @@ def stream_slices( # Skip non-records (eg AirbyteLogMessage) if isinstance(parent_record, AirbyteMessage): if parent_record.type == MessageType.RECORD: - parent_record = parent_record.record.data + parent_record = parent_record.record.data # noqa: PLW2901 (redefined loop name) else: continue elif isinstance(parent_record, Record): - parent_record = parent_record.data + parent_record = parent_record.data # noqa: PLW2901 (redefined loop name) yield {"parent": parent_record} @@ -581,7 +583,7 @@ def __init__(self, stream: HttpStream) -> None: def backoff_time( self, response_or_exception: requests.Response | requests.RequestException | None, - attempt_count: int, + attempt_count: int, # noqa: ARG002 (unused) ) -> float | None: return self.stream.backoff_time(response_or_exception) # type: ignore # HttpStream.backoff_time has been deprecated @@ -595,7 +597,7 @@ def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa self.stream = stream super().__init__(**kwargs) - def interpret_response( + def interpret_response( # noqa: PLR0911 (too-many-return-statements) self, response_or_exception: requests.Response | Exception | None = None ) -> ErrorResolution: if isinstance(response_or_exception, Exception): @@ -603,7 +605,7 @@ def interpret_response( if isinstance(response_or_exception, requests.Response): should_retry = self.stream.should_retry(response_or_exception) # type: ignore if should_retry: - if response_or_exception.status_code == 429: + if response_or_exception.status_code == 429: # noqa: PLR2004 (magic number) return ErrorResolution( response_action=ResponseAction.RATE_LIMITED, failure_type=FailureType.transient_error, diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index cdde891d..80aa5310 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -89,6 +89,7 @@ def __init__( api_budget: APIBudget | None = None, session: requests.Session | requests_cache.CachedSession | None = None, authenticator: AuthBase | None = None, + *, use_cache: bool = False, backoff_strategy: BackoffStrategy | list[BackoffStrategy] | None = None, error_message_parser: ErrorMessageParser | None = None, @@ -173,6 +174,7 @@ def _create_prepared_request( self, http_method: str, url: str, + *, dedupe_query_params: bool = False, headers: Mapping[str, str] | None = None, params: Mapping[str, str] | None = None, @@ -220,6 +222,7 @@ def _send_with_retry( request: requests.PreparedRequest, request_kwargs: Mapping[str, Any], log_formatter: Callable[[requests.Response], Any] | None = None, + *, exit_on_rate_limit: bool | None = False, ) -> requests.Response: """Sends a request with retry logic. 
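# --- Editor's aside (not part of the patch): the retry logic referenced here is built on
# the `backoff` library; the handlers in rate_limiting.py below wrap the send callable
# roughly like this. A simplified sketch only; the real handlers add giveup predicates,
# jitter, and logging:
import backoff
import requests

@backoff.on_exception(backoff.expo, requests.exceptions.RequestException, max_tries=5, factor=2)
def send_with_retry(url: str) -> requests.Response:
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # HTTPError subclasses RequestException, so 5xx triggers a retry
    return response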
@@ -255,6 +258,7 @@ def _send( request: requests.PreparedRequest, request_kwargs: Mapping[str, Any], log_formatter: Callable[[requests.Response], Any] | None = None, + *, exit_on_rate_limit: bool | None = False, ) -> requests.Response: if request not in self._request_attempt_count: @@ -327,6 +331,7 @@ def _handle_error_resolution( exc: requests.RequestException | None, request: requests.PreparedRequest, error_resolution: ErrorResolution, + *, exit_on_rate_limit: bool | None = False, ) -> None: # Emit stream status RUNNING with the reason RATE_LIMITED to log that the rate limit has been reached @@ -424,7 +429,7 @@ def _handle_error_resolution( def name(self) -> str: return self._name - def send_request( + def send_request( # noqa: PLR0913 (too-many-arguments) self, http_method: str, url: str, @@ -433,6 +438,7 @@ def send_request( params: Mapping[str, str] | None = None, json: Mapping[str, Any] | None = None, data: str | Mapping[str, Any] | None = None, + *, dedupe_query_params: bool = False, log_formatter: Callable[[requests.Response], Any] | None = None, exit_on_rate_limit: bool | None = False, diff --git a/airbyte_cdk/sources/streams/http/rate_limiting.py b/airbyte_cdk/sources/streams/http/rate_limiting.py index 6fdbfd51..9bcece02 100644 --- a/airbyte_cdk/sources/streams/http/rate_limiting.py +++ b/airbyte_cdk/sources/streams/http/rate_limiting.py @@ -34,7 +34,10 @@ def default_backoff_handler( - max_tries: int | None, factor: float, max_time: int | None = None, **kwargs: Any + max_tries: int | None, + factor: float, + max_time: int | None = None, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def log_retry_attempt(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() @@ -52,7 +55,7 @@ def should_give_up(exc: Exception) -> bool: give_up: bool = ( exc.response is not None and exc.response.status_code != codes.too_many_requests - and 400 <= exc.response.status_code < 500 + and 400 <= exc.response.status_code < 500 # noqa: PLR2004 (magic number) ) if give_up: logger.info(f"Giving up for returned HTTP status: {exc.response.status_code!r}") @@ -74,7 +77,9 @@ def should_give_up(exc: Exception) -> bool: def http_client_default_backoff_handler( - max_tries: int | None, max_time: int | None = None, **kwargs: Any + max_tries: int | None, + max_time: int | None = None, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def log_retry_attempt(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() @@ -86,7 +91,7 @@ def log_retry_attempt(details: Mapping[str, Any]) -> None: f"Caught retryable error '{exc!s}' after {details['tries']} tries. Waiting {details['wait']} seconds then retrying..." 
) - def should_give_up(exc: Exception) -> bool: + def should_give_up(exc: Exception) -> bool: # noqa: ARG001 # If made it here, the ResponseAction was RETRY and therefore should not give up return False @@ -103,9 +108,11 @@ def should_give_up(exc: Exception) -> bool: def user_defined_backoff_handler( - max_tries: int | None, max_time: int | None = None, **kwargs: Any + max_tries: int | None, + max_time: int | None = None, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: - def sleep_on_ratelimit(details: Mapping[str, Any]) -> None: + def sleep_on_ratelimit(details: Mapping[str, Any]) -> None: # noqa: ARG001 _, exc, _ = sys.exc_info() if isinstance(exc, UserDefinedBackoffException): if exc.response: @@ -139,7 +146,7 @@ def log_give_up(details: Mapping[str, Any]) -> None: def rate_limit_default_backoff_handler( - **kwargs: Any, + **kwargs: Any, # noqa: ANN401 (any-type) ) -> Callable[[SendRequestCallableType], SendRequestCallableType]: def log_retry_attempt(details: Mapping[str, Any]) -> None: _, exc, _ = sys.exc_info() diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py index a1f95aa0..d3f6f6d3 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py @@ -119,7 +119,7 @@ def _wrap_refresh_token_exception( ), max_time=300, ) - def _get_refresh_access_token_response(self) -> Any: + def _get_refresh_access_token_response(self) -> Any: # noqa: ANN401 (any-type) try: response = requests.request( method="POST", @@ -129,11 +129,11 @@ def _get_refresh_access_token_response(self) -> Any: if response.ok: response_json = response.json() # Add the access token to the list of secrets so it is replaced before logging the response - # An argument could be made to remove the prevous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen... + # An argument could be made to remove the previous access key from the list of secrets, but unmasking values seems like a security incident waiting to happen... access_key = response_json.get(self.get_access_token_name()) if not access_key: - raise Exception( - "Token refresh API response was missing access token {self.get_access_token_name()}" + raise Exception( # noqa: TRY002, TRY301 (vanilla exception, raise within try) + f"Token refresh API response was missing access token {self.get_access_token_name()}" ) add_to_secrets(access_key) self._log_response(response) @@ -142,17 +142,20 @@ def _get_refresh_access_token_response(self) -> Any: self._log_response(response) response.raise_for_status() except requests.exceptions.RequestException as e: - if e.response is not None: - if e.response.status_code == 429 or e.response.status_code >= 500: - raise DefaultBackoffException(request=e.response.request, response=e.response) + if e.response is not None: # noqa: SIM102 (collapsible-if) + if e.response.status_code == 429 or e.response.status_code >= 500: # noqa: PLR2004 (magic number) + raise DefaultBackoffException( + request=e.response.request, + response=e.response, + ) from None if self._wrap_refresh_token_exception(e): message = "Refresh token is invalid or expired. Please re-authenticate from Sources//Settings." 
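
The PLR2004 suppressions above (the literal 429 in the HttpStream adapter, the `400 <= status < 500` band in `should_give_up`) acknowledge magic numbers without fixing them. A hedged sketch of what the eventual cleanup could look like with the stdlib `http.HTTPStatus` names; this standalone predicate is an illustration, not the CDK's `should_give_up`:

from http import HTTPStatus

def should_give_up(status_code: int) -> bool:
    # Retry 429s and server errors; give up on any other 4xx client error.
    if status_code == HTTPStatus.TOO_MANY_REQUESTS:
        return False
    return HTTPStatus.BAD_REQUEST <= status_code < HTTPStatus.INTERNAL_SERVER_ERROR

assert should_give_up(404) is True   # client error: retrying will not help
assert should_give_up(429) is False  # rate limited: back off and retry
assert should_give_up(503) is False  # server error: retry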
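
Several hunks in this change append `from None` to re-raised exceptions (the JSON errors in connector.py, the `DefaultBackoffException` above). A short runnable demonstration of what that suffix does: it sets `__suppress_context__` on the new exception, so the "During handling of the above exception, another exception occurred" traceback is dropped. Names are illustrative:

import json

def parse_or_raise(text: str) -> dict:
    try:
        return json.loads(text)
    except json.JSONDecodeError as error:
        # `from None` severs the implicit chain back to JSONDecodeError.
        raise ValueError(f"Could not read json: {error}") from None

try:
    parse_or_raise("not json")
except ValueError as err:
    assert err.__suppress_context__ is True
    assert err.__cause__ is None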
raise AirbyteTracedException( internal_message=message, message=message, failure_type=FailureType.config_error - ) + ) from None raise except Exception as e: - raise Exception(f"Error while refreshing access token: {e}") from e + raise Exception(f"Error while refreshing access token: {e}") from e # noqa: TRY002 (vanilla exception) def refresh_access_token(self) -> tuple[str, str | int]: """Returns the refresh token and its expiration datetime diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py index d463640f..ce586cfd 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py @@ -16,7 +16,7 @@ class AbstractHeaderAuthenticator(AuthBase): """Abstract class for an header-based authenticators that add a header to outgoing HTTP requests.""" - def __call__(self, request: Any) -> Any: + def __call__(self, request: Any) -> Any: # noqa: ANN401 (any-type) """Attach the HTTP headers required to authenticate on the HTTP request""" request.headers.update(self.get_auth_header()) return request diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py index 1f537195..bd2228d6 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py @@ -28,7 +28,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator): If a connector_config is provided any mutation of it's value in the scope of this class will emit AirbyteControlConnectorConfigMessage. """ - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too many arguments) self, token_refresh_endpoint: str, client_id: str, @@ -124,7 +124,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator): client_secret_config_path, refresh_token_config_path constructor arguments. """ - def __init__( + def __init__( # noqa: PLR0913, PLR0917 (too many arguments) self, connector_config: Mapping[str, Any], token_refresh_endpoint: str, @@ -140,7 +140,7 @@ def __init__( refresh_token_config_path: Sequence[str] = ("credentials", "refresh_token"), token_expiry_date_config_path: Sequence[str] = ("credentials", "token_expiry_date"), token_expiry_date_format: str | None = None, - message_repository: MessageRepository = NoopMessageRepository(), + message_repository: MessageRepository = NoopMessageRepository(), # noqa: B008 (function call in default) *, token_expiry_is_time_of_expiration: bool = False, refresh_token_error_status_codes: tuple[int, ...] 
= (), @@ -227,9 +227,13 @@ def get_token_expiry_date(self) -> pendulum.DateTime: expiry_date = dpath.get( self._connector_config, self._token_expiry_date_config_path, default="" ) - return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) + return ( + pendulum.now().subtract(days=1) + if expiry_date == "" # noqa: PLC1901 (comparison to empty string) + else pendulum.parse(expiry_date) + ) - def set_token_expiry_date(self, new_token_expiry_date) -> None: + def set_token_expiry_date(self, new_token_expiry_date) -> None: # noqa: ANN001 dpath.new( self._connector_config, self._token_expiry_date_config_path, @@ -264,7 +268,7 @@ def get_access_token(self) -> str: self.access_token = new_access_token self.set_refresh_token(new_refresh_token) self.set_token_expiry_date(new_token_expiry_date) - # FIXME emit_configuration_as_airbyte_control_message as been deprecated in favor of package airbyte_cdk.sources.message + # TODO: emit_configuration_as_airbyte_control_message has been deprecated in favor of package airbyte_cdk.sources.message # Usually, a class shouldn't care about the implementation details but to keep backward compatibility where we print the # message directly in the console, this is needed if not isinstance(self._message_repository, NoopMessageRepository): diff --git a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py index 652508bd..f7440909 100644 --- a/airbyte_cdk/sources/streams/http/requests_native_auth/token.py +++ b/airbyte_cdk/sources/streams/http/requests_native_auth/token.py @@ -1,6 +1,8 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in name 'token' module + from __future__ import annotations import base64 diff --git a/airbyte_cdk/sources/types.py b/airbyte_cdk/sources/types.py index 6ef81260..fbfed683 100644 --- a/airbyte_cdk/sources/types.py +++ b/airbyte_cdk/sources/types.py @@ -1,6 +1,5 @@ -# # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
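
On the B008 suppression for `message_repository: MessageRepository = NoopMessageRepository()` above: a call in a default is evaluated once at definition time, so every authenticator constructed without the argument shares one repository instance. For a stateless no-op repository that is presumably acceptable, hence the noqa rather than a refactor; where sharing would be a bug, the usual fix is a `None` sentinel. A sketch with a hypothetical stand-in class:

class NoopRepo:
    def __init__(self) -> None:
        self.messages: list[str] = []

def make_authenticator(repo: NoopRepo | None = None) -> NoopRepo:
    # Evaluate the default per call instead of once at import time.
    return repo if repo is not None else NoopRepo()

assert make_authenticator() is not make_authenticator()  # fresh default each call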
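
The reformatted `get_token_expiry_date` ternary above treats an empty stored expiry as already expired by returning "yesterday", forcing a token refresh on first use. The same logic in isolation (requires `pendulum`; `not raw` is the spelling the suppressed PLC1901 would suggest):

import pendulum

def token_expiry_date(raw: str) -> pendulum.DateTime:
    # Empty string means the expiry was never stored: report a past date.
    return pendulum.now().subtract(days=1) if not raw else pendulum.parse(raw)

assert token_expiry_date("") < pendulum.now()            # triggers a refresh
assert token_expiry_date("2030-01-01") > pendulum.now()  # still valid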
-# +# ruff: noqa: A005 # Shadows built-in 'types' module from __future__ import annotations @@ -16,7 +15,7 @@ StreamState = Mapping[str, Any] -class Record(Mapping[str, Any]): +class Record(Mapping[str, Any]): # noqa: PLW1641 # missing __hash__ method def __init__( self, data: Mapping[str, Any], @@ -36,13 +35,13 @@ def associated_slice(self) -> StreamSlice | None: def __repr__(self) -> str: return repr(self._data) - def __getitem__(self, key: str) -> Any: + def __getitem__(self, key: str) -> Any: # noqa: ANN401 (any-type) return self._data[key] def __len__(self) -> int: return len(self._data) - def __iter__(self) -> Any: + def __iter__(self) -> Any: # noqa: ANN401 (any-type) return iter(self._data) def __contains__(self, item: object) -> bool: @@ -58,7 +57,7 @@ def __ne__(self, other: object) -> bool: return not self.__eq__(other) -class StreamSlice(Mapping[str, Any]): +class StreamSlice(Mapping[str, Any]): # noqa: PLW1641 # missing __hash__ method def __init__( self, *, @@ -104,10 +103,10 @@ def extra_fields(self) -> Mapping[str, Any]: def __repr__(self) -> str: return repr(self._stream_slice) - def __setitem__(self, key: str, value: Any) -> None: + def __setitem__(self, key: str, value: Any) -> None: # noqa: ANN401 (any-type) raise ValueError("StreamSlice is immutable") - def __getitem__(self, key: str) -> Any: + def __getitem__(self, key: str) -> Any: # noqa: ANN401 (any-type) return self._stream_slice[key] def __len__(self) -> int: @@ -116,7 +115,7 @@ def __len__(self) -> int: def __iter__(self) -> Iterator[str]: return iter(self._stream_slice) - def __contains__(self, item: Any) -> bool: + def __contains__(self, item: Any) -> bool: # noqa: ANN401 (any-type) return item in self._stream_slice def keys(self) -> KeysView[str]: @@ -128,7 +127,7 @@ def items(self) -> ItemsView[str, Any]: def values(self) -> ValuesView[Any]: return self._stream_slice.values() - def get(self, key: str, default: Any = None) -> Any | None: + def get(self, key: str, default: Any = None) -> Any | None: # noqa: ANN401 (any-type) return self._stream_slice.get(key, default) def __eq__(self, other: object) -> bool: @@ -142,5 +141,5 @@ def __eq__(self, other: object) -> bool: def __ne__(self, other: object) -> bool: return not self.__eq__(other) - def __json_serializable__(self) -> Any: + def __json_serializable__(self) -> Any: # noqa: ANN401, PLW3201 (any-type, unexpected types) return self._stream_slice diff --git a/airbyte_cdk/sources/utils/record_helper.py b/airbyte_cdk/sources/utils/record_helper.py index 7c8e0ce1..378dc0de 100644 --- a/airbyte_cdk/sources/utils/record_helper.py +++ b/airbyte_cdk/sources/utils/record_helper.py @@ -26,8 +26,9 @@ def stream_data_to_airbyte_message( stream_name: str, data_or_message: StreamData, - transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform), + transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform), # noqa: B008 (function call in default) schema: Mapping[str, Any] | None = None, + *, is_file_transfer_message: bool = False, ) -> AirbyteMessage: if schema is None: diff --git a/airbyte_cdk/sources/utils/schema_helpers.py b/airbyte_cdk/sources/utils/schema_helpers.py index 78efcd2d..063aab2e 100644 --- a/airbyte_cdk/sources/utils/schema_helpers.py +++ b/airbyte_cdk/sources/utils/schema_helpers.py @@ -31,14 +31,14 @@ def __init__(self, uri_base: str, shared: str) -> None: def __call__(self, uri: str) -> dict[str, Any]: uri = uri.replace(self.uri_base, f"{self.uri_base}/{self.shared}/") - with open(uri, encoding="utf-8") as f: + with 
open(uri, encoding="utf-8") as f: # noqa: PTH123 (prefer pathlib) data = json.load(f) if isinstance(data, dict): return data raise ValueError(f"Expected to read a dictionary from {uri}. Got: {data}") -def resolve_ref_links(obj: Any) -> Any: +def resolve_ref_links(obj: Any) -> Any: # noqa: ANN401 (any-type) """Scan resolved schema and convert jsonref.JsonRef object to JSON serializable dict. :param obj - jsonschema object with ref field resolved. @@ -59,7 +59,7 @@ def resolve_ref_links(obj: Any) -> Any: return obj -def _expand_refs(schema: Any, ref_resolver: RefResolver | None = None) -> None: +def _expand_refs(schema: Any, ref_resolver: RefResolver | None = None) -> None: # noqa: ANN401 (any-type) """Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive. :param schema: schema that will be patched @@ -83,7 +83,7 @@ def _expand_refs(schema: Any, ref_resolver: RefResolver | None = None) -> None: _expand_refs(value, ref_resolver=ref_resolver) -def expand_refs(schema: Any) -> None: +def expand_refs(schema: Any) -> None: # noqa: ANN401 (any-type) """Iterate over schema and replace all occurrences of $ref with their definitions. :param schema: schema that will be patched @@ -92,7 +92,7 @@ def expand_refs(schema: Any) -> None: schema.pop("definitions", None) # remove definitions created by $ref -def rename_key(schema: Any, old_key: str, new_key: str) -> None: +def rename_key(schema: Any, old_key: str, new_key: str) -> None: # noqa: ANN401 (any-type) """Iterate over nested dictionary and replace one key with another. Used to replace anyOf with oneOf. Recursive. @@ -145,7 +145,7 @@ def _resolve_schema_references(self, raw_schema: dict[str, Any]) -> dict[str, An """ package = importlib.import_module(self.package_name) if package.__file__: - base = os.path.dirname(package.__file__) + "/" + base = os.path.dirname(package.__file__) + "/" # noqa: PTH120 (prefer pathlib) else: raise ValueError(f"Package {package} does not have a valid __file__ field") resolved = jsonref.JsonRef.replace_refs( @@ -182,7 +182,7 @@ class InternalConfig(BaseModel): limit: int = Field(None, alias="_limit") page_size: int = Field(None, alias="_page_size") - def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]: + def dict(self, *args: Any, **kwargs: Any) -> dict[str, Any]: # noqa: ANN401 (any-type) kwargs["by_alias"] = True kwargs["exclude_unset"] = True return super().dict(*args, **kwargs) # type: ignore[no-any-return] diff --git a/airbyte_cdk/sources/utils/slice_logger.py b/airbyte_cdk/sources/utils/slice_logger.py index 2154931c..9a63007d 100644 --- a/airbyte_cdk/sources/utils/slice_logger.py +++ b/airbyte_cdk/sources/utils/slice_logger.py @@ -53,4 +53,5 @@ def should_log_slice_message(self, logger: logging.Logger) -> bool: class AlwaysLogSliceLogger(SliceLogger): def should_log_slice_message(self, logger: logging.Logger) -> bool: + _ = logger return True diff --git a/airbyte_cdk/sources/utils/transform.py b/airbyte_cdk/sources/utils/transform.py index 8ea8069b..b65db535 100644 --- a/airbyte_cdk/sources/utils/transform.py +++ b/airbyte_cdk/sources/utils/transform.py @@ -12,7 +12,7 @@ if TYPE_CHECKING: - from collections.abc import Callable, Mapping + from collections.abc import Callable, Generator, Mapping json_to_python_simple = { @@ -56,7 +56,7 @@ def __init__(self, config: TransformConfig) -> None: :param config Transform config that would be applied to object """ if TransformConfig.NoTransform in config and config != TransformConfig.NoTransform: - raise 
Exception("NoTransform option cannot be combined with other flags.") + raise Exception("NoTransform option cannot be combined with other flags.") # noqa: TRY002 (vanilla exception) self._config = config all_validators = { key: self.__get_normalizer(key, orig_validator) @@ -68,7 +68,7 @@ def __init__(self, config: TransformConfig) -> None: meta_schema=Draft7Validator.META_SCHEMA, validators=all_validators ) - def registerCustomTransform( + def registerCustomTransform( # noqa: N802 (violates naming convention) self, normalization_callback: Callable[[Any, dict[str, Any]], Any] ) -> Callable: """Register custom normalization callback. @@ -79,13 +79,13 @@ def registerCustomTransform( :return Same callbeck, this is usefull for using registerCustomTransform function as decorator. """ if TransformConfig.CustomSchemaNormalization not in self._config: - raise Exception( + raise Exception( # noqa: TRY002 (vanilla exception) "Please set TransformConfig.CustomSchemaNormalization config before registering custom normalizer" ) self._custom_normalizer = normalization_callback return normalization_callback - def __normalize(self, original_item: Any, subschema: dict[str, Any]) -> Any: + def __normalize(self, original_item: Any, subschema: dict[str, Any]) -> Any: # noqa: ANN401 (any-type) """Applies different transform function to object's field according to config. :param original_item original value of field. :param subschema part of the jsonschema containing field type/format data. @@ -99,7 +99,10 @@ def __normalize(self, original_item: Any, subschema: dict[str, Any]) -> Any: return original_item @staticmethod - def default_convert(original_item: Any, subschema: dict[str, Any]) -> Any: + def default_convert( # noqa: PLR0911 (too many return statements) + original_item: Any, # noqa: ANN401 (any-type) + subschema: dict[str, Any], + ) -> Any: # noqa: ANN401 (any-type) """Default transform function that is used when TransformConfig.DefaultSchemaNormalization flag set. :param original_item original value of field. :param subschema part of the jsonschema containing field type/format data. @@ -140,15 +143,22 @@ def default_convert(original_item: Any, subschema: dict[str, Any]) -> Any: return original_item return original_item - def __get_normalizer(self, schema_key: str, original_validator: Callable): + def __get_normalizer( + self, + schema_key: str, + original_validator: Callable, + ) -> Callable[..., Generator[Any, Any, None]]: """Traverse through object fields using native jsonschema validator and apply normalization function. :param schema_key related json schema key that currently being validated/normalized. :original_validator: native jsonschema validator callback. """ - def normalizator( - validator_instance: Callable, property_value: Any, instance: Any, schema: dict[str, Any] - ): + def _normalizer_fn( + validator_instance: Callable, + property_value: Any, # noqa: ANN401 (any-type) + instance: Any, # noqa: ANN401 (any-type) + schema: dict[str, Any], + ) -> Generator[Any, Any, None]: """Jsonschema validator callable it uses for validating instance. We override default Draft7Validator to perform value transformation before validation take place. 
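
The PLW1641 suppressions on `Record` and `StreamSlice` above (and later on `HttpRequestMatcher` and `HttpRequest`) mark classes that define `__eq__` without `__hash__`. Python then sets `__hash__ = None` on the class, so instances cannot be set members or dict keys. A toy demonstration, independent of the CDK classes:

class Point:
    def __init__(self, x: int, y: int) -> None:
        self.x, self.y = x, y

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Point) and (self.x, self.y) == (other.x, other.y)

try:
    {Point(1, 2)}
except TypeError as err:
    print(err)  # unhashable type: 'Point'

class HashablePoint(Point):
    def __hash__(self) -> int:
        # A hash consistent with __eq__ restores set/dict membership.
        return hash((self.x, self.y))

assert HashablePoint(1, 2) in {HashablePoint(1, 2)}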
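
`__get_normalizer`/`_normalizer_fn` above wrap jsonschema's stock validators so values are transformed before validation runs. A hedged, self-contained sketch of that extension technique using `jsonschema.validators.extend`; this simplified stand-in hard-codes one coercion and is not the CDK's normalizer:

from jsonschema import Draft7Validator, validators

def coercing_properties(validator, properties, instance, schema):
    # Coerce numeric strings where the schema expects an integer, then delegate.
    if isinstance(instance, dict):
        for key, subschema in properties.items():
            if key in instance and subschema.get("type") == "integer":
                try:
                    instance[key] = int(instance[key])
                except (TypeError, ValueError):
                    pass  # leave it for the real validator to reject
    yield from Draft7Validator.VALIDATORS["properties"](validator, properties, instance, schema)

CoercingValidator = validators.extend(Draft7Validator, {"properties": coercing_properties})

record = {"id": "42"}
CoercingValidator({"properties": {"id": {"type": "integer"}}}).validate(record)
assert record == {"id": 42}  # mutated in place, as TypeTransformer.transform also does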
We do not take any action except @@ -160,7 +170,7 @@ def normalizator( : """ - def resolve(subschema): + def resolve(subschema): # noqa: ANN001, ANN202 if "$ref" in subschema: _, resolved = validator_instance.resolver.resolve(subschema["$ref"]) return resolved @@ -172,7 +182,7 @@ def resolve(subschema): if schema_key == "properties" and isinstance(instance, dict): for k, subschema in property_value.items(): if k in instance: - subschema = resolve(subschema) + subschema = resolve(subschema) # noqa: PLW2901 (redefined loop var) instance[k] = self.__normalize(instance[k], subschema) # Recursively normalize every item of the "instance" sub-array, # if "instance" is an incorrect type - skip recursive normalization of "instance" @@ -184,7 +194,7 @@ def resolve(subschema): # Running native jsonschema traverse algorithm after field normalization is done. yield from original_validator(validator_instance, property_value, instance, schema) - return normalizator + return _normalizer_fn def transform(self, record: dict[str, Any], schema: Mapping[str, Any]) -> None: """Normalize and validate according to config. diff --git a/airbyte_cdk/sources/utils/types.py b/airbyte_cdk/sources/utils/types.py index 707aedc5..7d00b6e1 100644 --- a/airbyte_cdk/sources/utils/types.py +++ b/airbyte_cdk/sources/utils/types.py @@ -1,9 +1,19 @@ # # Copyright (c) 2023 Airbyte, Inc., all rights reserved. # +# ruff: noqa: A005 # Shadows built-in 'types' module + from __future__ import annotations from typing import Union -JsonType = Union[dict[str, "JsonType"], list["JsonType"], str, int, float, bool, None] +JsonType = Union[ # noqa: UP007 (deprecated Union type) + dict[str, "JsonType"], + list["JsonType"], + str, + int, + float, + bool, + None, +] diff --git a/airbyte_cdk/sql/secrets.py b/airbyte_cdk/sql/secrets.py index 8df5b488..ac00671a 100644 --- a/airbyte_cdk/sql/secrets.py +++ b/airbyte_cdk/sql/secrets.py @@ -1,5 +1,6 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. """Base classes and methods for working with secrets in Airbyte.""" +# ruff: noqa: A005 # Shadows built-in 'secrets' module from __future__ import annotations @@ -66,7 +67,7 @@ def __bool__(self) -> bool: """ return True - def parse_json(self) -> Any: + def parse_json(self) -> Any: # noqa: ANN401 (any-type) """Parse the secret string as JSON.""" try: return json.loads(self) @@ -97,7 +98,7 @@ def validate( return cls(v) @classmethod - def __get_pydantic_core_schema__( # Pydantic dunder + def __get_pydantic_core_schema__( # Pydantic dunder # noqa: PLW3201 cls, source_type: Any, # noqa: ANN401 # Must allow `Any` to match Pydantic signature handler: GetCoreSchemaHandler, @@ -108,7 +109,7 @@ def __get_pydantic_core_schema__( # Pydantic dunder ) @classmethod - def __get_pydantic_json_schema__( # Pydantic dunder method + def __get_pydantic_json_schema__( # Pydantic dunder method # noqa: PLW3201 cls, _core_schema: core_schema.CoreSchema, handler: GetJsonSchemaHandler ) -> JsonSchemaValue: """Return a modified JSON schema for the secret string. 
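
The reformatted `JsonType` above is a recursive alias: the quoted `"JsonType"` occurrences are forward references to the alias itself, which is presumably why the UP007 modernization to `X | Y` syntax was suppressed rather than applied. A small usage sketch of the shape it describes:

from typing import Union

JsonType = Union[dict[str, "JsonType"], list["JsonType"], str, int, float, bool, None]

def depth(value: JsonType) -> int:
    # Nesting depth of an arbitrary JSON-like value.
    if isinstance(value, dict):
        return 1 + max(map(depth, value.values()), default=0)
    if isinstance(value, list):
        return 1 + max(map(depth, value), default=0)
    return 0

assert depth({"a": [1, {"b": None}]}) == 3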
diff --git a/airbyte_cdk/sql/shared/catalog_providers.py b/airbyte_cdk/sql/shared/catalog_providers.py index da66d1a7..ff8be527 100644 --- a/airbyte_cdk/sql/shared/catalog_providers.py +++ b/airbyte_cdk/sql/shared/catalog_providers.py @@ -40,7 +40,7 @@ def __init__( self._catalog: ConfiguredAirbyteCatalog = self.validate_catalog(configured_catalog) @staticmethod - def validate_catalog(catalog: ConfiguredAirbyteCatalog) -> Any: + def validate_catalog(catalog: ConfiguredAirbyteCatalog) -> Any: # noqa: ANN401 (any-type) """Validate the catalog to ensure it is valid. This requires ensuring that `generationId` and `minGenerationId` are both set. If diff --git a/airbyte_cdk/sql/shared/sql_processor.py b/airbyte_cdk/sql/shared/sql_processor.py index 654633ff..d580b008 100644 --- a/airbyte_cdk/sql/shared/sql_processor.py +++ b/airbyte_cdk/sql/shared/sql_processor.py @@ -114,7 +114,7 @@ def get_vendor_client(self) -> object: ) -class SqlProcessorBase(abc.ABC): +class SqlProcessorBase(abc.ABC): # noqa: B024 (no abstract methods) """A base class to be used for SQL Caches.""" type_converter_class: type[SQLTypeConverter] = SQLTypeConverter diff --git a/airbyte_cdk/sql/types.py b/airbyte_cdk/sql/types.py index 8893670a..2585822d 100644 --- a/airbyte_cdk/sql/types.py +++ b/airbyte_cdk/sql/types.py @@ -1,5 +1,5 @@ -# Allow shadowing the built-in 'types' module # Copyright (c) 2023 Airbyte, Inc., all rights reserved. +# ruff: noqa: A005 # Shadows built-in 'types' module """Type conversion methods for SQL Caches.""" @@ -123,7 +123,7 @@ def get_json_type(cls) -> sqlalchemy.types.TypeEngine[Any]: def to_sql_type( # noqa: PLR0911 # Too many return statements self, json_schema_property_def: dict[str, str | dict[str, Any] | list[Any]], - ) -> Any: + ) -> Any: # noqa: ANN401 (any-type) """Convert a value to a SQL type.""" try: airbyte_type, _ = _get_airbyte_type(json_schema_property_def) diff --git a/airbyte_cdk/test/entrypoint_wrapper.py b/airbyte_cdk/test/entrypoint_wrapper.py index e6dd6d6c..a436fe51 100644 --- a/airbyte_cdk/test/entrypoint_wrapper.py +++ b/airbyte_cdk/test/entrypoint_wrapper.py @@ -93,7 +93,7 @@ def state_messages(self) -> list[AirbyteMessage]: return self._get_message_by_types([Type.STATE]) @property - def most_recent_state(self) -> Any: + def most_recent_state(self) -> Any: # noqa: ANN401 (any-type) state_messages = self._get_message_by_types([Type.STATE]) if not state_messages: raise ValueError("Can't provide most recent state as there are no state messages") @@ -154,7 +154,10 @@ def is_not_in_logs(self, pattern: str) -> bool: def _run_command( - source: Source, args: list[str], expecting_exception: bool = False + source: Source, + args: list[str], + *, + expecting_exception: bool = False, ) -> EntrypointOutput: log_capture_buffer = StringIO() stream_handler = logging.StreamHandler(log_capture_buffer) @@ -170,7 +173,7 @@ def _run_command( uncaught_exception = None try: for message in source_entrypoint.run(parsed_args): - messages.append(message) + messages.append(message) # noqa: PERF402 except Exception as exception: if not expecting_exception: print("Printing unexpected error from entrypoint_wrapper") @@ -187,6 +190,7 @@ def _run_command( def discover( source: Source, config: Mapping[str, Any], + *, expecting_exception: bool = False, ) -> EntrypointOutput: """Config must be json serializable @@ -207,6 +211,7 @@ def read( config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog, state: list[AirbyteStateMessage] | None = None, + *, expecting_exception: bool = False, ) -> 
EntrypointOutput: """Config and state must be json serializable diff --git a/airbyte_cdk/test/mock_http/matcher.py b/airbyte_cdk/test/mock_http/matcher.py index 6ce64efb..37c7ee4e 100644 --- a/airbyte_cdk/test/mock_http/matcher.py +++ b/airbyte_cdk/test/mock_http/matcher.py @@ -8,7 +8,7 @@ from airbyte_cdk.test.mock_http.request import HttpRequest -class HttpRequestMatcher: +class HttpRequestMatcher: # noqa: PLW1641 # missing __hash__ method def __init__(self, request: HttpRequest, minimum_number_of_expected_match: int) -> None: self._request_to_match = request self._minimum_number_of_expected_match = minimum_number_of_expected_match diff --git a/airbyte_cdk/test/mock_http/mocker.py b/airbyte_cdk/test/mock_http/mocker.py index 6d759a46..73cde13d 100644 --- a/airbyte_cdk/test/mock_http/mocker.py +++ b/airbyte_cdk/test/mock_http/mocker.py @@ -43,13 +43,13 @@ def __init__(self) -> None: self._mocker = requests_mock.Mocker() self._matchers: list[HttpRequestMatcher] = [] - def __enter__(self) -> HttpMocker: + def __enter__(self) -> HttpMocker: # noqa: PYI034 (unexpected return type) self._mocker.__enter__() return self def __exit__( self, - exc_type: BaseException | None, + exc_type: BaseException | None, # noqa: PYI036 (unexpected return type) exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: @@ -127,9 +127,9 @@ def assert_number_of_calls(self, request: HttpRequest, number_of_calls: int) -> assert corresponding_matchers[0].actual_number_of_matches == number_of_calls # trying to type that using callables provides the error `incompatible with return type "_F" in supertype "ContextDecorator"` - def __call__(self, f): # type: ignore + def __call__(self, f): # type: ignore # noqa: ANN001, ANN204 (missing types) @functools.wraps(f) - def wrapper(*args, **kwargs): # type: ignore # this is a very generic wrapper that does not need to be typed + def wrapper(*args, **kwargs): # type: ignore # this is a very generic wrapper that does not need to be typed # noqa: ANN002, ANN202 with self: assertion_error = None diff --git a/airbyte_cdk/test/mock_http/request.py b/airbyte_cdk/test/mock_http/request.py index e4950a92..7917a552 100644 --- a/airbyte_cdk/test/mock_http/request.py +++ b/airbyte_cdk/test/mock_http/request.py @@ -14,7 +14,7 @@ def _is_subdict(small: Mapping[str, str], big: Mapping[str, str]) -> bool: return dict(big, **small) == big -class HttpRequest: +class HttpRequest: # noqa: PLW1641 # Missing __hash__ method def __init__( self, url: str, @@ -40,27 +40,27 @@ def _encode_qs(query_params: str | Mapping[str, str | list[str]]) -> str: return query_params return urlencode(query_params, doseq=True) - def matches(self, other: Any) -> bool: + def matches(self, other: Any) -> bool: # noqa: ANN401 (any-type) """If the body of any request is a Mapping, we compare as Mappings which means that the order is not important. 
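
On the B024 note for `SqlProcessorBase(abc.ABC)` above: inheriting from `ABC` blocks instantiation only when at least one `@abstractmethod` exists, so a base class without any is abstract in name only. A quick demonstration:

import abc

class LooksAbstract(abc.ABC):
    """No @abstractmethod anywhere."""

LooksAbstract()  # instantiates without complaint

class ActuallyAbstract(abc.ABC):
    @abc.abstractmethod
    def process(self) -> None: ...

try:
    ActuallyAbstract()  # type: ignore[abstract]
except TypeError as err:
    print(err)  # Can't instantiate abstract class ActuallyAbstract...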
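
`HttpMocker.__call__` above implements the decorator half of a dual context-manager/decorator object: it wraps the test function so `with self:` activates the mocks around it, with `functools.wraps` preserving the test's name for reporting. A stripped-down runnable version of the same idea; `Scope` is a hypothetical stand-in:

import functools

class Scope:
    def __enter__(self):
        print("setup")
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        print("teardown")

    def __call__(self, f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            with self:  # enter/exit bracket every call of the wrapped function
                return f(*args, **kwargs)
        return wrapper

@Scope()
def test_something():
    print("body")

test_something()  # prints: setup / body / teardown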
If the body is a string, encoding ISO-8859-1 will be assumed Headers only need to be a subset of `other` in order to match """ if isinstance(other, HttpRequest): # if `other` is a mapping, we match as an object and formatting is not considers - if isinstance(self._body, Mapping) or isinstance(other._body, Mapping): - body_match = self._to_mapping(self._body) == self._to_mapping(other._body) + if isinstance(self._body, Mapping) or isinstance(other._body, Mapping): # noqa: SLF001 (private member) + body_match = self._to_mapping(self._body) == self._to_mapping(other._body) # noqa: SLF001 (private member) else: - body_match = self._to_bytes(self._body) == self._to_bytes(other._body) + body_match = self._to_bytes(self._body) == self._to_bytes(other._body) # noqa: SLF001 (private member) return ( - self._parsed_url.scheme == other._parsed_url.scheme - and self._parsed_url.hostname == other._parsed_url.hostname - and self._parsed_url.path == other._parsed_url.path + self._parsed_url.scheme == other._parsed_url.scheme # noqa: SLF001 (private member) + and self._parsed_url.hostname == other._parsed_url.hostname # noqa: SLF001 (private member) + and self._parsed_url.path == other._parsed_url.path # noqa: SLF001 (private member) and ( - ANY_QUERY_PARAMS in (self._query_params, other._query_params) - or parse_qs(self._parsed_url.query) == parse_qs(other._parsed_url.query) + ANY_QUERY_PARAMS in (self._query_params, other._query_params) # noqa: SLF001 (private member) + or parse_qs(self._parsed_url.query) == parse_qs(other._parsed_url.query) # noqa: SLF001 (private member) ) - and _is_subdict(other._headers, self._headers) + and _is_subdict(other._headers, self._headers) # noqa: SLF001 (private member) and body_match ) return False diff --git a/airbyte_cdk/test/mock_http/response_builder.py b/airbyte_cdk/test/mock_http/response_builder.py index 6ab9e578..82343f90 100644 --- a/airbyte_cdk/test/mock_http/response_builder.py +++ b/airbyte_cdk/test/mock_http/response_builder.py @@ -14,18 +14,18 @@ from pathlib import Path as FilePath -def _extract(path: list[str], response_template: dict[str, Any]) -> Any: +def _extract(path: list[str], response_template: dict[str, Any]) -> Any: # noqa: ANN401 (any-type) return functools.reduce(lambda a, b: a[b], path, response_template) -def _replace_value(dictionary: dict[str, Any], path: list[str], value: Any) -> None: +def _replace_value(dictionary: dict[str, Any], path: list[str], value: Any) -> None: # noqa: ANN401 (any-type) current = dictionary for key in path[:-1]: current = current[key] current[path[-1]] = value -def _write(dictionary: dict[str, Any], path: list[str], value: Any) -> None: +def _write(dictionary: dict[str, Any], path: list[str], value: Any) -> None: # noqa: ANN401 (any-type) current = dictionary for key in path[:-1]: current = current.setdefault(key, {}) @@ -34,14 +34,14 @@ def _write(dictionary: dict[str, Any], path: list[str], value: Any) -> None: class Path(ABC): @abstractmethod - def write(self, template: dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) pass @abstractmethod - def update(self, template: dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) pass - def extract(self, template: dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: # noqa: ANN401, B027 (any-type, intentionally empty) pass @@ -49,13 +49,13 @@ class FieldPath(Path): def __init__(self, field: 
str) -> None: self._path = [field] - def write(self, template: dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) _write(template, self._path, value) - def update(self, template: dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) _replace_value(template, self._path, value) - def extract(self, template: dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: # noqa: ANN401 (any-type) return _extract(self._path, template) def __str__(self) -> str: @@ -66,13 +66,13 @@ class NestedPath(Path): def __init__(self, path: list[str]) -> None: self._path = path - def write(self, template: dict[str, Any], value: Any) -> None: + def write(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) _write(template, self._path, value) - def update(self, template: dict[str, Any], value: Any) -> None: + def update(self, template: dict[str, Any], value: Any) -> None: # noqa: ANN401 (any-type) _replace_value(template, self._path, value) - def extract(self, template: dict[str, Any]) -> Any: + def extract(self, template: dict[str, Any]) -> Any: # noqa: ANN401 (any-type) return _extract(self._path, template) def __str__(self) -> str: @@ -86,7 +86,7 @@ def update(self, response: dict[str, Any]) -> None: class FieldUpdatePaginationStrategy(PaginationStrategy): - def __init__(self, path: Path, value: Any) -> None: + def __init__(self, path: Path, value: Any) -> None: # noqa: ANN401 (any-type) self._path = path self._value = value @@ -126,19 +126,19 @@ def _validate_field(self, field_name: str, path: Path | None) -> None: f"{field_name} `{path}` was provided but it is not part of the template `{self._record}`" ) from exception - def with_id(self, identifier: Any) -> RecordBuilder: + def with_id(self, identifier: Any) -> RecordBuilder: # noqa: ANN401 (any-type) self._set_field("id", self._id_path, identifier) return self - def with_cursor(self, cursor_value: Any) -> RecordBuilder: + def with_cursor(self, cursor_value: Any) -> RecordBuilder: # noqa: ANN401 (any-type) self._set_field("cursor", self._cursor_path, cursor_value) return self - def with_field(self, path: Path, value: Any) -> RecordBuilder: + def with_field(self, path: Path, value: Any) -> RecordBuilder: # noqa: ANN401 (any-type) path.write(self._record, value) return self - def _set_field(self, field_name: str, path: Path | None, value: Any) -> None: + def _set_field(self, field_name: str, path: Path | None, value: Any) -> None: # noqa: ANN401 (any-type) if not path: raise ValueError( f"{field_name}_path was not provided and hence, the record {field_name} can't be modified. Please provide `id_field` while " @@ -186,7 +186,7 @@ def build(self) -> HttpResponse: def _get_unit_test_folder(execution_folder: str) -> FilePath: - # FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code. + # FIXME: This function should be removed after the next CDK release to avoid breaking amazon-seller-partner test code. 
# noqa: FIX001, TD001 return get_unit_test_folder(execution_folder) # type: ignore # get_unit_test_folder is known to return a FilePath @@ -198,7 +198,7 @@ def find_template(resource: str, execution_folder: str) -> dict[str, Any]: / "response" / f"{resource}.json" ) - with open(response_template_filepath, encoding="utf-8") as template_file: + with open(response_template_filepath, encoding="utf-8") as template_file: # noqa: PTH123 (prefer pathlib) return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file @@ -220,7 +220,7 @@ def create_record_builder( except (IndexError, KeyError): raise ValueError( f"Error while extracting records at path `{records_path}` from response template `{response_template}`" - ) + ) from None def create_response_builder( diff --git a/airbyte_cdk/test/state_builder.py b/airbyte_cdk/test/state_builder.py index 47f30cda..927b206a 100644 --- a/airbyte_cdk/test/state_builder.py +++ b/airbyte_cdk/test/state_builder.py @@ -16,7 +16,7 @@ class StateBuilder: def __init__(self) -> None: self._state: list[AirbyteStateMessage] = [] - def with_stream_state(self, stream_name: str, state: Any) -> StateBuilder: + def with_stream_state(self, stream_name: str, state: Any) -> StateBuilder: # noqa: ANN401 (any-type) self._state.append( AirbyteStateMessage( type=AirbyteStateType.STREAM, diff --git a/airbyte_cdk/test/utils/data.py b/airbyte_cdk/test/utils/data.py index a72674b9..38b14706 100644 --- a/airbyte_cdk/test/utils/data.py +++ b/airbyte_cdk/test/utils/data.py @@ -20,5 +20,5 @@ def read_resource_file_contents(resource: str, test_location: str) -> str: file_path = str( get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}" ) - with open(file_path, encoding="utf-8") as f: + with open(file_path, encoding="utf-8") as f: # noqa: PTH123, FURB101 (prefer pathlib) return f.read() diff --git a/airbyte_cdk/test/utils/reading.py b/airbyte_cdk/test/utils/reading.py index 4b771de6..fee49b6f 100644 --- a/airbyte_cdk/test/utils/reading.py +++ b/airbyte_cdk/test/utils/reading.py @@ -25,6 +25,7 @@ def read_records( stream_name: str, sync_mode: SyncMode, state: list[AirbyteStateMessage] | None = None, + *, expecting_exception: bool = False, ) -> EntrypointOutput: """Read records from a stream.""" diff --git a/airbyte_cdk/utils/airbyte_secrets_utils.py b/airbyte_cdk/utils/airbyte_secrets_utils.py index fb5558b8..bde33ae4 100644 --- a/airbyte_cdk/utils/airbyte_secrets_utils.py +++ b/airbyte_cdk/utils/airbyte_secrets_utils.py @@ -15,7 +15,7 @@ def get_secret_paths(spec: Mapping[str, Any]) -> list[list[str]]: paths = [] - def traverse_schema(schema_item: Any, path: list[str]) -> None: + def traverse_schema(schema_item: Any, path: list[str]) -> None: # noqa: ANN401 (any type) """schema_item can be any property or value in the originally input jsonschema, depending on how far down the recursion stack we go path is the path to that schema item in the original input for example if we have the input {'password': {'type': 'string', 'airbyte_secret': True}} then the arguments will evolve @@ -48,8 +48,8 @@ def get_secrets( secret_paths = get_secret_paths(connection_specification.get("properties", {})) result = [] for path in secret_paths: - try: - result.append(dpath.get(config, path)) + try: # noqa: SIM105 (suppressible exception) + result.append(dpath.get(config, path)) # type: ignore [arg-type] # Mapping v ImmutableMapping except KeyError: # Since we try to get paths to all known secrets in the spec, in the case of oneOfs, some 
secret fields may not be present # In that case, a KeyError is thrown. This is expected behavior. @@ -68,13 +68,13 @@ def update_secrets(secrets: list[str]) -> None: def add_to_secrets(secret: str) -> None: """Add to the list of secrets to be replaced""" - global __SECRETS_FROM_CONFIG + global __SECRETS_FROM_CONFIG # noqa: PLW0602 (global not assigned) __SECRETS_FROM_CONFIG.append(secret) def filter_secrets(string: str) -> str: """Filter secrets from a string by replacing them with ****""" - # TODO this should perform a maximal match for each secret. if "x" and "xk" are both secret values, and this method is called twice on + # TODO this should perform a maximal match for each secret. if "x" and "xk" are both secret values, and this method is called twice on # noqa: TD004 # the input "xk", then depending on call order it might only obfuscate "*k". This is a bug. for secret in __SECRETS_FROM_CONFIG: if secret: diff --git a/airbyte_cdk/utils/analytics_message.py b/airbyte_cdk/utils/analytics_message.py index ad5a379c..32a6741c 100644 --- a/airbyte_cdk/utils/analytics_message.py +++ b/airbyte_cdk/utils/analytics_message.py @@ -13,7 +13,7 @@ ) -def create_analytics_message(type: str, value: Any | None) -> AirbyteMessage: +def create_analytics_message(type: str, value: Any | None) -> AirbyteMessage: # noqa: ANN401, A002 (any type, shadows built-in name) return AirbyteMessage( type=Type.TRACE, trace=AirbyteTraceMessage( diff --git a/airbyte_cdk/utils/datetime_format_inferrer.py b/airbyte_cdk/utils/datetime_format_inferrer.py index 2240a7de..88a75c52 100644 --- a/airbyte_cdk/utils/datetime_format_inferrer.py +++ b/airbyte_cdk/utils/datetime_format_inferrer.py @@ -36,7 +36,7 @@ def __init__(self) -> None: range(1_000_000_000_000, 2_000_000_000_000), ] - def _can_be_datetime(self, value: Any) -> bool: + def _can_be_datetime(self, value: Any) -> bool: # noqa: ANN401 (any-type) """Checks if the value can be a datetime. This is the case if the value is a string or an integer between 1_000_000_000 and 2_000_000_000 for seconds or between 1_000_000_000_000 and 2_000_000_000_000 for milliseconds. 
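
`_extract` in response_builder.py above is a fold over path segments. The same idea in isolation, walking nested dictionaries with `functools.reduce`:

import functools
from typing import Any

def extract(path: list[str], template: dict[str, Any]) -> Any:
    return functools.reduce(lambda node, key: node[key], path, template)

template = {"data": {"records": [{"id": 1}]}}
assert extract(["data", "records"], template) == [{"id": 1}]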
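
`RecordBuilder.with_id`, `with_cursor`, and `with_field` above all return `self`, which is what enables fluent chaining in test setup. A minimal standalone equivalent:

from __future__ import annotations

from typing import Any

class Builder:
    def __init__(self) -> None:
        self._record: dict[str, Any] = {}

    def with_id(self, identifier: Any) -> Builder:
        self._record["id"] = identifier
        return self

    def with_cursor(self, cursor: Any) -> Builder:
        self._record["updated_at"] = cursor
        return self

    def build(self) -> dict[str, Any]:
        return self._record

assert Builder().with_id(42).with_cursor("2024-01-01").build() == {
    "id": 42,
    "updated_at": "2024-01-01",
}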
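
The PTH123/FURB101 suppressions through the test utilities defer a mechanical migration; the pathlib form those codes point to collapses the open/read pair into one call. A self-contained sketch using a temporary directory:

import json
import tempfile
from pathlib import Path

with tempfile.TemporaryDirectory() as tmp:
    template = Path(tmp) / "resource.json"
    template.write_text(json.dumps({"records": []}), encoding="utf-8")
    # Path.read_text replaces the open()/f.read() pair flagged above.
    assert json.loads(template.read_text(encoding="utf-8")) == {"records": []}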
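
The TD004-tagged comment in `filter_secrets` above describes a real ordering bug: masking a short secret first can split a longer one, so with secrets "x" and "xk" the input "xk" may become "****k". A sketch of the longest-first fix the comment implies; this is a standalone illustration, not the CDK function:

def filter_secrets(text: str, secrets: list[str]) -> str:
    # Replace longest secrets first so no secret survives as a fragment.
    for secret in sorted(secrets, key=len, reverse=True):
        if secret:
            text = text.replace(secret, "****")
    return text

assert filter_secrets("xk", ["x", "xk"]) == "****"  # not "****k"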
@@ -53,11 +53,11 @@ def _can_be_datetime(self, value: Any) -> bool: return True return False - def _matches_format(self, value: Any, format: str) -> bool: + def _matches_format(self, value: Any, format: str) -> bool: # noqa: ANN401, A002 (any type, shadows built-in name) """Checks if the value matches the format""" try: self._parser.parse(value, format) - return True + return True # noqa: TRY300 (consider 'else') except ValueError: return False @@ -67,7 +67,7 @@ def _initialize(self, record: AirbyteRecordMessage) -> None: for field_name, field_value in record.data.items(): if not self._can_be_datetime(field_value): continue - for format in self._formats: + for format in self._formats: # noqa: A001 (shadowed name) if self._matches_format(field_value, format): self._datetime_candidates[field_name] = format break diff --git a/airbyte_cdk/utils/event_timing.py b/airbyte_cdk/utils/event_timing.py index fb3c6069..d092b142 100644 --- a/airbyte_cdk/utils/event_timing.py +++ b/airbyte_cdk/utils/event_timing.py @@ -8,6 +8,11 @@ import time from contextlib import contextmanager from dataclasses import dataclass, field +from typing import TYPE_CHECKING, Any + + +if TYPE_CHECKING: + from collections.abc import Generator logger = logging.getLogger("airbyte") @@ -39,7 +44,7 @@ def finish_event(self) -> None: else: logger.warning(f"{self.name} finish_event called without start_event") - def report(self, order_by="name") -> str: + def report(self, order_by: str = "name") -> str: """:param order_by: 'name' or 'duration'""" if order_by == "name": events = sorted(self.events.values(), key=lambda event: event.name) @@ -71,7 +76,7 @@ def finish(self) -> None: @contextmanager -def create_timer(name): +def create_timer(name: str) -> Generator[EventTimer, Any, None]: """Creates a new EventTimer as a context manager to improve code readability.""" a_timer = EventTimer(name) yield a_timer diff --git a/airbyte_cdk/utils/oneof_option_config.py b/airbyte_cdk/utils/oneof_option_config.py index aaacd11e..a63ceee5 100644 --- a/airbyte_cdk/utils/oneof_option_config.py +++ b/airbyte_cdk/utils/oneof_option_config.py @@ -26,7 +26,7 @@ class Config(OneOfOptionConfig): """ @staticmethod - def schema_extra(schema: dict[str, Any], model: Any) -> None: + def schema_extra(schema: dict[str, Any], model: Any) -> None: # noqa: ANN401 (any-type) if hasattr(model.Config, "description"): schema["description"] = model.Config.description if hasattr(model.Config, "discriminator"): diff --git a/airbyte_cdk/utils/print_buffer.py b/airbyte_cdk/utils/print_buffer.py index 8320f00b..726d83ad 100644 --- a/airbyte_cdk/utils/print_buffer.py +++ b/airbyte_cdk/utils/print_buffer.py @@ -60,7 +60,7 @@ def flush(self) -> None: sys.__stdout__.write(combined_message) # type: ignore[union-attr] self.buffer = StringIO() - def __enter__(self) -> PrintBuffer: + def __enter__(self) -> PrintBuffer: # noqa: PYI034 (unexpected type) self.old_stdout, self.old_stderr = sys.stdout, sys.stderr # Used to disable buffering during the pytest session, because it is not compatible with capsys if "pytest" not in str(type(sys.stdout)).lower(): @@ -70,7 +70,7 @@ def __enter__(self) -> PrintBuffer: def __exit__( self, - exc_type: BaseException | None, + exc_type: BaseException | None, # noqa: PYI036 (unexpected type) exc_val: BaseException | None, exc_tb: TracebackType | None, ) -> None: diff --git a/airbyte_cdk/utils/schema_inferrer.py b/airbyte_cdk/utils/schema_inferrer.py index da6da635..f059362f 100644 --- a/airbyte_cdk/utils/schema_inferrer.py +++ 
b/airbyte_cdk/utils/schema_inferrer.py @@ -65,7 +65,11 @@ def merge_exceptions( # We assume the schema is the same for all SchemaValidationException return SchemaValidationException( exceptions[0].schema, - [x for exception in exceptions for x in exception._validation_errors], + [ + x + for exception in exceptions + for x in exception._validation_errors # noqa: SLF001 (private member) + ], ) def __init__( @@ -117,7 +121,7 @@ def _remove_type_from_any_of(self, node: InferredSchema) -> None: node.pop(_TYPE, None) def _clean_any_of(self, node: InferredSchema) -> None: - if len(node[_ANY_OF]) == 2 and self._null_type_in_any_of(node): + if len(node[_ANY_OF]) == 2 and self._null_type_in_any_of(node): # noqa: PLR2004 (magic number) real_type = ( node[_ANY_OF][1] if node[_ANY_OF][0][_TYPE] == _NULL_TYPE else node[_ANY_OF][0] ) @@ -125,7 +129,7 @@ def _clean_any_of(self, node: InferredSchema) -> None: node[_TYPE] = [node[_TYPE], _NULL_TYPE] node.pop(_ANY_OF) # populate `type` for `anyOf` if it's not present to pass all other checks - elif len(node[_ANY_OF]) == 2 and not self._null_type_in_any_of(node): + elif len(node[_ANY_OF]) == 2 and not self._null_type_in_any_of(node): # noqa: PLR2004 (magic number) node[_TYPE] = [_NULL_TYPE] def _clean_properties(self, node: InferredSchema) -> None: diff --git a/airbyte_cdk/utils/traced_exception.py b/airbyte_cdk/utils/traced_exception.py index 502da8e8..a701a429 100644 --- a/airbyte_cdk/utils/traced_exception.py +++ b/airbyte_cdk/utils/traced_exception.py @@ -5,6 +5,7 @@ import time import traceback +from typing import Any from orjson import orjson @@ -100,20 +101,20 @@ def from_exception( cls, exc: BaseException, stream_descriptor: StreamDescriptor | None = None, - *args, - **kwargs, - ) -> AirbyteTracedException: # type: ignore # ignoring because of args and kwargs + *args: Any, # noqa: ANN401 (any-type) + **kwargs: Any, # noqa: ANN401 (any-type) + ) -> AirbyteTracedException: """Helper to create an AirbyteTracedException from an existing exception :param exc: the exception that caused the error :param stream_descriptor: describe the stream from which the exception comes from """ return cls( + *args, internal_message=str(exc), exception=exc, stream_descriptor=stream_descriptor, - *args, **kwargs, - ) # type: ignore # ignoring because of args and kwargs + ) def as_sanitized_airbyte_message( self, stream_descriptor: StreamDescriptor | None = None diff --git a/pyproject.toml b/pyproject.toml index 6f9e1be2..281043b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,7 +139,6 @@ select = [ "ISC", # flake8-implicit-str-concat "LOG", # flake8-logging "N", # pep8-naming - "PD", # pandas-vet "PERF", # Perflint "PGH", # pygrep-hooks "PIE", # flake8-pie @@ -172,6 +171,7 @@ ignore = [ "BLE001", # Do not catch blind exception: Exception "C416", # Allow unnecessary-comprehensions. Auto-fix sometimes unsafe if operating over a mapping. 
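
The event_timing hunk above types `create_timer` as returning `Generator[EventTimer, Any, None]`, importing `Generator` under `TYPE_CHECKING`. The general recipe for annotating a `@contextmanager` factory, shown on a hypothetical timer:

import time
from collections.abc import Generator
from contextlib import contextmanager

@contextmanager
def stopwatch(name: str) -> Generator[dict, None, None]:
    # Type parameters are yield type, send type, return type.
    timer = {"name": name, "start": time.perf_counter(), "elapsed": None}
    try:
        yield timer
    finally:
        timer["elapsed"] = time.perf_counter() - timer["start"]

with stopwatch("sync") as t:
    sum(range(1000))
assert t["elapsed"] is not None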
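
The `from_exception` rewrite above moves `*args` ahead of the keyword arguments in the `cls(...)` call, which also let the two `type: ignore` comments go. The old ordering was legal Python but misleading, since unpacked positionals bind before keywords no matter where they appear in the call:

def describe(first, second, **kwargs):
    return first, second, kwargs

extras = ("a", "b")
# Both calls bind identically; the second obscures that "a" fills `first`.
assert describe(*extras, note="x") == describe(note="x", *extras)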
"DTZ005", # Allow use of 'datetime.datetime.now()' without timezone (we should fix these eventually) + "DTZ007", # Allow use of 'strptime()' without timezone (we should fix these eventually) "D", # pydocstyle (Docstring conventions) "D102", # Missing docstring in public method "D103", # Missing docstring in public function @@ -199,6 +199,7 @@ ignore = [ "N818", # Custom exception names should use the suffix "Error" "NPY", # NumPy-specific rules "N805", # Enforce first-arg is 'self' (false positive for class methods in Pydantic) + "PD", # pandas-vet "PERF203", # exception handling in loop "PIE790", # Allow unnecssary 'pass' (sometimes useful for readability) "PLR6201", # Allow membership checks in lists (set-based check is unsafe when values are unhashable)