diff --git a/airbyte-cdk/bulk/core/extract/src/main/kotlin/io/airbyte/cdk/read/StateManager.kt b/airbyte-cdk/bulk/core/extract/src/main/kotlin/io/airbyte/cdk/read/StateManager.kt index 412eb54b5b72..32301851737d 100644 --- a/airbyte-cdk/bulk/core/extract/src/main/kotlin/io/airbyte/cdk/read/StateManager.kt +++ b/airbyte-cdk/bulk/core/extract/src/main/kotlin/io/airbyte/cdk/read/StateManager.kt @@ -37,18 +37,15 @@ class StateManager( .mapKeys { it.key.id } } else { val globalStreams: Map = - global.streams.associateWith { initialStreamStates[it] } + global.streams.associateWith { initialStreamStates[it] } + + initialStreamStates.filterKeys { global.streams.contains(it).not() } this.global = GlobalStateManager( global = global, initialGlobalState = initialGlobalState, initialStreamStates = globalStreams, ) - nonGlobal = - initialStreamStates - .filterKeys { !globalStreams.containsKey(it) } - .mapValues { NonGlobalStreamStateManager(it.key, it.value) } - .mapKeys { it.key.id } + nonGlobal = emptyMap() } } diff --git a/airbyte-cdk/bulk/core/extract/src/test/kotlin/io/airbyte/cdk/read/StateManagerGlobalStatesTest.kt b/airbyte-cdk/bulk/core/extract/src/test/kotlin/io/airbyte/cdk/read/StateManagerGlobalStatesTest.kt index 6f21a1d532c4..163834dd0653 100644 --- a/airbyte-cdk/bulk/core/extract/src/test/kotlin/io/airbyte/cdk/read/StateManagerGlobalStatesTest.kt +++ b/airbyte-cdk/bulk/core/extract/src/test/kotlin/io/airbyte/cdk/read/StateManagerGlobalStatesTest.kt @@ -78,16 +78,11 @@ class StateManagerGlobalStatesTest { |"global":{"shared_state":{"cdc":"starting"}, |"stream_states":[ |{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"}, - |"stream_state":{"initial_sync":"ongoing"}} + |"stream_state":{"initial_sync":"ongoing"}}, + |{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, + |"stream_state":{"full_refresh":"ongoing"}} |]}, - |"sourceStats":{"recordCount":123.0} - |} - """.trimMargin(), - """{ - |"type":"STREAM", - |"stream":{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, - |"stream_state":{"full_refresh":"ongoing"}}, - |"sourceStats":{"recordCount":456.0} + |"sourceStats":{"recordCount":579.0} |} """.trimMargin(), ) @@ -124,7 +119,9 @@ class StateManagerGlobalStatesTest { |"global":{"shared_state":{"cdc":"starting"}, |"stream_states":[ |{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"}, - |"stream_state":{"initial_sync":"ongoing"}} + |"stream_state":{"initial_sync":"ongoing"}}, + |{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, + |"stream_state":{}} |]},"sourceStats":{"recordCount":123.0} |} """.trimMargin(), @@ -147,7 +144,9 @@ class StateManagerGlobalStatesTest { |"global":{"shared_state":{"cdc":"starting"}, |"stream_states":[ |{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"}, - |"stream_state":{"initial_sync":"completed"}} + |"stream_state":{"initial_sync":"completed"}}, + |{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, + |"stream_state":{}} |]},"sourceStats":{"recordCount":1245.0} |} """.trimMargin(), @@ -197,7 +196,9 @@ class StateManagerGlobalStatesTest { |"global":{"shared_state":{"cdc":"starting"}, |"stream_states":[ |{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"}, - |"stream_state":{"initial_sync":"completed"}} + |"stream_state":{"initial_sync":"completed"}}, + |{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, + |"stream_state":{}} |]},"sourceStats":{"recordCount":789.0} |} """.trimMargin(), @@ -245,7 +246,9 @@ class StateManagerGlobalStatesTest { |"global":{"shared_state":{"cdc":"ongoing"}, |"stream_states":[ |{"stream_descriptor":{"name":"KV","namespace":"PUBLIC"}, - |"stream_state":{"initial_sync":"completed"}} + |"stream_state":{"initial_sync":"completed"}}, + |{"stream_descriptor":{"name":"EVENTS","namespace":"PUBLIC"}, + |"stream_state":{}} |]}, |"sourceStats":{"recordCount":741.0} |} diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt index 0e5105f227f8..34be7c37d5b7 100644 --- a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/command/DestinationConfiguration.kt @@ -64,7 +64,7 @@ abstract class DestinationConfiguration : Configuration { /** Memory queue settings */ open val maxMessageQueueMemoryUsageRatio: Double = 0.2 // 0 => No limit, 1.0 => 100% of JVM heap open val estimatedRecordMemoryOverheadRatio: Double = - 0.1 // 0 => No overhead, 1.0 => 100% overhead + 1.1 // 1.0 => No overhead, 2.0 => 100% overhead /** * If we have not flushed state checkpoints in this amount of time, make a best-effort attempt diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/config/SyncBeanFactory.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/config/SyncBeanFactory.kt new file mode 100644 index 000000000000..2d67ce824f73 --- /dev/null +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/config/SyncBeanFactory.kt @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2024 Airbyte, Inc., all rights reserved. + */ + +package io.airbyte.cdk.load.config + +import io.airbyte.cdk.load.command.DestinationConfiguration +import io.airbyte.cdk.load.state.MemoryManager +import io.micronaut.context.annotation.Factory +import jakarta.inject.Singleton + +/** Factory for instantiating beans necessary for the sync process. */ +@Factory +class SyncBeanFactory { + @Singleton + fun memoryManager( + config: DestinationConfiguration, + ): MemoryManager { + val memory = config.maxMessageQueueMemoryUsageRatio * Runtime.getRuntime().maxMemory() + + return MemoryManager(memory.toLong()) + } +} diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/message/DestinationMessageQueues.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/message/DestinationMessageQueues.kt index eef8d2968059..6e7b28716791 100644 --- a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/message/DestinationMessageQueues.kt +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/message/DestinationMessageQueues.kt @@ -55,9 +55,9 @@ data class StreamFileCompleteWrapped( class DestinationRecordQueue : ChannelMessageQueue>() /** - * A supplier of message queues to which ([MemoryManager.reserveBlocking]'d) @ - * [DestinationRecordWrapped] messages can be published on a @ [DestinationStream] key. The queues - * themselves do not manage memory. + * A supplier of message queues to which ([MemoryManager.reserve]'d) @ [DestinationRecordWrapped] + * messages can be published on a @ [DestinationStream] key. The queues themselves do not manage + * memory. */ @Singleton @Secondary diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/state/MemoryManager.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/state/MemoryManager.kt index 99ed41d73cef..85b94e8e806e 100644 --- a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/state/MemoryManager.kt +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/state/MemoryManager.kt @@ -5,18 +5,13 @@ package io.airbyte.cdk.load.state import io.airbyte.cdk.load.util.CloseableCoroutine -import io.micronaut.context.annotation.Secondary -import jakarta.inject.Singleton import java.util.concurrent.atomic.AtomicBoolean import java.util.concurrent.atomic.AtomicLong import kotlinx.coroutines.channels.Channel import kotlinx.coroutines.sync.Mutex import kotlinx.coroutines.sync.withLock -/** - * Releasable reservation of memory. For large blocks (ie, from [MemoryManager.reserveRatio], - * provides a submanager that can be used to manage allocating the reservation). - */ +/** Releasable reservation of memory. */ class Reserved( private val memoryManager: MemoryManager, val bytesReserved: Long, @@ -31,8 +26,6 @@ class Reserved( memoryManager.release(bytesReserved) } - fun getReservationManager(): MemoryManager = MemoryManager(bytesReserved) - fun replace(value: U): Reserved = Reserved(memoryManager, bytesReserved, value) override suspend fun close() { @@ -47,18 +40,8 @@ class Reserved( * * TODO: Some degree of logging/monitoring around how accurate we're actually being? */ -@Singleton -class MemoryManager(availableMemoryProvider: AvailableMemoryProvider) { - // This is slightly awkward, but Micronaut only injects the primary constructor - constructor( - availableMemory: Long - ) : this( - object : AvailableMemoryProvider { - override val availableMemoryBytes: Long = availableMemory - } - ) +class MemoryManager(val totalMemoryBytes: Long) { - private val totalMemoryBytes = availableMemoryProvider.availableMemoryBytes private var usedMemoryBytes = AtomicLong(0L) private val mutex = Mutex() private val syncChannel = Channel(Channel.UNLIMITED) @@ -67,7 +50,7 @@ class MemoryManager(availableMemoryProvider: AvailableMemoryProvider) { get() = totalMemoryBytes - usedMemoryBytes.get() /* Attempt to reserve memory. If enough memory is not available, waits until it is, then reserves. */ - suspend fun reserveBlocking(memoryBytes: Long, reservedFor: T): Reserved { + suspend fun reserve(memoryBytes: Long, reservedFor: T): Reserved { if (memoryBytes > totalMemoryBytes) { throw IllegalArgumentException( "Requested ${memoryBytes}b memory exceeds ${totalMemoryBytes}b total" @@ -84,23 +67,8 @@ class MemoryManager(availableMemoryProvider: AvailableMemoryProvider) { } } - suspend fun reserveRatio(ratio: Double, reservedFor: T): Reserved { - val estimatedSize = (totalMemoryBytes.toDouble() * ratio).toLong() - return reserveBlocking(estimatedSize, reservedFor) - } - suspend fun release(memoryBytes: Long) { usedMemoryBytes.addAndGet(-memoryBytes) syncChannel.send(Unit) } } - -interface AvailableMemoryProvider { - val availableMemoryBytes: Long -} - -@Singleton -@Secondary -class JavaRuntimeAvailableMemoryProvider : AvailableMemoryProvider { - override val availableMemoryBytes: Long = Runtime.getRuntime().maxMemory() -} diff --git a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTask.kt b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTask.kt index fdf1430f6f96..3ceeab3e2313 100644 --- a/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTask.kt +++ b/airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTask.kt @@ -204,26 +204,21 @@ abstract class ReservingDeserializingInputFlow : SizedInputFlow>>) { - val reservation = memoryManager.reserveRatio(config.maxMessageQueueMemoryUsageRatio, this) - val reservationManager = reservation.getReservationManager() + log.info { "Reserved ${memoryManager.totalMemoryBytes/1024}mb memory for input processing" } - log.info { "Reserved ${reservation.bytesReserved/1024}mb memory for input processing" } - - reservation.use { _ -> - inputStream.bufferedReader().lineSequence().forEachIndexed { index, line -> - if (line.isEmpty()) { - return@forEachIndexed - } + inputStream.bufferedReader().lineSequence().forEachIndexed { index, line -> + if (line.isEmpty()) { + return@forEachIndexed + } - val lineSize = line.length.toLong() - val estimatedSize = lineSize * config.estimatedRecordMemoryOverheadRatio - val reserved = reservationManager.reserveBlocking(estimatedSize.toLong(), line) - val message = deserializer.deserialize(line) - collector.emit(Pair(lineSize, reserved.replace(message))) + val lineSize = line.length.toLong() + val estimatedSize = lineSize * config.estimatedRecordMemoryOverheadRatio + val reserved = memoryManager.reserve(estimatedSize.toLong(), line) + val message = deserializer.deserialize(line) + collector.emit(Pair(lineSize, reserved.replace(message))) - if (index % 10_000 == 0) { - log.info { "Processed $index lines" } - } + if (index % 10_000 == 0) { + log.info { "Processed $index lines" } } } diff --git a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/state/MemoryManagerTest.kt b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/state/MemoryManagerTest.kt index b2c66db6eeaf..61d1f92c3326 100644 --- a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/state/MemoryManagerTest.kt +++ b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/state/MemoryManagerTest.kt @@ -4,10 +4,6 @@ package io.airbyte.cdk.load.state -import io.micronaut.context.annotation.Replaces -import io.micronaut.context.annotation.Requires -import io.micronaut.test.extensions.junit5.annotation.MicronautTest -import jakarta.inject.Singleton import java.util.concurrent.atomic.AtomicBoolean import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.launch @@ -17,22 +13,14 @@ import kotlinx.coroutines.withTimeout import org.junit.jupiter.api.Assertions import org.junit.jupiter.api.Test -@MicronautTest(environments = ["MemoryManagerTest"]) class MemoryManagerTest { - @Singleton - @Replaces(MemoryManager::class) - @Requires(env = ["MemoryManagerTest"]) - class MockAvailableMemoryProvider : AvailableMemoryProvider { - override val availableMemoryBytes: Long = 1000 - } - @Test - fun testReserveBlocking() = runTest { - val memoryManager = MemoryManager(MockAvailableMemoryProvider()) + fun testReserve() = runTest { + val memoryManager = MemoryManager(1000) val reserved = AtomicBoolean(false) try { - withTimeout(5000) { memoryManager.reserveBlocking(900, this) } + withTimeout(5000) { memoryManager.reserve(900, this) } } catch (e: Exception) { Assertions.fail("Failed to reserve memory") } @@ -40,20 +28,20 @@ class MemoryManagerTest { Assertions.assertEquals(100, memoryManager.remainingMemoryBytes) val job = launch { - memoryManager.reserveBlocking(200, this) + memoryManager.reserve(200, this) reserved.set(true) } - memoryManager.reserveBlocking(0, this) + memoryManager.reserve(0, this) Assertions.assertFalse(reserved.get()) memoryManager.release(50) - memoryManager.reserveBlocking(0, this) + memoryManager.reserve(0, this) Assertions.assertEquals(150, memoryManager.remainingMemoryBytes) Assertions.assertFalse(reserved.get()) memoryManager.release(25) - memoryManager.reserveBlocking(0, this) + memoryManager.reserve(0, this) Assertions.assertEquals(175, memoryManager.remainingMemoryBytes) Assertions.assertFalse(reserved.get()) @@ -68,15 +56,14 @@ class MemoryManagerTest { } @Test - fun testReserveBlockingMultithreaded() = runTest { - val memoryManager = MemoryManager(MockAvailableMemoryProvider()) + fun testReserveMultithreaded() = runTest { + val memoryManager = MemoryManager(1000) withContext(Dispatchers.IO) { - memoryManager.reserveBlocking(1000, this) + memoryManager.reserve(1000, this) Assertions.assertEquals(0, memoryManager.remainingMemoryBytes) val nIterations = 100000 - val jobs = - (0 until nIterations).map { launch { memoryManager.reserveBlocking(10, this) } } + val jobs = (0 until nIterations).map { launch { memoryManager.reserve(10, this) } } repeat(nIterations) { memoryManager.release(10) @@ -92,9 +79,9 @@ class MemoryManagerTest { @Test fun testRequestingMoreThanAvailableThrows() = runTest { - val memoryManager = MemoryManager(MockAvailableMemoryProvider()) + val memoryManager = MemoryManager(1000) try { - memoryManager.reserveBlocking(1001, this) + memoryManager.reserve(1001, this) } catch (e: IllegalArgumentException) { return@runTest } @@ -103,8 +90,8 @@ class MemoryManagerTest { @Test fun testReservations() = runTest { - val memoryManager = MemoryManager(MockAvailableMemoryProvider()) - val reservation = memoryManager.reserveBlocking(100, this) + val memoryManager = MemoryManager(1000) + val reservation = memoryManager.reserve(100, this) Assertions.assertEquals(900, memoryManager.remainingMemoryBytes) reservation.release() Assertions.assertEquals(1000, memoryManager.remainingMemoryBytes) diff --git a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTaskTest.kt b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTaskTest.kt index 27511272cc78..806e9b991732 100644 --- a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTaskTest.kt +++ b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/InputConsumerTaskTest.kt @@ -88,7 +88,7 @@ class InputConsumerTaskTest { } suspend fun addMessage(message: DestinationMessage, size: Long = 0L) { - messages.send(Pair(size, memoryManager.reserveBlocking(1, message))) + messages.send(Pair(size, memoryManager.reserve(1, message))) } fun stop() { diff --git a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/SpillToDiskTaskTest.kt b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/SpillToDiskTaskTest.kt index cd4db002c22b..bd2e1b3730e6 100644 --- a/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/SpillToDiskTaskTest.kt +++ b/airbyte-cdk/bulk/core/load/src/test/kotlin/io/airbyte/cdk/load/task/internal/SpillToDiskTaskTest.kt @@ -67,7 +67,7 @@ class SpillToDiskTaskTest { val index = recordsWritten++ bytesReserved++ queue.publish( - memoryManager.reserveBlocking( + memoryManager.reserve( 1L, StreamRecordWrapped( index = index, @@ -84,9 +84,7 @@ class SpillToDiskTaskTest { ) ) } - queue.publish( - memoryManager.reserveBlocking(0L, StreamRecordCompleteWrapped(index = maxRecords)) - ) + queue.publish(memoryManager.reserve(0L, StreamRecordCompleteWrapped(index = maxRecords))) return bytesReserved } diff --git a/airbyte-integrations/connectors/source-apple-search-ads/acceptance-test-config.yml b/airbyte-integrations/connectors/source-apple-search-ads/acceptance-test-config.yml index e6f863199d55..08dba5ca8202 100644 --- a/airbyte-integrations/connectors/source-apple-search-ads/acceptance-test-config.yml +++ b/airbyte-integrations/connectors/source-apple-search-ads/acceptance-test-config.yml @@ -8,43 +8,43 @@ acceptance_tests: connection: tests: - config_path: "secrets/config.json" - status: "succeed" - - config_path: "integration_tests/invalid_config.json" status: "failed" - discovery: - tests: - - config_path: "secrets/config.json" - basic_read: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - empty_streams: [] - timeout_seconds: 3600 - incremental: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - future_state: - future_state_path: "integration_tests/abnormal_state.json" - timeout_seconds: 3600 - full_refresh: - tests: - - config_path: "secrets/config.json" - configured_catalog_path: "integration_tests/configured_catalog.json" - ignored_fields: - adgroups_report_daily: - - name: granularity - bypass_reason: "Can't be idempotent by nature" - - name: metadata - bypass_reason: "Can't be idempotent by nature" - campaigns_report_daily: - - name: granularity - bypass_reason: "Can't be idempotent by nature" - - name: metadata - bypass_reason: "Can't be idempotent by nature" - keywords_report_daily: - - name: granularity - bypass_reason: "Can't be idempotent by nature" - - name: metadata - bypass_reason: "Can't be idempotent by nature" - timeout_seconds: 3600 + + ## Uncomment the following tests and mark above connection test status as "succeeded" when we have a usable sandbox environment + # discovery: + # tests: + # - config_path: "secrets/config.json" + # basic_read: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # empty_streams: [] + # timeout_seconds: 3600 + # incremental: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # future_state: + # future_state_path: "integration_tests/abnormal_state.json" + # timeout_seconds: 3600 + # full_refresh: + # tests: + # - config_path: "secrets/config.json" + # configured_catalog_path: "integration_tests/configured_catalog.json" + # ignored_fields: + # adgroups_report_daily: + # - name: granularity + # bypass_reason: "Can't be idempotent by nature" + # - name: metadata + # bypass_reason: "Can't be idempotent by nature" + # campaigns_report_daily: + # - name: granularity + # bypass_reason: "Can't be idempotent by nature" + # - name: metadata + # bypass_reason: "Can't be idempotent by nature" + # keywords_report_daily: + # - name: granularity + # bypass_reason: "Can't be idempotent by nature" + # - name: metadata + # bypass_reason: "Can't be idempotent by nature" + # timeout_seconds: 3600 diff --git a/airbyte-integrations/connectors/source-apple-search-ads/manifest.yaml b/airbyte-integrations/connectors/source-apple-search-ads/manifest.yaml index 286cf3f79f24..5492dcf2663d 100644 --- a/airbyte-integrations/connectors/source-apple-search-ads/manifest.yaml +++ b/airbyte-integrations/connectors/source-apple-search-ads/manifest.yaml @@ -452,6 +452,7 @@ definitions: type: OAuthAuthenticator client_id: "{{ config.client_id }}" client_secret: "{{ config.client_secret }}" + grant_type: client_credentials token_refresh_endpoint: >- https://appleid.apple.com/auth/oauth2/token?grant_type=client_credentials&scope=searchadsorg diff --git a/airbyte-integrations/connectors/source-apple-search-ads/metadata.yaml b/airbyte-integrations/connectors/source-apple-search-ads/metadata.yaml index 590c6c63b2e0..f03c24c4f99d 100644 --- a/airbyte-integrations/connectors/source-apple-search-ads/metadata.yaml +++ b/airbyte-integrations/connectors/source-apple-search-ads/metadata.yaml @@ -2,7 +2,7 @@ data: connectorSubtype: api connectorType: source definitionId: e59c8416-c2fa-4bd3-9e95-52677ea281c1 - dockerImageTag: 0.2.0 + dockerImageTag: 0.2.1 dockerRepository: airbyte/source-apple-search-ads githubIssueLabel: source-apple-search-ads icon: apple.svg @@ -25,6 +25,14 @@ data: ab_internal: sl: 100 ql: 100 + connectorTestSuitesOptions: + - suite: acceptanceTests + testSecrets: + - name: SECRET_SOURCE-APPLE-SEARCH-ADS__CREDS + fileName: config.json + secretStore: + type: GSM + alias: airbyte-connector-testing-secret-store supportLevel: community connectorBuildOptions: baseImage: docker.io/airbyte/source-declarative-manifest:5.7.5@sha256:4832cc13b262b4cae4ba72b07da544e6ee2f5d216b7147483480d5ebc5d0d7ca diff --git a/airbyte-integrations/connectors/source-klaviyo/metadata.yaml b/airbyte-integrations/connectors/source-klaviyo/metadata.yaml index d96e750721e1..01e0985c72e2 100644 --- a/airbyte-integrations/connectors/source-klaviyo/metadata.yaml +++ b/airbyte-integrations/connectors/source-klaviyo/metadata.yaml @@ -8,7 +8,7 @@ data: definitionId: 95e8cffd-b8c4-4039-968e-d32fb4a69bde connectorBuildOptions: baseImage: docker.io/airbyte/python-connector-base:2.0.0@sha256:c44839ba84406116e8ba68722a0f30e8f6e7056c726f447681bb9e9ece8bd916 - dockerImageTag: 2.10.13 + dockerImageTag: 2.10.14 dockerRepository: airbyte/source-klaviyo githubIssueLabel: source-klaviyo icon: klaviyo.svg diff --git a/airbyte-integrations/connectors/source-klaviyo/pyproject.toml b/airbyte-integrations/connectors/source-klaviyo/pyproject.toml index 3d7a04644dcb..95706b012ac2 100644 --- a/airbyte-integrations/connectors/source-klaviyo/pyproject.toml +++ b/airbyte-integrations/connectors/source-klaviyo/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.10.13" +version = "2.10.14" name = "source-klaviyo" description = "Source implementation for Klaviyo." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/components/datetime_based_cursor.py b/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/components/datetime_based_cursor.py deleted file mode 100644 index d8580fded7d3..000000000000 --- a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/components/datetime_based_cursor.py +++ /dev/null @@ -1,55 +0,0 @@ -# -# Copyright (c) 2024 Airbyte, Inc., all rights reserved. -# - - -from dataclasses import dataclass -from typing import Any, Mapping, Optional - -from airbyte_cdk.sources.declarative.incremental import DatetimeBasedCursor -from airbyte_cdk.sources.declarative.types import StreamSlice, StreamState - - -@dataclass -class KlaviyoDatetimeBasedCursor(DatetimeBasedCursor): - def get_request_params( - self, - *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - if not stream_slice: - return {} - - field = self.cursor_field.eval(self.config) - value = stream_slice.get(self._partition_field_start.eval(self.config)) - return {"filter": f"greater-than({field},{value})", "sort": field} - - -@dataclass -class KlaviyoCheckpointDatetimeBasedCursor(DatetimeBasedCursor): - """ - You can configure the declarative stream with a step to checkpoint after the slice is completed - e.g. - incremental_sync: - type: CustomIncrementalSync - ... some configuration - step: P1M - cursor_granularity: PT1S - """ - - def get_request_params( - self, - *, - stream_state: Optional[StreamState] = None, - stream_slice: Optional[StreamSlice] = None, - next_page_token: Optional[Mapping[str, Any]] = None, - ) -> Mapping[str, Any]: - if not stream_slice: - return {} - - field = self.cursor_field.eval(self.config) - start_value = stream_slice.get(self._partition_field_start.eval(self.config)) - end_value = stream_slice.get(self._partition_field_end.eval(self.config)) - return {"filter": f"greater-or-equal({field},{start_value}),less-or-equal({field},{end_value})", "sort": field} diff --git a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/manifest.yaml b/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/manifest.yaml index 4ea2663b513f..9dd0470c033b 100644 --- a/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/manifest.yaml +++ b/airbyte-integrations/connectors/source-klaviyo/source_klaviyo/manifest.yaml @@ -100,47 +100,6 @@ definitions: path: ["updated"] value: "{{ record.get('attributes', {}).get('updated') }}" - base_incremental_stream: - $ref: "#/definitions/base_stream" - retriever: "#/definitions/base_retriever" - incremental_sync: - type: CustomIncrementalSync - class_name: source_klaviyo.components.datetime_based_cursor.KlaviyoDatetimeBasedCursor - cursor_field: "{{ parameters.get('cursor_field', 'updated') }}" - start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" - datetime_format: "%Y-%m-%dT%H:%M:%S%z" - cursor_datetime_formats: - - "%Y-%m-%dT%H:%M:%S.%f%z" - - "%Y-%m-%dT%H:%M:%S%z" - - "%Y-%m-%d %H:%M:%S%z" - start_time_option: - type: RequestOption - field_name: "{{ parameters.get('cursor_field', 'updated') }}" - inject_into: request_parameter - - base_incremental_checkpoint_stream: - $ref: "#/definitions/base_stream" - retriever: "#/definitions/base_retriever" - incremental_sync: - type: CustomIncrementalSync - class_name: source_klaviyo.components.datetime_based_cursor.KlaviyoCheckpointDatetimeBasedCursor - cursor_field: "{{ parameters.get('cursor_field', 'updated') }}" - start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" - datetime_format: "%Y-%m-%dT%H:%M:%S%z" - cursor_datetime_formats: - - "%Y-%m-%dT%H:%M:%S.%f%z" - - "%Y-%m-%dT%H:%M:%S%z" - - "%Y-%m-%d %H:%M:%S%z" - start_time_option: - type: RequestOption - field_name: "{{ parameters.get('cursor_field', 'updated') }}" - inject_into: request_parameter - # Syncing historical data from events endpoint can take days to sync and cause memory issues. - # Checkpoint after each slice, which means the next sync will start from the last successful slice. - # This ensures that even if the sync runs out of memory or fails, it won’t start over from the beginning. - step: P1M - cursor_granularity: PT1S - base_semi_incremental_stream: $ref: "#/definitions/base_stream" retriever: "#/definitions/semi_incremental_retriever" @@ -154,17 +113,33 @@ definitions: profiles_stream: # Docs: https://developers.klaviyo.com/en/v2023-02-22/reference/get_profiles name: "profiles" - $ref: "#/definitions/base_incremental_stream" + $ref: "#/definitions/base_stream" + incremental_sync: + type: DatetimeBasedCursor + cursor_field: "updated" + start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" + datetime_format: "%Y-%m-%dT%H:%M:%S%z" + cursor_datetime_formats: + - "%Y-%m-%dT%H:%M:%S.%f%z" + - "%Y-%m-%dT%H:%M:%S%z" + - "%Y-%m-%d %H:%M:%S%z" schema_loader: type: InlineSchemaLoader schema: "#/definitions/profiles_schema" retriever: $ref: "#/definitions/profiles_retriever" + requester: + $ref: "#/definitions/profiles_retriever/requester" + request_parameters: + $ref: "#/definitions/profiles_retriever/requester/request_parameters" + "filter": "greater-than({{ parameters.cursor_field }},{{ stream_interval.start_time }})" + "sort": "{{ parameters.cursor_field }}" record_selector: $ref: "#/definitions/selector" schema_normalization: Default $parameters: path: "profiles" + cursor_field: "updated" global_exclusions_stream: # Docs: https://developers.klaviyo.com/en/v2023-02-22/reference/get_profiles @@ -184,7 +159,18 @@ definitions: events_stream: # Docs: https://developers.klaviyo.com/en/reference/get_events name: "events" - $ref: "#/definitions/base_incremental_checkpoint_stream" + $ref: "#/definitions/base_stream" + incremental_sync: + type: DatetimeBasedCursor + cursor_field: "datetime" + start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" + datetime_format: "%Y-%m-%dT%H:%M:%S%z" + cursor_datetime_formats: + - "%Y-%m-%dT%H:%M:%S.%f%z" + - "%Y-%m-%dT%H:%M:%S%z" + - "%Y-%m-%d %H:%M:%S%z" + step: P1M + cursor_granularity: PT1S retriever: $ref: "#/definitions/base_retriever" requester: @@ -192,6 +178,8 @@ definitions: request_parameters: "fields[metric]": "name,created,updated,integration" "include": "metric" + "filter": "greater-or-equal({{ parameters.cursor_field }},{{ stream_interval.start_time }}),less-or-equal({{ parameters.cursor_field }},{{ stream_interval.end_time }})" + "sort": "{{ parameters.cursor_field }}" schema_loader: type: InlineSchemaLoader schema: "#/definitions/events_schema" @@ -208,12 +196,29 @@ definitions: email_templates_stream: # Docs: https://developers.klaviyo.com/en/reference/get_templates name: "email_templates" - $ref: "#/definitions/base_incremental_stream" + $ref: "#/definitions/base_stream" + incremental_sync: + type: DatetimeBasedCursor + cursor_field: "updated" + start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" + datetime_format: "%Y-%m-%dT%H:%M:%S%z" + cursor_datetime_formats: + - "%Y-%m-%dT%H:%M:%S.%f%z" + - "%Y-%m-%dT%H:%M:%S%z" + - "%Y-%m-%d %H:%M:%S%z" schema_loader: type: InlineSchemaLoader schema: "#/definitions/email_templates_schema" + retriever: + $ref: "#/definitions/base_retriever" + requester: + $ref: "#/definitions/requester" + request_parameters: + "filter": "greater-than({{ parameters.cursor_field }},{{ stream_interval.start_time }})" + "sort": "{{ parameters.cursor_field }}" $parameters: path: "templates" + cursor_field: "updated" # Semi-Incremental streams metrics_stream: @@ -262,7 +267,16 @@ definitions: events_detailed_stream: # Docs: https://developers.klaviyo.com/en/reference/get_event name: "events_detailed" - $ref: "#/definitions/base_incremental_stream" + $ref: "#/definitions/base_stream" + incremental_sync: + type: DatetimeBasedCursor + cursor_field: "{{ parameters.get('cursor_field') }}" + start_datetime: "{{ config.get('start_date', '2012-01-01T00:00:00Z') }}" + datetime_format: "%Y-%m-%dT%H:%M:%S%z" + cursor_datetime_formats: + - "%Y-%m-%dT%H:%M:%S.%f%z" + - "%Y-%m-%dT%H:%M:%S%z" + - "%Y-%m-%d %H:%M:%S%z" schema_loader: type: InlineSchemaLoader schema: "#/definitions/events_detailed_schema" @@ -279,6 +293,8 @@ definitions: request_parameters: "include": "metric" "fields[metric]": "name" + "filter": "greater-than({{ parameters.cursor_field }},{{ stream_interval.start_time }})" + "sort": "{{ parameters.cursor_field }}" state_migrations: - type: CustomStateMigration class_name: source_klaviyo.components.per_partition_state_migration.PerPartitionToSingleStateMigration @@ -976,3 +992,55 @@ spec: "predictive_analytics" column from being populated in your downstream destination. order: 2 required: ["api_key"] + +metadata: + testedStreams: + profiles: + streamHash: 7d27c2aee801ec7d0038722136c6b7e06b14a9ed + hasResponse: true + responsesAreSuccessful: true + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true + global_exclusions: + streamHash: 7e7633526c2855390903d6e60973bb13b23272d7 + hasResponse: true + responsesAreSuccessful: true + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true + events: + streamHash: af0180236001cbacc0788046bdc916026e1f82f6 + hasResponse: true + responsesAreSuccessful: true + hasRecords: false + primaryKeysArePresent: true + primaryKeysAreUnique: true + lists: + streamHash: 9edcccbf069463bf70bdc40db756e0f81eba032b + hasResponse: true + responsesAreSuccessful: true + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true + email_templates: + streamHash: 4c12cf304ffe3cd0fcaba1479498ad19c18c6f32 + hasResponse: true + responsesAreSuccessful: true + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true + metrics: + streamHash: 96e06644c47a223a29c85dc4318ec5f7da1cc414 + hasResponse: true + responsesAreSuccessful: true + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true + lists_detailed: + streamHash: 34e4a9f1fb0c879b915d8558d67feb887d76f8e5 + hasResponse: true + responsesAreSuccessful: false + hasRecords: true + primaryKeysArePresent: true + primaryKeysAreUnique: true diff --git a/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/config.py b/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/config.py index f6839b1a8376..878978270154 100644 --- a/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/config.py +++ b/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/config.py @@ -1,5 +1,5 @@ # Copyright (c) 2024 Airbyte, Inc., all rights reserved. - +from datetime import datetime from typing import Any, Dict @@ -7,5 +7,9 @@ class KlaviyoConfigBuilder: def __init__(self) -> None: self._config = {"api_key":"an_api_key","start_date":"2021-01-01T00:00:00Z"} + def with_start_date(self, start_date: datetime) -> "KlaviyoConfigBuilder": + self._config["start_date"] = start_date.strftime("%Y-%m-%dT%H:%M:%SZ") + return self + def build(self) -> Dict[str, Any]: return self._config diff --git a/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/test_profiles.py b/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/test_profiles.py index 87287261dcee..0886676d0745 100644 --- a/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/test_profiles.py +++ b/airbyte-integrations/connectors/source-klaviyo/unit_tests/integration/test_profiles.py @@ -1,12 +1,11 @@ # Copyright (c) 2023 Airbyte, Inc., all rights reserved. - +import datetime from typing import Any, Dict, Optional from unittest import TestCase from airbyte_cdk.test.catalog_builder import CatalogBuilder from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest -from airbyte_cdk.test.mock_http.request import ANY_QUERY_PARAMS from airbyte_cdk.test.mock_http.response_builder import ( FieldPath, HttpResponseBuilder, @@ -21,22 +20,28 @@ from source_klaviyo import SourceKlaviyo _ENDPOINT_TEMPLATE_NAME = "profiles" +_START_DATE = datetime.datetime(2021, 1, 1, tzinfo=datetime.timezone.utc) _STREAM_NAME = "profiles" _RECORDS_PATH = FieldPath("data") def _config() -> KlaviyoConfigBuilder: - return KlaviyoConfigBuilder() + return KlaviyoConfigBuilder().with_start_date(_START_DATE) def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog: return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build() -def _a_profile_request() -> HttpRequest: +def _a_profile_request(start_date: datetime) -> HttpRequest: return HttpRequest( url=f"https://a.klaviyo.com/api/profiles", - query_params=ANY_QUERY_PARAMS + query_params={ + "additional-fields[profile]": "predictive_analytics", + "page[size]": "100", + "filter": f"greater-than(updated,{start_date.strftime('%Y-%m-%dT%H:%M:%S%z')})", + "sort": "updated" + } ) @@ -68,7 +73,7 @@ class FullRefreshTest(TestCase): @HttpMocker() def test_when_read_then_extract_records(self, http_mocker: HttpMocker) -> None: http_mocker.get( - _a_profile_request(), + _a_profile_request(_START_DATE), _profiles_response().with_record(_a_profile()).build(), ) @@ -79,7 +84,7 @@ def test_when_read_then_extract_records(self, http_mocker: HttpMocker) -> None: @HttpMocker() def test_given_region_is_number_when_read_then_cast_as_string(self, http_mocker: HttpMocker) -> None: http_mocker.get( - _a_profile_request(), + _a_profile_request(_START_DATE), _profiles_response().with_record(_a_profile().with_field(NestedPath(["attributes", "location", "region"]), 10)).build(), ) diff --git a/airbyte-integrations/connectors/source-klaviyo/unit_tests/test_streams.py b/airbyte-integrations/connectors/source-klaviyo/unit_tests/test_streams.py index 196c7e736f27..0c09f72cbb6f 100644 --- a/airbyte-integrations/connectors/source-klaviyo/unit_tests/test_streams.py +++ b/airbyte-integrations/connectors/source-klaviyo/unit_tests/test_streams.py @@ -376,21 +376,6 @@ def test_read_records(self, start_date, stream_state, input_records, expected_re class TestProfilesStream: - @pytest.mark.parametrize( - "disable_predictive_analytics, expected_additional_fields", - [ - pytest.param(False, {"additional-fields[profile]": "predictive_analytics"}, id="test_config_with_disable_fetching_predictive_analytics"), - pytest.param(True, {}, id="test_config_with_disable_fetching_predictive_analytics_turned_on") - ] - ) - def test_request_params(self, disable_predictive_analytics, expected_additional_fields): - if disable_predictive_analytics: - config = {"disable_fetching_predictive_analytics": True} | CONFIG - else: - config = CONFIG - stream = get_stream_by_name("profiles", config) - assert stream.retriever.requester.get_request_params() == expected_additional_fields - def test_read_records(self, requests_mock): stream = get_stream_by_name("profiles", CONFIG) json = { diff --git a/airbyte-integrations/connectors/source-monday/metadata.yaml b/airbyte-integrations/connectors/source-monday/metadata.yaml index d962682ecb11..f29b45ba5540 100644 --- a/airbyte-integrations/connectors/source-monday/metadata.yaml +++ b/airbyte-integrations/connectors/source-monday/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: api connectorType: source definitionId: 80a54ea2-9959-4040-aac1-eee42423ec9b - dockerImageTag: 2.1.4 + dockerImageTag: 2.1.5 releases: breakingChanges: 2.0.0: diff --git a/airbyte-integrations/connectors/source-monday/pyproject.toml b/airbyte-integrations/connectors/source-monday/pyproject.toml index 31c4b1e6ac33..09f5f9c8bafc 100644 --- a/airbyte-integrations/connectors/source-monday/pyproject.toml +++ b/airbyte-integrations/connectors/source-monday/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "2.1.4" +version = "2.1.5" name = "source-monday" description = "Source implementation for Monday." authors = [ "Airbyte ",] diff --git a/airbyte-integrations/connectors/source-monday/source_monday/spec.json b/airbyte-integrations/connectors/source-monday/source_monday/spec.json index 9119d34d41c2..f615e370ef2a 100644 --- a/airbyte-integrations/connectors/source-monday/source_monday/spec.json +++ b/airbyte-integrations/connectors/source-monday/source_monday/spec.json @@ -116,6 +116,16 @@ } } }, + "oauth_connector_input_specification": { + "type": "object", + "additionalProperties": false, + "properties": { + "consent_url": "https://auth.monday.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}", + "scope": "me:read boards:read workspaces:read users:read account:read updates:read assets:read tags:read teams:read", + "access_token_url": "https://auth.monday.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}", + "extract_output": ["access_token"] + } + }, "oauth_user_input_from_connector_config_specification": { "type": "object", "additionalProperties": false, diff --git a/airbyte-integrations/connectors/source-mysql/metadata.yaml b/airbyte-integrations/connectors/source-mysql/metadata.yaml index 35cc478ed2bc..cd58a33782da 100644 --- a/airbyte-integrations/connectors/source-mysql/metadata.yaml +++ b/airbyte-integrations/connectors/source-mysql/metadata.yaml @@ -9,7 +9,7 @@ data: connectorSubtype: database connectorType: source definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad - dockerImageTag: 3.9.0-rc.5 + dockerImageTag: 3.9.0-rc.6 dockerRepository: airbyte/source-mysql documentationUrl: https://docs.airbyte.com/integrations/sources/mysql githubIssueLabel: source-mysql diff --git a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlJdbcEncryption.kt b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlJdbcEncryption.kt index ab21dbc88774..f7bf5a456144 100644 --- a/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlJdbcEncryption.kt +++ b/airbyte-integrations/connectors/source-mysql/src/main/kotlin/io/airbyte/integrations/source/mysql/MysqlJdbcEncryption.kt @@ -11,7 +11,7 @@ import io.github.oshai.kotlinlogging.KotlinLogging import java.net.MalformedURLException import java.net.URI import java.nio.file.FileSystems -import java.util.* +import java.util.UUID private val log = KotlinLogging.logger {} diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json index 7f18f8208448..4a84a545efa9 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/cloud_spec.json @@ -372,6 +372,46 @@ "required": ["name", "format"] } }, + "delivery_method": { + "title": "Delivery Method", + "default": "use_records_transfer", + "type": "object", + "order": 6, + "display_type": "radio", + "group": "advanced", + "oneOf": [ + { + "title": "Replicate Records", + "type": "object", + "properties": { + "delivery_type": { + "title": "Delivery Type", + "default": "use_records_transfer", + "const": "use_records_transfer", + "enum": ["use_records_transfer"], + "type": "string" + } + }, + "description": "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.", + "required": ["delivery_type"] + }, + { + "title": "Copy Raw Files", + "type": "object", + "properties": { + "delivery_type": { + "title": "Delivery Type", + "default": "use_file_transfer", + "const": "use_file_transfer", + "enum": ["use_file_transfer"], + "type": "string" + } + }, + "description": "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.", + "required": ["delivery_type"] + } + ] + }, "bucket": { "title": "Bucket", "description": "Name of the S3 bucket where the file(s) exist.", diff --git a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json index 2b89a7812eaf..f3cab1f3a0c6 100644 --- a/airbyte-integrations/connectors/source-s3/integration_tests/spec.json +++ b/airbyte-integrations/connectors/source-s3/integration_tests/spec.json @@ -372,6 +372,46 @@ "required": ["name", "format"] } }, + "delivery_method": { + "title": "Delivery Method", + "default": "use_records_transfer", + "type": "object", + "order": 6, + "display_type": "radio", + "group": "advanced", + "oneOf": [ + { + "title": "Replicate Records", + "type": "object", + "properties": { + "delivery_type": { + "title": "Delivery Type", + "default": "use_records_transfer", + "const": "use_records_transfer", + "enum": ["use_records_transfer"], + "type": "string" + } + }, + "description": "Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.", + "required": ["delivery_type"] + }, + { + "title": "Copy Raw Files", + "type": "object", + "properties": { + "delivery_type": { + "title": "Delivery Type", + "default": "use_file_transfer", + "const": "use_file_transfer", + "enum": ["use_file_transfer"], + "type": "string" + } + }, + "description": "Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.", + "required": ["delivery_type"] + } + ] + }, "bucket": { "title": "Bucket", "description": "Name of the S3 bucket where the file(s) exist.", diff --git a/airbyte-integrations/connectors/source-s3/metadata.yaml b/airbyte-integrations/connectors/source-s3/metadata.yaml index 4f8da9e5d4df..d5c5e90bb720 100644 --- a/airbyte-integrations/connectors/source-s3/metadata.yaml +++ b/airbyte-integrations/connectors/source-s3/metadata.yaml @@ -10,7 +10,7 @@ data: connectorSubtype: file connectorType: source definitionId: 69589781-7828-43c5-9f63-8925b1c1ccc2 - dockerImageTag: 4.9.2 + dockerImageTag: 4.10.1 dockerRepository: airbyte/source-s3 documentationUrl: https://docs.airbyte.com/integrations/sources/s3 githubIssueLabel: source-s3 @@ -39,6 +39,7 @@ data: message: Following 4.0.0 config change, we are eliminating the `streams.*.file_type` field which was redundant with `streams.*.format` upgradeDeadline: "2023-10-18" supportLevel: certified + supportsFileTransfer: true tags: - language:python - cdk:python-file-based diff --git a/airbyte-integrations/connectors/source-s3/poetry.lock b/airbyte-integrations/connectors/source-s3/poetry.lock index b49399f4395b..26ed6cbc756f 100644 --- a/airbyte-integrations/connectors/source-s3/poetry.lock +++ b/airbyte-integrations/connectors/source-s3/poetry.lock @@ -1,14 +1,14 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "airbyte-cdk" -version = "5.11.1" +version = "6.5.2" description = "A framework for writing Airbyte Connectors." optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "airbyte_cdk-5.11.1-py3-none-any.whl", hash = "sha256:efddee85179128cb7d65b11a9a4aba353ea5b01daaa56fc3069d12ce156d2857"}, - {file = "airbyte_cdk-5.11.1.tar.gz", hash = "sha256:0cc1cdc1d50909bbb2791a9c389c0f3db32474502addf65eb745d87af7d36fd9"}, + {file = "airbyte_cdk-6.5.2-py3-none-any.whl", hash = "sha256:ea33322da43cb5cef4f51ea759b7769e59a074249710b14e28d93bfb72cd825b"}, + {file = "airbyte_cdk-6.5.2.tar.gz", hash = "sha256:9cb026230c2649d4578574fd031f32a64f0d3f934f01ccfd3f5daf4f41bd1384"}, ] [package.dependencies] @@ -28,11 +28,13 @@ jsonschema = ">=3.2.0,<3.3.0" langchain_core = "0.1.42" markdown = {version = "*", optional = true, markers = "extra == \"file-based\""} nltk = "3.8.1" +numpy = "<2" orjson = ">=3.10.7,<4.0.0" pandas = "2.2.2" pdf2image = {version = "1.16.3", optional = true, markers = "extra == \"file-based\""} "pdfminer.six" = {version = "20221105", optional = true, markers = "extra == \"file-based\""} pendulum = "<3.0.0" +psutil = "6.1.0" pyarrow = {version = ">=15.0.0,<15.1.0", optional = true, markers = "extra == \"file-based\""} pydantic = ">=2.7,<3.0" pyjwt = ">=2.8.0,<3.0.0" @@ -49,21 +51,23 @@ serpyco-rs = ">=1.10.2,<2.0.0" unstructured = {version = "0.10.27", extras = ["docx", "pptx"], optional = true, markers = "extra == \"file-based\""} "unstructured.pytesseract" = {version = ">=0.3.12", optional = true, markers = "extra == \"file-based\""} wcmatch = "8.4" +xmltodict = ">=0.13.0,<0.14.0" [package.extras] file-based = ["avro (>=1.11.2,<1.12.0)", "fastavro (>=1.8.0,<1.9.0)", "markdown", "pdf2image (==1.16.3)", "pdfminer.six (==20221105)", "pyarrow (>=15.0.0,<15.1.0)", "pytesseract (==0.3.10)", "python-calamine (==0.2.3)", "python-snappy (==0.7.3)", "unstructured.pytesseract (>=0.3.12)", "unstructured[docx,pptx] (==0.10.27)"] sphinx-docs = ["Sphinx (>=4.2,<4.3)", "sphinx-rtd-theme (>=1.0,<1.1)"] +sql = ["sqlalchemy (>=2.0,!=2.0.36,<3.0)"] vector-db-based = ["cohere (==4.21)", "langchain (==0.1.16)", "openai[embeddings] (==0.27.9)", "tiktoken (==0.4.0)"] [[package]] name = "airbyte-protocol-models-dataclasses" -version = "0.13.0" +version = "0.13.1" description = "Declares the Airbyte Protocol using Python Dataclasses. Dataclasses in Python have less performance overhead compared to Pydantic models, making them a more efficient choice for scenarios where speed and memory usage are critical" optional = false python-versions = ">=3.8" files = [ - {file = "airbyte_protocol_models_dataclasses-0.13.0-py3-none-any.whl", hash = "sha256:0aedb99ffc4f9aab0ce91bba2c292fa17cd8fd4b42eeba196d6a16c20bbbd7a5"}, - {file = "airbyte_protocol_models_dataclasses-0.13.0.tar.gz", hash = "sha256:72e67850d661e2808406aec5839b3158ebb94d3553b798dbdae1b4a278548d2f"}, + {file = "airbyte_protocol_models_dataclasses-0.13.1-py3-none-any.whl", hash = "sha256:20a734b7b1c3479a643777830db6a2e0a34428f33d16abcfd320552576fabe5a"}, + {file = "airbyte_protocol_models_dataclasses-0.13.1.tar.gz", hash = "sha256:ec6a0fb6b16267bde910f52279445d06c8e1a3e4ed82ac2937b405ab280449d5"}, ] [[package]] @@ -187,17 +191,17 @@ lxml = ["lxml"] [[package]] name = "boto3" -version = "1.35.54" +version = "1.35.57" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.35.54-py3-none-any.whl", hash = "sha256:2d5e160b614db55fbee7981001c54476cb827c441cef65b2fcb2c52a62019909"}, - {file = "boto3-1.35.54.tar.gz", hash = "sha256:7d9c359bbbc858a60b51c86328db813353c8bd1940212cdbd0a7da835291c2e1"}, + {file = "boto3-1.35.57-py3-none-any.whl", hash = "sha256:9edf49640c79a05b0a72f4c2d1e24dfc164344b680535a645f455ac624dc3680"}, + {file = "boto3-1.35.57.tar.gz", hash = "sha256:db58348849a5af061f0f5ec9c3b699da5221ca83354059fdccb798e3ddb6b62a"}, ] [package.dependencies] -botocore = ">=1.35.54,<1.36.0" +botocore = ">=1.35.57,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -206,13 +210,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.54" +version = "1.35.57" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.54-py3-none-any.whl", hash = "sha256:9cca1811094b6cdc144c2c063a3ec2db6d7c88194b04d4277cd34fc8e3473aff"}, - {file = "botocore-1.35.54.tar.gz", hash = "sha256:131bb59ce59c8a939b31e8e647242d70cf11d32d4529fa4dca01feea1e891a76"}, + {file = "botocore-1.35.57-py3-none-any.whl", hash = "sha256:92ddd02469213766872cb2399269dd20948f90348b42bf08379881d5e946cc34"}, + {file = "botocore-1.35.57.tar.gz", hash = "sha256:d96306558085baf0bcb3b022d7a8c39c93494f031edb376694d2b2dcd0e81327"}, ] [package.dependencies] @@ -1160,13 +1164,13 @@ six = "*" [[package]] name = "langsmith" -version = "0.1.139" +version = "0.1.142" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.139-py3-none-any.whl", hash = "sha256:2a4a541bfbd0a9727255df28a60048c85bc8c4c6a276975923785c3fd82dc879"}, - {file = "langsmith-0.1.139.tar.gz", hash = "sha256:2f9e4d32fef3ad7ef42c8506448cce3a31ad6b78bb4f3310db04ddaa1e9d744d"}, + {file = "langsmith-0.1.142-py3-none-any.whl", hash = "sha256:f639ca23c9a0bb77af5fb881679b2f66ff1f21f19d0bebf4e51375e7585a8b38"}, + {file = "langsmith-0.1.142.tar.gz", hash = "sha256:f8a84d100f3052233ff0a1d66ae14c5dfc20b7e41a1601de011384f16ee6cb82"}, ] [package.dependencies] @@ -1900,6 +1904,36 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "psutil" +version = "6.1.0" +description = "Cross-platform lib for process and system monitoring in Python." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +files = [ + {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, + {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"}, + {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:000d1d1ebd634b4efb383f4034437384e44a6d455260aaee2eca1e9c1b55f047"}, + {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_i686.whl", hash = "sha256:5cd2bcdc75b452ba2e10f0e8ecc0b57b827dd5d7aaffbc6821b2a9a242823a76"}, + {file = "psutil-6.1.0-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:045f00a43c737f960d273a83973b2511430d61f283a44c96bf13a6e829ba8fdc"}, + {file = "psutil-6.1.0-cp27-none-win32.whl", hash = "sha256:9118f27452b70bb1d9ab3198c1f626c2499384935aaf55388211ad982611407e"}, + {file = "psutil-6.1.0-cp27-none-win_amd64.whl", hash = "sha256:a8506f6119cff7015678e2bce904a4da21025cc70ad283a53b099e7620061d85"}, + {file = "psutil-6.1.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6e2dcd475ce8b80522e51d923d10c7871e45f20918e027ab682f94f1c6351688"}, + {file = "psutil-6.1.0-cp36-abi3-macosx_11_0_arm64.whl", hash = "sha256:0895b8414afafc526712c498bd9de2b063deaac4021a3b3c34566283464aff8e"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9dcbfce5d89f1d1f2546a2090f4fcf87c7f669d1d90aacb7d7582addece9fb38"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:498c6979f9c6637ebc3a73b3f87f9eb1ec24e1ce53a7c5173b8508981614a90b"}, + {file = "psutil-6.1.0-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d905186d647b16755a800e7263d43df08b790d709d575105d419f8b6ef65423a"}, + {file = "psutil-6.1.0-cp36-cp36m-win32.whl", hash = "sha256:6d3fbbc8d23fcdcb500d2c9f94e07b1342df8ed71b948a2649b5cb060a7c94ca"}, + {file = "psutil-6.1.0-cp36-cp36m-win_amd64.whl", hash = "sha256:1209036fbd0421afde505a4879dee3b2fd7b1e14fee81c0069807adcbbcca747"}, + {file = "psutil-6.1.0-cp37-abi3-win32.whl", hash = "sha256:1ad45a1f5d0b608253b11508f80940985d1d0c8f6111b5cb637533a0e6ddc13e"}, + {file = "psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be"}, + {file = "psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a"}, +] + +[package.extras] +dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"] +test = ["pytest", "pytest-xdist", "setuptools"] + [[package]] name = "py" version = "1.11.0" @@ -2625,105 +2659,105 @@ all = ["numpy"] [[package]] name = "regex" -version = "2024.9.11" +version = "2024.11.6" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" files = [ - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, - {file = "regex-2024.9.11-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:16e13a7929791ac1216afde26f712802e3df7bf0360b32e4914dca3ab8baeea5"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:46989629904bad940bbec2106528140a218b4a36bb3042d8406980be1941429c"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a906ed5e47a0ce5f04b2c981af1c9acf9e8696066900bf03b9d7879a6f679fc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9a091b0550b3b0207784a7d6d0f1a00d1d1c8a11699c1a4d93db3fbefc3ad35"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5ddcd9a179c0a6fa8add279a4444015acddcd7f232a49071ae57fa6e278f1f71"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6b41e1adc61fa347662b09398e31ad446afadff932a24807d3ceb955ed865cc8"}, - {file = "regex-2024.9.11-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ced479f601cd2f8ca1fd7b23925a7e0ad512a56d6e9476f79b8f381d9d37090a"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:635a1d96665f84b292e401c3d62775851aedc31d4f8784117b3c68c4fcd4118d"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:c0256beda696edcf7d97ef16b2a33a8e5a875affd6fa6567b54f7c577b30a137"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:3ce4f1185db3fbde8ed8aa223fc9620f276c58de8b0d4f8cc86fd1360829edb6"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:09d77559e80dcc9d24570da3745ab859a9cf91953062e4ab126ba9d5993688ca"}, - {file = "regex-2024.9.11-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7a22ccefd4db3f12b526eccb129390942fe874a3a9fdbdd24cf55773a1faab1a"}, - {file = "regex-2024.9.11-cp310-cp310-win32.whl", hash = "sha256:f745ec09bc1b0bd15cfc73df6fa4f726dcc26bb16c23a03f9e3367d357eeedd0"}, - {file = "regex-2024.9.11-cp310-cp310-win_amd64.whl", hash = "sha256:01c2acb51f8a7d6494c8c5eafe3d8e06d76563d8a8a4643b37e9b2dd8a2ff623"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:2cce2449e5927a0bf084d346da6cd5eb016b2beca10d0013ab50e3c226ffc0df"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b37fa423beefa44919e009745ccbf353d8c981516e807995b2bd11c2c77d268"}, - {file = "regex-2024.9.11-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:64ce2799bd75039b480cc0360907c4fb2f50022f030bf9e7a8705b636e408fad"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a4cc92bb6db56ab0c1cbd17294e14f5e9224f0cc6521167ef388332604e92679"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d05ac6fa06959c4172eccd99a222e1fbf17b5670c4d596cb1e5cde99600674c4"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:040562757795eeea356394a7fb13076ad4f99d3c62ab0f8bdfb21f99a1f85664"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6113c008a7780792efc80f9dfe10ba0cd043cbf8dc9a76ef757850f51b4edc50"}, - {file = "regex-2024.9.11-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e5fb5f77c8745a60105403a774fe2c1759b71d3e7b4ca237a5e67ad066c7199"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:54d9ff35d4515debf14bc27f1e3b38bfc453eff3220f5bce159642fa762fe5d4"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:df5cbb1fbc74a8305b6065d4ade43b993be03dbe0f8b30032cced0d7740994bd"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:7fb89ee5d106e4a7a51bce305ac4efb981536301895f7bdcf93ec92ae0d91c7f"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:a738b937d512b30bf75995c0159c0ddf9eec0775c9d72ac0202076c72f24aa96"}, - {file = "regex-2024.9.11-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e28f9faeb14b6f23ac55bfbbfd3643f5c7c18ede093977f1df249f73fd22c7b1"}, - {file = "regex-2024.9.11-cp311-cp311-win32.whl", hash = "sha256:18e707ce6c92d7282dfce370cd205098384b8ee21544e7cb29b8aab955b66fa9"}, - {file = "regex-2024.9.11-cp311-cp311-win_amd64.whl", hash = "sha256:313ea15e5ff2a8cbbad96ccef6be638393041b0a7863183c2d31e0c6116688cf"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:b0d0a6c64fcc4ef9c69bd5b3b3626cc3776520a1637d8abaa62b9edc147a58f7"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:49b0e06786ea663f933f3710a51e9385ce0cba0ea56b67107fd841a55d56a231"}, - {file = "regex-2024.9.11-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5b513b6997a0b2f10e4fd3a1313568e373926e8c252bd76c960f96fd039cd28d"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ee439691d8c23e76f9802c42a95cfeebf9d47cf4ffd06f18489122dbb0a7ad64"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8f877c89719d759e52783f7fe6e1c67121076b87b40542966c02de5503ace42"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:23b30c62d0f16827f2ae9f2bb87619bc4fba2044911e2e6c2eb1af0161cdb766"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:85ab7824093d8f10d44330fe1e6493f756f252d145323dd17ab6b48733ff6c0a"}, - {file = "regex-2024.9.11-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8dee5b4810a89447151999428fe096977346cf2f29f4d5e29609d2e19e0199c9"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:98eeee2f2e63edae2181c886d7911ce502e1292794f4c5ee71e60e23e8d26b5d"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:57fdd2e0b2694ce6fc2e5ccf189789c3e2962916fb38779d3e3521ff8fe7a822"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:d552c78411f60b1fdaafd117a1fca2f02e562e309223b9d44b7de8be451ec5e0"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a0b2b80321c2ed3fcf0385ec9e51a12253c50f146fddb2abbb10f033fe3d049a"}, - {file = "regex-2024.9.11-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:18406efb2f5a0e57e3a5881cd9354c1512d3bb4f5c45d96d110a66114d84d23a"}, - {file = "regex-2024.9.11-cp312-cp312-win32.whl", hash = "sha256:e464b467f1588e2c42d26814231edecbcfe77f5ac414d92cbf4e7b55b2c2a776"}, - {file = "regex-2024.9.11-cp312-cp312-win_amd64.whl", hash = "sha256:9e8719792ca63c6b8340380352c24dcb8cd7ec49dae36e963742a275dfae6009"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:c157bb447303070f256e084668b702073db99bbb61d44f85d811025fcf38f784"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:4db21ece84dfeefc5d8a3863f101995de646c6cb0536952c321a2650aa202c36"}, - {file = "regex-2024.9.11-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:220e92a30b426daf23bb67a7962900ed4613589bab80382be09b48896d211e92"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb1ae19e64c14c7ec1995f40bd932448713d3c73509e82d8cd7744dc00e29e86"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f47cd43a5bfa48f86925fe26fbdd0a488ff15b62468abb5d2a1e092a4fb10e85"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9d4a76b96f398697fe01117093613166e6aa8195d63f1b4ec3f21ab637632963"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0ea51dcc0835eea2ea31d66456210a4e01a076d820e9039b04ae8d17ac11dee6"}, - {file = "regex-2024.9.11-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b7aaa315101c6567a9a45d2839322c51c8d6e81f67683d529512f5bcfb99c802"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c57d08ad67aba97af57a7263c2d9006d5c404d721c5f7542f077f109ec2a4a29"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f8404bf61298bb6f8224bb9176c1424548ee1181130818fcd2cbffddc768bed8"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:dd4490a33eb909ef5078ab20f5f000087afa2a4daa27b4c072ccb3cb3050ad84"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:eee9130eaad130649fd73e5cd92f60e55708952260ede70da64de420cdcad554"}, - {file = "regex-2024.9.11-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6a2644a93da36c784e546de579ec1806bfd2763ef47babc1b03d765fe560c9f8"}, - {file = "regex-2024.9.11-cp313-cp313-win32.whl", hash = "sha256:e997fd30430c57138adc06bba4c7c2968fb13d101e57dd5bb9355bf8ce3fa7e8"}, - {file = "regex-2024.9.11-cp313-cp313-win_amd64.whl", hash = "sha256:042c55879cfeb21a8adacc84ea347721d3d83a159da6acdf1116859e2427c43f"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:35f4a6f96aa6cb3f2f7247027b07b15a374f0d5b912c0001418d1d55024d5cb4"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:55b96e7ce3a69a8449a66984c268062fbaa0d8ae437b285428e12797baefce7e"}, - {file = "regex-2024.9.11-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cb130fccd1a37ed894824b8c046321540263013da72745d755f2d35114b81a60"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:323c1f04be6b2968944d730e5c2091c8c89767903ecaa135203eec4565ed2b2b"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:be1c8ed48c4c4065ecb19d882a0ce1afe0745dfad8ce48c49586b90a55f02366"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5b029322e6e7b94fff16cd120ab35a253236a5f99a79fb04fda7ae71ca20ae8"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6fff13ef6b5f29221d6904aa816c34701462956aa72a77f1f151a8ec4f56aeb"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:587d4af3979376652010e400accc30404e6c16b7df574048ab1f581af82065e4"}, - {file = "regex-2024.9.11-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:079400a8269544b955ffa9e31f186f01d96829110a3bf79dc338e9910f794fca"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:f9268774428ec173654985ce55fc6caf4c6d11ade0f6f914d48ef4719eb05ebb"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:23f9985c8784e544d53fc2930fc1ac1a7319f5d5332d228437acc9f418f2f168"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2941333154baff9838e88aa71c1d84f4438189ecc6021a12c7573728b5838e"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:e93f1c331ca8e86fe877a48ad64e77882c0c4da0097f2212873a69bbfea95d0c"}, - {file = "regex-2024.9.11-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:846bc79ee753acf93aef4184c040d709940c9d001029ceb7b7a52747b80ed2dd"}, - {file = "regex-2024.9.11-cp38-cp38-win32.whl", hash = "sha256:c94bb0a9f1db10a1d16c00880bdebd5f9faf267273b8f5bd1878126e0fbde771"}, - {file = "regex-2024.9.11-cp38-cp38-win_amd64.whl", hash = "sha256:2b08fce89fbd45664d3df6ad93e554b6c16933ffa9d55cb7e01182baaf971508"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:07f45f287469039ffc2c53caf6803cd506eb5f5f637f1d4acb37a738f71dd066"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4838e24ee015101d9f901988001038f7f0d90dc0c3b115541a1365fb439add62"}, - {file = "regex-2024.9.11-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:6edd623bae6a737f10ce853ea076f56f507fd7726bee96a41ee3d68d347e4d16"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c69ada171c2d0e97a4b5aa78fbb835e0ffbb6b13fc5da968c09811346564f0d3"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02087ea0a03b4af1ed6ebab2c54d7118127fee8d71b26398e8e4b05b78963199"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:69dee6a020693d12a3cf892aba4808fe168d2a4cef368eb9bf74f5398bfd4ee8"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:297f54910247508e6e5cae669f2bc308985c60540a4edd1c77203ef19bfa63ca"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ecea58b43a67b1b79805f1a0255730edaf5191ecef84dbc4cc85eb30bc8b63b9"}, - {file = "regex-2024.9.11-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eab4bb380f15e189d1313195b062a6aa908f5bd687a0ceccd47c8211e9cf0d4a"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0cbff728659ce4bbf4c30b2a1be040faafaa9eca6ecde40aaff86f7889f4ab39"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:54c4a097b8bc5bb0dfc83ae498061d53ad7b5762e00f4adaa23bee22b012e6ba"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:73d6d2f64f4d894c96626a75578b0bf7d9e56dcda8c3d037a2118fdfe9b1c664"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:e53b5fbab5d675aec9f0c501274c467c0f9a5d23696cfc94247e1fb56501ed89"}, - {file = "regex-2024.9.11-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:0ffbcf9221e04502fc35e54d1ce9567541979c3fdfb93d2c554f0ca583a19b35"}, - {file = "regex-2024.9.11-cp39-cp39-win32.whl", hash = "sha256:e4c22e1ac1f1ec1e09f72e6c44d8f2244173db7eb9629cc3a346a8d7ccc31142"}, - {file = "regex-2024.9.11-cp39-cp39-win_amd64.whl", hash = "sha256:faa3c142464efec496967359ca99696c896c591c56c53506bac1ad465f66e919"}, - {file = "regex-2024.9.11.tar.gz", hash = "sha256:6c188c307e8433bcb63dc1915022deb553b4203a70722fc542c363bf120a01fd"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ff590880083d60acc0433f9c3f713c51f7ac6ebb9adf889c79a261ecf541aa91"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:658f90550f38270639e83ce492f27d2c8d2cd63805c65a13a14d36ca126753f0"}, + {file = "regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:164d8b7b3b4bcb2068b97428060b2a53be050085ef94eca7f240e7947f1b080e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d3660c82f209655a06b587d55e723f0b813d3a7db2e32e5e7dc64ac2a9e86fde"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d22326fcdef5e08c154280b71163ced384b428343ae16a5ab2b3354aed12436e"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1ac758ef6aebfc8943560194e9fd0fa18bcb34d89fd8bd2af18183afd8da3a2"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:997d6a487ff00807ba810e0f8332c18b4eb8d29463cfb7c820dc4b6e7562d0cf"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:02a02d2bb04fec86ad61f3ea7f49c015a0681bf76abb9857f945d26159d2968c"}, + {file = "regex-2024.11.6-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:f02f93b92358ee3f78660e43b4b0091229260c5d5c408d17d60bf26b6c900e86"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:06eb1be98df10e81ebaded73fcd51989dcf534e3c753466e4b60c4697a003b67"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:040df6fe1a5504eb0f04f048e6d09cd7c7110fef851d7c567a6b6e09942feb7d"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabbfc59f2c6edba2a6622c647b716e34e8e3867e0ab975412c5c2f79b82da2"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8447d2d39b5abe381419319f942de20b7ecd60ce86f16a23b0698f22e1b70008"}, + {file = "regex-2024.11.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da8f5fc57d1933de22a9e23eec290a0d8a5927a5370d24bda9a6abe50683fe62"}, + {file = "regex-2024.11.6-cp310-cp310-win32.whl", hash = "sha256:b489578720afb782f6ccf2840920f3a32e31ba28a4b162e13900c3e6bd3f930e"}, + {file = "regex-2024.11.6-cp310-cp310-win_amd64.whl", hash = "sha256:5071b2093e793357c9d8b2929dfc13ac5f0a6c650559503bb81189d0a3814519"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5478c6962ad548b54a591778e93cd7c456a7a29f8eca9c49e4f9a806dcc5d638"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:2c89a8cc122b25ce6945f0423dc1352cb9593c68abd19223eebbd4e56612c5b7"}, + {file = "regex-2024.11.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:94d87b689cdd831934fa3ce16cc15cd65748e6d689f5d2b8f4f4df2065c9fa20"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1062b39a0a2b75a9c694f7a08e7183a80c63c0d62b301418ffd9c35f55aaa114"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:167ed4852351d8a750da48712c3930b031f6efdaa0f22fa1933716bfcd6bf4a3"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d548dafee61f06ebdb584080621f3e0c23fff312f0de1afc776e2a2ba99a74f"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f2a19f302cd1ce5dd01a9099aaa19cae6173306d1302a43b627f62e21cf18ac0"}, + {file = "regex-2024.11.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bec9931dfb61ddd8ef2ebc05646293812cb6b16b60cf7c9511a832b6f1854b55"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9714398225f299aa85267fd222f7142fcb5c769e73d7733344efc46f2ef5cf89"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:202eb32e89f60fc147a41e55cb086db2a3f8cb82f9a9a88440dcfc5d37faae8d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:4181b814e56078e9b00427ca358ec44333765f5ca1b45597ec7446d3a1ef6e34"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:068376da5a7e4da51968ce4c122a7cd31afaaec4fccc7856c92f63876e57b51d"}, + {file = "regex-2024.11.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ac10f2c4184420d881a3475fb2c6f4d95d53a8d50209a2500723d831036f7c45"}, + {file = "regex-2024.11.6-cp311-cp311-win32.whl", hash = "sha256:c36f9b6f5f8649bb251a5f3f66564438977b7ef8386a52460ae77e6070d309d9"}, + {file = "regex-2024.11.6-cp311-cp311-win_amd64.whl", hash = "sha256:02e28184be537f0e75c1f9b2f8847dc51e08e6e171c6bde130b2687e0c33cf60"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:52fb28f528778f184f870b7cf8f225f5eef0a8f6e3778529bdd40c7b3920796a"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdd6028445d2460f33136c55eeb1f601ab06d74cb3347132e1c24250187500d9"}, + {file = "regex-2024.11.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805e6b60c54bf766b251e94526ebad60b7de0c70f70a4e6210ee2891acb70bf2"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b85c2530be953a890eaffde05485238f07029600e8f098cdf1848d414a8b45e4"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb26437975da7dc36b7efad18aa9dd4ea569d2357ae6b783bf1118dabd9ea577"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abfa5080c374a76a251ba60683242bc17eeb2c9818d0d30117b4486be10c59d3"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b7fa6606c2881c1db9479b0eaa11ed5dfa11c8d60a474ff0e095099f39d98e"}, + {file = "regex-2024.11.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c32f75920cf99fe6b6c539c399a4a128452eaf1af27f39bce8909c9a3fd8cbe"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:982e6d21414e78e1f51cf595d7f321dcd14de1f2881c5dc6a6e23bbbbd68435e"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a7c2155f790e2fb448faed6dd241386719802296ec588a8b9051c1f5c481bc29"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:149f5008d286636e48cd0b1dd65018548944e495b0265b45e1bffecce1ef7f39"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:e5364a4502efca094731680e80009632ad6624084aff9a23ce8c8c6820de3e51"}, + {file = "regex-2024.11.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0a86e7eeca091c09e021db8eb72d54751e527fa47b8d5787caf96d9831bd02ad"}, + {file = "regex-2024.11.6-cp312-cp312-win32.whl", hash = "sha256:32f9a4c643baad4efa81d549c2aadefaeba12249b2adc5af541759237eee1c54"}, + {file = "regex-2024.11.6-cp312-cp312-win_amd64.whl", hash = "sha256:a93c194e2df18f7d264092dc8539b8ffb86b45b899ab976aa15d48214138e81b"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a6ba92c0bcdf96cbf43a12c717eae4bc98325ca3730f6b130ffa2e3c3c723d84"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:525eab0b789891ac3be914d36893bdf972d483fe66551f79d3e27146191a37d4"}, + {file = "regex-2024.11.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:086a27a0b4ca227941700e0b31425e7a28ef1ae8e5e05a33826e17e47fbfdba0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bde01f35767c4a7899b7eb6e823b125a64de314a8ee9791367c9a34d56af18d0"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b583904576650166b3d920d2bcce13971f6f9e9a396c673187f49811b2769dc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c4de13f06a0d54fa0d5ab1b7138bfa0d883220965a29616e3ea61b35d5f5fc7"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3cde6e9f2580eb1665965ce9bf17ff4952f34f5b126beb509fee8f4e994f143c"}, + {file = "regex-2024.11.6-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0d7f453dca13f40a02b79636a339c5b62b670141e63efd511d3f8f73fba162b3"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:59dfe1ed21aea057a65c6b586afd2a945de04fc7db3de0a6e3ed5397ad491b07"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b97c1e0bd37c5cd7902e65f410779d39eeda155800b65fc4d04cc432efa9bc6e"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f9d1e379028e0fc2ae3654bac3cbbef81bf3fd571272a42d56c24007979bafb6"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:13291b39131e2d002a7940fb176e120bec5145f3aeb7621be6534e46251912c4"}, + {file = "regex-2024.11.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4f51f88c126370dcec4908576c5a627220da6c09d0bff31cfa89f2523843316d"}, + {file = "regex-2024.11.6-cp313-cp313-win32.whl", hash = "sha256:63b13cfd72e9601125027202cad74995ab26921d8cd935c25f09c630436348ff"}, + {file = "regex-2024.11.6-cp313-cp313-win_amd64.whl", hash = "sha256:2b3361af3198667e99927da8b84c1b010752fa4b1115ee30beaa332cabc3ef1a"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:3a51ccc315653ba012774efca4f23d1d2a8a8f278a6072e29c7147eee7da446b"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ad182d02e40de7459b73155deb8996bbd8e96852267879396fb274e8700190e3"}, + {file = "regex-2024.11.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:ba9b72e5643641b7d41fa1f6d5abda2c9a263ae835b917348fc3c928182ad467"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40291b1b89ca6ad8d3f2b82782cc33807f1406cf68c8d440861da6304d8ffbbd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cdf58d0e516ee426a48f7b2c03a332a4114420716d55769ff7108c37a09951bf"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a36fdf2af13c2b14738f6e973aba563623cb77d753bbbd8d414d18bfaa3105dd"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1cee317bfc014c2419a76bcc87f071405e3966da434e03e13beb45f8aced1a6"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:50153825ee016b91549962f970d6a4442fa106832e14c918acd1c8e479916c4f"}, + {file = "regex-2024.11.6-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ea1bfda2f7162605f6e8178223576856b3d791109f15ea99a9f95c16a7636fb5"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:df951c5f4a1b1910f1a99ff42c473ff60f8225baa1cdd3539fe2819d9543e9df"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:072623554418a9911446278f16ecb398fb3b540147a7828c06e2011fa531e773"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:f654882311409afb1d780b940234208a252322c24a93b442ca714d119e68086c"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:89d75e7293d2b3e674db7d4d9b1bee7f8f3d1609428e293771d1a962617150cc"}, + {file = "regex-2024.11.6-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:f65557897fc977a44ab205ea871b690adaef6b9da6afda4790a2484b04293a5f"}, + {file = "regex-2024.11.6-cp38-cp38-win32.whl", hash = "sha256:6f44ec28b1f858c98d3036ad5d7d0bfc568bdd7a74f9c24e25f41ef1ebfd81a4"}, + {file = "regex-2024.11.6-cp38-cp38-win_amd64.whl", hash = "sha256:bb8f74f2f10dbf13a0be8de623ba4f9491faf58c24064f32b65679b021ed0001"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:5704e174f8ccab2026bd2f1ab6c510345ae8eac818b613d7d73e785f1310f839"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:220902c3c5cc6af55d4fe19ead504de80eb91f786dc102fbd74894b1551f095e"}, + {file = "regex-2024.11.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5e7e351589da0850c125f1600a4c4ba3c722efefe16b297de54300f08d734fbf"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5056b185ca113c88e18223183aa1a50e66507769c9640a6ff75859619d73957b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2e34b51b650b23ed3354b5a07aab37034d9f923db2a40519139af34f485f77d0"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5670bce7b200273eee1840ef307bfa07cda90b38ae56e9a6ebcc9f50da9c469b"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:08986dce1339bc932923e7d1232ce9881499a0e02925f7402fb7c982515419ef"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:93c0b12d3d3bc25af4ebbf38f9ee780a487e8bf6954c115b9f015822d3bb8e48"}, + {file = "regex-2024.11.6-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:764e71f22ab3b305e7f4c21f1a97e1526a25ebdd22513e251cf376760213da13"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:f056bf21105c2515c32372bbc057f43eb02aae2fda61052e2f7622c801f0b4e2"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:69ab78f848845569401469da20df3e081e6b5a11cb086de3eed1d48f5ed57c95"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:86fddba590aad9208e2fa8b43b4c098bb0ec74f15718bb6a704e3c63e2cef3e9"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:684d7a212682996d21ca12ef3c17353c021fe9de6049e19ac8481ec35574a70f"}, + {file = "regex-2024.11.6-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a03e02f48cd1abbd9f3b7e3586d97c8f7a9721c436f51a5245b3b9483044480b"}, + {file = "regex-2024.11.6-cp39-cp39-win32.whl", hash = "sha256:41758407fc32d5c3c5de163888068cfee69cb4c2be844e7ac517a52770f9af57"}, + {file = "regex-2024.11.6-cp39-cp39-win_amd64.whl", hash = "sha256:b2837718570f95dd41675328e111345f9b7095d821bac435aac173ac80b19983"}, + {file = "regex-2024.11.6.tar.gz", hash = "sha256:7ab159b063c52a0333c884e4679f8d7a85112ee3078fe3d9004b2dd875585519"}, ] [[package]] @@ -3275,13 +3309,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.6" +version = "4.67.0" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, - {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, + {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, + {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, ] [package.dependencies] @@ -3289,6 +3323,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] @@ -3539,13 +3574,13 @@ bracex = ">=2.1.1" [[package]] name = "werkzeug" -version = "3.1.2" +version = "3.1.3" description = "The comprehensive WSGI web application library." optional = false python-versions = ">=3.9" files = [ - {file = "werkzeug-3.1.2-py3-none-any.whl", hash = "sha256:4f7d1a5de312c810a8a2c6f0b47e9f6a7cffb7c8322def35e4d4d9841ff85597"}, - {file = "werkzeug-3.1.2.tar.gz", hash = "sha256:f471a4cd167233077e9d2a8190c3471c5bc520c636a9e3c1e9300c33bced03bc"}, + {file = "werkzeug-3.1.3-py3-none-any.whl", hash = "sha256:54b78bf3716d19a65be4fceccc0d1d7b89e608834989dfae50ea87564639213e"}, + {file = "werkzeug-3.1.3.tar.gz", hash = "sha256:60723ce945c19328679790e3282cc758aa4a6040e4bb330f53d30fa546d44746"}, ] [package.dependencies] @@ -3646,16 +3681,16 @@ files = [ [[package]] name = "xmltodict" -version = "0.14.2" +version = "0.13.0" description = "Makes working with XML feel like you are working with JSON" optional = false -python-versions = ">=3.6" +python-versions = ">=3.4" files = [ - {file = "xmltodict-0.14.2-py2.py3-none-any.whl", hash = "sha256:20cc7d723ed729276e808f26fb6b3599f786cbc37e06c65e192ba77c40f20aac"}, - {file = "xmltodict-0.14.2.tar.gz", hash = "sha256:201e7c28bb210e374999d1dde6382923ab0ed1a8a5faeece48ab525b7810a553"}, + {file = "xmltodict-0.13.0-py2.py3-none-any.whl", hash = "sha256:aa89e8fd76320154a40d19a0df04a4695fb9dc5ba977cbb68ab3e4eb225e7852"}, + {file = "xmltodict-0.13.0.tar.gz", hash = "sha256:341595a488e3e01a85a9d8911d8912fd922ede5fecc4dce437eb4b6c8d037e56"}, ] [metadata] lock-version = "2.0" python-versions = "^3.10,<3.12" -content-hash = "991c6a8c2048db0182ec59fc540454f25d0cd6973595731baf1aa0509f369e86" +content-hash = "245a6af0b3725c6c360b99a653210c751a518e973925d9c094fad0ce253e52d7" diff --git a/airbyte-integrations/connectors/source-s3/pyproject.toml b/airbyte-integrations/connectors/source-s3/pyproject.toml index 08c61b874fc4..e038af3f9f9d 100644 --- a/airbyte-integrations/connectors/source-s3/pyproject.toml +++ b/airbyte-integrations/connectors/source-s3/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",] build-backend = "poetry.core.masonry.api" [tool.poetry] -version = "4.9.2" +version = "4.10.1" name = "source-s3" description = "Source implementation for S3." authors = [ "Airbyte ",] @@ -22,7 +22,7 @@ wcmatch = "==8.4" dill = "==0.3.4" transformers = "4.38.2" urllib3 = "<2" -airbyte-cdk = {extras = ["file-based"], version = "^5.7.4"} +airbyte-cdk = {extras = ["file-based"], version = "^6.5.2"} [tool.poetry.scripts] source-s3 = "source_s3.run:run" diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py index 0fafd9a5ae52..5af899aea95c 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/config.py @@ -6,7 +6,7 @@ import dpath.util from airbyte_cdk import is_cloud_environment -from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec +from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec, DeliverRawFiles, DeliverRecords from pydantic.v1 import AnyUrl, Field, root_validator from pydantic.v1.error_wrappers import ValidationError @@ -64,6 +64,16 @@ def documentation_url(cls) -> AnyUrl: order=5, ) + delivery_method: DeliverRecords | DeliverRawFiles = Field( + title="Delivery Method", + discriminator="delivery_type", + type="object", + order=6, + display_type="radio", + group="advanced", + default="use_records_transfer", + ) + @root_validator def validate_optional_args(cls, values): aws_access_key_id = values.get("aws_access_key_id") diff --git a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py index 62d7376e861b..b92cf14077b6 100644 --- a/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/source_s3/v4/stream_reader.py @@ -3,17 +3,19 @@ # import logging +import time from datetime import datetime from io import IOBase -from os import getenv -from typing import Iterable, List, Optional, Set +from os import getenv, makedirs, path +from typing import Dict, Iterable, List, Optional, Set, cast import boto3.session import pendulum +import psutil import pytz import smart_open from airbyte_cdk import FailureType -from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError +from airbyte_cdk.sources.file_based.exceptions import CustomFileBasedException, ErrorListingFiles, FileBasedSourceError, FileSizeLimitError from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode from airbyte_cdk.sources.file_based.remote_file import RemoteFile from botocore.client import BaseClient @@ -23,11 +25,14 @@ from botocore.session import get_session from source_s3.v4.config import Config from source_s3.v4.zip_reader import DecompressedStream, RemoteFileInsideArchive, ZipContentReader, ZipFileHandler +from typing_extensions import override AWS_EXTERNAL_ID = getenv("AWS_ASSUME_ROLE_EXTERNAL_ID") class SourceS3StreamReader(AbstractFileBasedStreamReader): + FILE_SIZE_LIMIT = 1_000_000_000 + def __init__(self): super().__init__() self._s3_client = None @@ -183,6 +188,84 @@ def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str # we can simply return the result here as it is a context manager itself that will release all resources return result + @staticmethod + def create_progress_handler(file_size: int, local_file_path: str, logger: logging.Logger): + previous_bytes_checkpoint = 0 + total_bytes_transferred = 0 + + def progress_handler(bytes_transferred: int): + nonlocal previous_bytes_checkpoint, total_bytes_transferred + total_bytes_transferred += bytes_transferred + if total_bytes_transferred - previous_bytes_checkpoint >= 100 * 1024 * 1024: + logger.info( + f"{total_bytes_transferred / (1024 * 1024):,.2f} MB ({total_bytes_transferred / (1024 * 1024 * 1024):.2f} GB) " + f"of {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB) " + f"written to {local_file_path}" + ) + previous_bytes_checkpoint = total_bytes_transferred + + # Get available disk space + disk_usage = psutil.disk_usage("/") + available_disk_space = disk_usage.free + + # Get available memory + memory_info = psutil.virtual_memory() + available_memory = memory_info.available + logger.info( + f"Available disk space: {available_disk_space / (1024 * 1024):,.2f} MB ({available_disk_space / (1024 * 1024 * 1024):.2f} GB), " + f"available memory: {available_memory / (1024 * 1024):,.2f} MB ({available_memory / (1024 * 1024 * 1024):.2f} GB)." + ) + + return progress_handler + + @override + def get_file(self, file: RemoteFile, local_directory: str, logger: logging.Logger) -> Dict[str, str | int]: + """ + Downloads a file from an S3 bucket to a specified local directory. + + Args: + file (RemoteFile): The remote file object containing URI and metadata. + local_directory (str): The local directory path where the file will be downloaded. + logger (logging.Logger): Logger for logging information and errors. + + Returns: + dict: A dictionary containing the following: + - "file_url" (str): The absolute path of the downloaded file. + - "bytes" (int): The file size in bytes. + - "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as + this a mounted volume in the pod container. + + Raises: + FileSizeLimitError: If the file size exceeds the predefined limit (1 GB). + """ + file_size = self.file_size(file) + # I'm putting this check here so we can remove the safety wheels per connector when ready. + if file_size > self.FILE_SIZE_LIMIT: + message = "File size exceeds the 1 GB limit." + raise FileSizeLimitError(message=message, internal_message=message, failure_type=FailureType.config_error) + + file_relative_path, local_file_path, absolute_file_path = self._get_file_transfer_paths(file, local_directory) + + logger.info( + f"Starting to download the file {file.uri} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)" + ) + # at some moment maybe we will require to play with the max_pool_connections and max_concurrency of s3 config + start_download_time = time.time() + progress_handler = self.create_progress_handler(file_size, local_file_path, logger) + self.s3_client.download_file(self.config.bucket, file.uri, local_file_path, Callback=progress_handler) + write_duration = time.time() - start_download_time + logger.info(f"Finished downloading the file {file.uri} and saved to {local_file_path} in {write_duration:,.2f} seconds.") + + return {"file_url": absolute_file_path, "bytes": file_size, "file_relative_path": file_relative_path} + + @override + def file_size(self, file: RemoteFile) -> int: + s3_object: boto3.s3.Object = self.s3_client.get_object( + Bucket=self.config.bucket, + Key=file.uri, + ) + return cast(int, s3_object["ContentLength"]) + @staticmethod def _is_folder(file) -> bool: return file["Key"].endswith("/") diff --git a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py index 37de3ec1bf08..02e76e5790ea 100644 --- a/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py +++ b/airbyte-integrations/connectors/source-s3/unit_tests/v4/test_stream_reader.py @@ -8,7 +8,7 @@ from datetime import datetime, timedelta from itertools import product from typing import Any, Dict, List, Optional, Set -from unittest.mock import MagicMock, patch +from unittest.mock import ANY, MagicMock, Mock, patch import pytest from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec @@ -242,6 +242,35 @@ def test_open_file_calls_any_open_with_the_right_encoding(smart_open_mock): assert smart_open_mock.call_args.kwargs["encoding"] == encoding +@patch("source_s3.v4.stream_reader.SourceS3StreamReader.file_size") +@patch("boto3.client") +def test_get_file(mock_boto_client, s3_reader_file_size_mock): + s3_reader_file_size_mock.return_value = 100 + + mock_s3_client_instance = Mock() + mock_boto_client.return_value = mock_s3_client_instance + mock_s3_client_instance.download_file.return_value = None + + reader = SourceS3StreamReader() + reader.config = Config(bucket="test", aws_access_key_id="test", aws_secret_access_key="test", streams=[], delivery_method= { "delivery_type": "use_file_transfer" }) + try: + reader.config = Config( + bucket="test", + aws_access_key_id="test", + aws_secret_access_key="test", + streams=[], + endpoint=None, + delivery_method={"delivery_type": "use_file_transfer"} + ) + except Exception as exc: + raise exc + test_file_path = "directory/file.txt" + result = reader.get_file(RemoteFile(uri="", last_modified=datetime.now()), test_file_path, logger) + + assert result == {'bytes': 100, 'file_relative_path': ANY, 'file_url': ANY} + assert result["file_url"].endswith(test_file_path) + + def test_get_s3_client_without_config_raises_exception(): with pytest.raises(ValueError): SourceS3StreamReader().s3_client diff --git a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md index bbec07165edc..b3ae4df1e20e 100644 --- a/docs/cloud/managing-airbyte-cloud/manage-data-residency.md +++ b/docs/cloud/managing-airbyte-cloud/manage-data-residency.md @@ -37,3 +37,9 @@ To choose a custom data residency for your connection, click **Connections** in Changes to data residency will not affect any sync in progress. ::: + +## Connector Builder data residency + +The Connector Builder currently processes all data through US data planes, regardless of your workspace's default data residency settings. This limitation applies to the development and testing of connectors within the builder interface. + +If your use case requires strict data residency compliance outside the US, you can still publish a custom connector from the builder which will respect your workspace's data residency settings during syncs. However, you will be unable to verify the connector's behavior within the builder itself. diff --git a/docs/connector-development/config-based/understanding-the-yaml-file/partition-router.md b/docs/connector-development/config-based/understanding-the-yaml-file/partition-router.md index 357918cbab79..f4b8891c873f 100644 --- a/docs/connector-development/config-based/understanding-the-yaml-file/partition-router.md +++ b/docs/connector-development/config-based/understanding-the-yaml-file/partition-router.md @@ -119,7 +119,7 @@ Example: ```yaml partition_router: type: SubstreamPartitionRouter - parent_streams_configs: + parent_stream_configs: - stream: "#/repositories_stream" parent_key: "id" partition_field: "repository" diff --git a/docs/integrations/sources/apple-search-ads.md b/docs/integrations/sources/apple-search-ads.md index fad6891fe51c..97a4050a1d7d 100644 --- a/docs/integrations/sources/apple-search-ads.md +++ b/docs/integrations/sources/apple-search-ads.md @@ -60,6 +60,7 @@ However, at this moment and as indicated in the stream names, the connector only | Version | Date | Pull Request | Subject | | :------ | :--------- | :------------------------------------------------------- | :----------------------------------------------------------------------------------- | +| 0.2.1 | 2024-11-08 | [48440](https://github.com/airbytehq/airbyte/pull/48440) | Set authentication grant_type to client_credentials | | 0.2.0 | 2024-10-01 | [46288](https://github.com/airbytehq/airbyte/pull/46288) | Migrate to Manifest-only | | 0.1.20 | 2024-09-28 | [46153](https://github.com/airbytehq/airbyte/pull/46153) | Update dependencies | | 0.1.19 | 2024-09-21 | [45803](https://github.com/airbytehq/airbyte/pull/45803) | Update dependencies | diff --git a/docs/integrations/sources/klaviyo.md b/docs/integrations/sources/klaviyo.md index 6189539b9283..cd6a81074388 100644 --- a/docs/integrations/sources/klaviyo.md +++ b/docs/integrations/sources/klaviyo.md @@ -95,6 +95,7 @@ contain the `predictive_analytics` field and workflows depending on this field w | Version | Date | Pull Request | Subject | |:--------|:-----------|:-----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------| +| 2.10.14 | 2024-11-07 | [48391](https://github.com/airbytehq/airbyte/pull/48391) | Remove custom datetime cursor dependency | | 2.10.13 | 2024-11-05 | [48331](https://github.com/airbytehq/airbyte/pull/48331) | Update dependencies | | 2.10.12 | 2024-10-29 | [47797](https://github.com/airbytehq/airbyte/pull/47797) | Update dependencies | | 2.10.11 | 2024-10-28 | [47043](https://github.com/airbytehq/airbyte/pull/47043) | Update dependencies | diff --git a/docs/integrations/sources/monday.md b/docs/integrations/sources/monday.md index e95b86419cdf..8468981df331 100644 --- a/docs/integrations/sources/monday.md +++ b/docs/integrations/sources/monday.md @@ -77,6 +77,7 @@ The Monday connector should not run into Monday API limitations under normal usa | Version | Date | Pull Request | Subject | | :------ | :--------- | :-------------------------------------------------------- | :------------------------------------------------------------------------------------------------ | +| 2.1.5 | 2024-10-31 | [48054](https://github.com/airbytehq/airbyte/pull/48054) | Moved to `DeclarativeOAuthFlow` specification | | 2.1.4 | 2024-08-17 | [44201](https://github.com/airbytehq/airbyte/pull/44201) | Add boards name to the `items` stream | | 2.1.3 | 2024-06-04 | [38958](https://github.com/airbytehq/airbyte/pull/38958) | [autopull] Upgrade base image to v1.2.1 | | 2.1.2 | 2024-04-30 | [37722](https://github.com/airbytehq/airbyte/pull/37722) | Fetch `display_value` field for column values of `Mirror`, `Dependency` and `Connect Board` types | diff --git a/docs/integrations/sources/s3.md b/docs/integrations/sources/s3.md index 2ee4725db13f..3bb1640a3ab3 100644 --- a/docs/integrations/sources/s3.md +++ b/docs/integrations/sources/s3.md @@ -141,6 +141,28 @@ All other fields are optional and can be left empty. Refer to the [S3 Provider S 3. On the Set up the source page, select S3 from the Source type dropdown. 4. Enter a name for the S3 connector. +#### Copy Raw Files Configuration + + + +:::info + +The raw file replication feature has the following requirements and limitations: +- **Supported Airbyte Versions:** + - Cloud: All Workspaces + - OSS / Enterprise: `v1.2.0` or later +- **Max File Size:** `1GB` per file +- **Supported Destinations:** + - S3: `v1.4.0` or later + +::: + +Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files. + +Format options will not be taken into account. Instead, files will be transferred to the file-based destination without parsing underlying data. + + + ## Supported sync modes The S3 source connector supports the following [sync modes](https://docs.airbyte.com/cloud/core-concepts/#connection-sync-modes): @@ -339,6 +361,7 @@ This connector utilizes the open source [Unstructured](https://unstructured-io.g | Version | Date | Pull Request | Subject | |:--------|:-----------|:----------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------| +| 4.10.1 | 2024-11-12 | [48346](https://github.com/airbytehq/airbyte/pull/48346) | Implement file-transfer capabilities | | 4.9.2 | 2024-11-04 | [48259](https://github.com/airbytehq/airbyte/pull/48259) | Update dependencies | | 4.9.1 | 2024-10-29 | [47038](https://github.com/airbytehq/airbyte/pull/47038) | Update dependencies | | 4.9.0 | 2024-10-17 | [46973](https://github.com/airbytehq/airbyte/pull/46973) | Promote releae candidate. |