Skip to content

Commit

Permalink
Feat: Incorporate SDM in CDK and add publish workflow (#58)
Browse files Browse the repository at this point in the history
Co-authored-by: Aaron Steers <[email protected]>
Co-authored-by: octavia-squidington-iii <[email protected]>
Co-authored-by: Augustin <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
  • Loading branch information
5 people authored Nov 19, 2024
1 parent 72117aa commit 1294b43
Show file tree
Hide file tree
Showing 9 changed files with 377 additions and 3 deletions.
93 changes: 93 additions & 0 deletions .github/workflows/cdk-publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Builds and smoke-tests the `source-declarative-manifest` (SDM) Docker image,
# then publishes it to Docker Hub for pushes to `main` and manual dispatches.
name: Publish CDK and Source Declarative Manifest

on:
  push:
    paths:
      # The Dockerfile copies `pyproject.toml` from the repository root, so the
      # filter must point at the root-level file to trigger on version changes.
      - 'pyproject.toml' # To only publish on CDK version change
      - 'Dockerfile'
  workflow_dispatch:

jobs:
  test:
    name: Test Source Declarative Manifest Docker Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up QEMU for multi-platform builds
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build test image
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64 # Just build for the runner's architecture during test
          load: true
          tags: airbyte/source-declarative-manifest:build-test

      - name: Test image
        run: |
          docker run --rm airbyte/source-declarative-manifest:build-test spec

      - name: Scan for vulnerabilities
        # NOTE(review): `@master` is a moving ref; consider pinning to a release tag or commit SHA.
        uses: aquasecurity/trivy-action@master
        continue-on-error: true # Prevent security scan from failing the build
        with:
          image-ref: airbyte/source-declarative-manifest:build-test
          # A single report format is expected; it must match the SARIF output file below.
          format: 'sarif'
          output: 'trivy-results.sarif'
          exit-code: 1
          severity: 'CRITICAL,HIGH'
          timeout: '5m'

  publish:
    name: Publish SDM Docker Image
    runs-on: ubuntu-latest
    needs: test
    if: ${{ success() && (github.ref == 'refs/heads/main' || github.event_name == 'workflow_dispatch') }}
    permissions:
      id-token: write # Required for trusted publishing
      contents: write # Required for artifact uploads
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up QEMU for multi-platform builds
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USERNAME }}
          password: ${{ secrets.DOCKER_HUB_PASSWORD }}

      # The next step shells out to `poetry`, so make sure it is on the PATH.
      - name: Install Poetry
        run: pipx install poetry

      - name: Get CDK version
        run: |
          cdk_version="$(poetry version --short | tr -d '[:space:]')"
          # A failure inside the pipe above would otherwise yield an empty version
          # and malformed image tags further down.
          if [ -z "$cdk_version" ]; then
            echo "Could not determine the CDK version from pyproject.toml."
            exit 1
          fi
          echo "CDK_VERSION=$cdk_version" >> "$GITHUB_ENV"

      - name: Check if tag already exists
        # NOTE(review): only the `<version>-<run_number>` tag is checked; the plain
        # `<version>` and `latest` tags pushed below are not protected by this check.
        run: |
          tag="airbyte/source-declarative-manifest:${CDK_VERSION}-${{ github.run_number }}"
          if DOCKER_CLI_EXPERIMENTAL=enabled docker manifest inspect "$tag" > /dev/null 2>&1; then
            echo "The tag $tag already exists on Dockerhub. Skipping publish to prevent overwrite."
            exit 1
          fi

      - name: Build and push
        uses: docker/build-push-action@v5
        with:
          context: .
          platforms: linux/amd64,linux/arm64
          push: true
          tags: |
            airbyte/source-declarative-manifest:latest
            airbyte/source-declarative-manifest:${{ env.CDK_VERSION }}
            airbyte/source-declarative-manifest:${{ env.CDK_VERSION }}-${{ github.run_number }}
6 changes: 6 additions & 0 deletions .github/workflows/connector-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ jobs:
cdk_extra: vector-db-based
- connector: destination-motherduck
cdk_extra: sql
# TODO: These are manifest connectors and won't work as expected until we
# add `--use-local-cdk` support for manifest connectors.
- connector: source-the-guardian-api
cdk_extra: n/a
- connector: source-pokeapi
cdk_extra: n/a

name: "Check: '${{matrix.connector}}' (skip=${{needs.cdk_changes.outputs[matrix.cdk_extra] == 'false'}})"
steps:
Expand Down
18 changes: 18 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Image for the `source-declarative-manifest` connector, built from the CDK sources.
FROM docker.io/airbyte/python-connector-base:2.0.0@sha256:c44839ba84406116e8ba68722a0f30e8f6e7056c726f447681bb9e9ece8bd916

WORKDIR /airbyte/integration_code

# Copy project files needed for build
COPY pyproject.toml poetry.lock README.md ./

# Install the locked third-party dependencies only. `--no-root` skips installing
# the project itself, whose source is not copied until the next layer, so a
# failing dependency install now fails the build instead of being swallowed.
RUN poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-interaction --no-ansi

# Copy source code
COPY airbyte_cdk ./airbyte_cdk

# Build and install the package
RUN poetry build && pip install dist/*.whl

ENTRYPOINT ["poetry", "run", "source-declarative-manifest"]
1 change: 1 addition & 0 deletions airbyte_cdk/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
6 changes: 6 additions & 0 deletions airbyte_cdk/cli/source_declarative_manifest/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Re-export `run` from the private `_run` module so it can be imported
# directly from this package.
from airbyte_cdk.cli.source_declarative_manifest._run import run


# Names exported by `from ... import *`; only `run` is listed.
__all__ = [
"run",
]
223 changes: 223 additions & 0 deletions airbyte_cdk/cli/source_declarative_manifest/_run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
"""Defines the `source-declarative-manifest` connector, which installs alongside CDK.
This file was originally imported from the dedicated connector directory, under the
`airbyte` monorepo.
Usage:
```
pipx install airbyte-cdk
source-declarative-manifest --help
source-declarative-manifest spec
...
```
"""

from __future__ import annotations

import json
import pkgutil
import sys
import traceback
from collections.abc import Mapping
from datetime import datetime
from pathlib import Path
from typing import Any, cast

from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
from airbyte_cdk.models import (
AirbyteErrorTraceMessage,
AirbyteMessage,
AirbyteMessageSerializer,
AirbyteStateMessage,
AirbyteTraceMessage,
ConfiguredAirbyteCatalog,
ConnectorSpecificationSerializer,
TraceType,
Type,
)
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
ConcurrentDeclarativeSource,
)
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
from airbyte_cdk.sources.source import TState
from orjson import orjson


class SourceLocalYaml(YamlDeclarativeSource):
    """Declarative source whose manifest is a `manifest.yaml` file on the local filesystem."""

    def __init__(
        self,
        catalog: ConfiguredAirbyteCatalog | None,
        config: Mapping[str, Any] | None,
        state: TState,
        **kwargs: Any,
    ) -> None:
        """Initialise the parent source with a fixed manifest path.

        This subclass is a workaround (HACK):

        * Problem: `YamlDeclarativeSource` uses the calling module's name/path
          to locate the yaml file.
        * Implication: calling `YamlDeclarativeSource` directly makes it look in
          the wrong place (e.g. inside the airbyte-cdk package).
        * Solution: subclass it from the same location as the manifest to load.

        It can be removed once:

        * the airbyte-cdk no longer relies on the calling module name/path to
          find the yaml file;
        * all manifest connectors use the new airbyte-cdk;
        * all manifest connectors use source-declarative-manifest as their
          base image.

        Extra keyword arguments are accepted but are not forwarded to the parent.
        """
        manifest_path = "manifest.yaml"
        super().__init__(
            path_to_yaml=manifest_path,
            state=state,
            config=config,
            catalog=catalog,
        )


def _is_local_manifest_command(args: list[str]) -> bool:
# Check for a local manifest.yaml file
return Path("/airbyte/integration_code/source_declarative_manifest/manifest.yaml").exists()


def handle_command(args: list[str]) -> None:
    """Dispatch to the local-manifest handler if a local manifest exists, else the remote one."""
    handler = (
        handle_local_manifest_command
        if _is_local_manifest_command(args)
        else handle_remote_manifest_command
    )
    handler(args)


def _get_local_yaml_source(args: list[str]) -> SourceLocalYaml:
    """Build a `SourceLocalYaml` from the config, catalog and state named in `args`.

    If parsing the inputs or constructing the source fails, an Airbyte ERROR
    trace message is printed to stdout and the original exception is re-raised.
    """
    try:
        config, catalog, state = _parse_inputs_into_config_catalog_state(args)
        return SourceLocalYaml(config=config, catalog=catalog, state=state)
    except Exception as error:
        # Epoch milliseconds, taken before the rest of the message is assembled.
        emitted_at_ms = int(datetime.now().timestamp() * 1000)
        error_details = AirbyteErrorTraceMessage(
            message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
            stack_trace=traceback.format_exc(),
        )
        trace = AirbyteTraceMessage(
            type=TraceType.ERROR,
            emitted_at=emitted_at_ms,
            error=error_details,
        )
        trace_message = AirbyteMessage(type=Type.TRACE, trace=trace)
        serialized = orjson.dumps(AirbyteMessageSerializer.dump(trace_message))
        print(serialized.decode())
        raise error


def handle_local_manifest_command(args: list[str]) -> None:
    """Launch the connector backed by the local `manifest.yaml` with the given CLI args."""
    launch(source=_get_local_yaml_source(args), args=args)


def handle_remote_manifest_command(args: list[str]) -> None:
    """Run a command for the generic connector whose manifest is supplied in the config.

    The `spec` command is overridden to print the generalized spec bundled with
    this package as `spec.json`. This differs from a low-code source that is
    built and published on its own: such a source has a spec method returning
    the spec for that specific source. Apart from `spec`, the generalized
    connector behaves like any other, since the manifest is provided in the
    config.

    Args:
        args: Command-line arguments without the program name. An empty list is
            passed on to the regular launch path rather than raising
            `IndexError` here.

    Raises:
        FileNotFoundError: If the bundled `spec.json` cannot be loaded.
    """
    # Guard against an empty argument list before inspecting the command name.
    is_spec_command = bool(args) and args[0] == "spec"
    if not is_spec_command:
        source = create_declarative_source(args)
        launch(
            source=source,
            args=args,
        )
        return

    json_spec = pkgutil.get_data(
        "airbyte_cdk.cli.source_declarative_manifest",
        "spec.json",
    )
    if json_spec is None:
        raise FileNotFoundError(
            "Could not find `spec.json` file for source-declarative-manifest"
        )

    spec_obj = json.loads(json_spec)
    spec = ConnectorSpecificationSerializer.load(spec_obj)

    message = AirbyteMessage(type=Type.SPEC, spec=spec)
    print(AirbyteEntrypoint.airbyte_message_to_string(message))


def create_declarative_source(args: list[str]) -> ConcurrentDeclarativeSource:
    """Create the source from the manifest injected into the config.

    This essentially does at runtime what other low-code sources do at build
    time, using a user-provided manifest in the config. This better reflects
    what happens in the connector builder.

    Args:
        args: Command-line arguments without the program name.

    Returns:
        A `ConcurrentDeclarativeSource` built from the
        `__injected_declarative_manifest` entry of the config.

    Raises:
        ValueError: If no config was parsed, or the config lacks the
            `__injected_declarative_manifest` key.
        Exception: Any other failure while parsing inputs or building the
            source. In every case an Airbyte ERROR trace message is printed to
            stdout before the exception is re-raised.
    """
    try:
        config, catalog, state = _parse_inputs_into_config_catalog_state(args)
        # The parser yields `None` when the command carries no config; report
        # that explicitly instead of failing on the membership test below.
        if config is None:
            raise ValueError(
                "Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but no config was provided"
            )
        if "__injected_declarative_manifest" not in config:
            raise ValueError(
                f"Invalid config: `__injected_declarative_manifest` should be provided at the root of the config but config only has keys {list(config.keys())}"
            )
        return ConcurrentDeclarativeSource(
            config=config,
            catalog=catalog,
            state=state,
            source_config=cast(dict[str, Any], config["__injected_declarative_manifest"]),
        )
    except Exception as error:
        print(
            orjson.dumps(
                AirbyteMessageSerializer.dump(
                    AirbyteMessage(
                        type=Type.TRACE,
                        trace=AirbyteTraceMessage(
                            type=TraceType.ERROR,
                            # Epoch milliseconds.
                            emitted_at=int(datetime.now().timestamp() * 1000),
                            error=AirbyteErrorTraceMessage(
                                message=f"Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance. Error: {error}",
                                stack_trace=traceback.format_exc(),
                            ),
                        ),
                    )
                )
            ).decode()
        )
        raise error


def _parse_inputs_into_config_catalog_state(
    args: list[str],
) -> tuple[
    Mapping[str, Any] | None,
    ConfiguredAirbyteCatalog | None,
    list[AirbyteStateMessage],
]:
    """Parse the CLI args and load whichever of config, catalog and state they carry.

    An input is read only when the parsed arguments have the matching
    attribute. Otherwise config and catalog default to `None` and state to an
    empty list.
    """
    parsed_args = AirbyteEntrypoint.parse_args(args)

    config: Mapping[str, Any] | None = None
    if hasattr(parsed_args, "config"):
        config = ConcurrentDeclarativeSource.read_config(parsed_args.config)

    catalog: ConfiguredAirbyteCatalog | None = None
    if hasattr(parsed_args, "catalog"):
        catalog = ConcurrentDeclarativeSource.read_catalog(parsed_args.catalog)

    state: list[AirbyteStateMessage] = []
    if hasattr(parsed_args, "state"):
        state = ConcurrentDeclarativeSource.read_state(parsed_args.state)

    return config, catalog, state


def run() -> None:
    """Console entry point: hand the process arguments (minus the program name) to `handle_command`."""
    handle_command(sys.argv[1:])
17 changes: 17 additions & 0 deletions airbyte_cdk/cli/source_declarative_manifest/spec.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"documentationUrl": "https://docs.airbyte.com/integrations/sources/low-code",
"connectionSpecification": {
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Low-code source spec",
"type": "object",
"required": ["__injected_declarative_manifest"],
"additionalProperties": true,
"properties": {
"__injected_declarative_manifest": {
"title": "Low-code manifest",
"type": "object",
"description": "The low-code manifest that defines the components of the source."
}
}
}
}
Loading

0 comments on commit 1294b43

Please sign in to comment.