From 20017caa56498cf4866e7aa6d3b54e9e449a6529 Mon Sep 17 00:00:00 2001
From: Dan Kolbman
Date: Sat, 17 Nov 2018 19:44:27 -0500
Subject: [PATCH 1/2] conditionally import backend library based on destination

---
 operators/github_to_cloud_storage_operator.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/operators/github_to_cloud_storage_operator.py b/operators/github_to_cloud_storage_operator.py
index 69563b4..c6c0e6a 100755
--- a/operators/github_to_cloud_storage_operator.py
+++ b/operators/github_to_cloud_storage_operator.py
@@ -5,7 +5,6 @@
 
 from airflow.utils.decorators import apply_defaults
 from airflow.models import BaseOperator
-from airflow.hooks import S3Hook, GoogleCloudStorageHook
 
 from github_plugin.hooks.github_hook import GithubHook
 
@@ -112,6 +111,7 @@ def output_manager(self, output):
             output = '\n'.join([json.dumps(flatten(record)) for record in output])
 
         if self.destination.lower() == 's3':
+            from airflow.hooks import S3Hook
             s3 = S3Hook(self.dest_conn_id)
 
             s3.load_string(
@@ -124,6 +124,7 @@ def output_manager(self, output):
             s3.connection.close()
 
         elif self.destination.lower() == 'gcs':
+            from airflow.hooks import GoogleCloudStorageHook
             with NamedTemporaryFile('w') as tmp:
                 tmp.write(output)
 

From ff172d9afba5de3c39705d0a316e58ae733277f7 Mon Sep 17 00:00:00 2001
From: Dan Kolbman
Date: Sat, 17 Nov 2018 19:44:47 -0500
Subject: [PATCH 2/2] update readme with current operator arguments

---
 README.md | 52 +++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 44e3d41..512fba1 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Github Plugin
-This plugin moves data from the Github API to Google Cloud Storage based on the specified object.
+This plugin moves data from the Github API to S3 or Google Cloud Storage based on the specified object.
 
 ## Hooks
 ### GithubHook
@@ -10,18 +10,40 @@ Core Airflow S3Hook with the standard boto dependency.
 
 ## Operators
 ### GithubtoCloudStorageOperator
-This operator composes the logic for this plugin. It fetches the Github specified object and saves the result in GCS. The parameters it can accept include the following:
-```:param src: Path to the local file. (templated)
-    :type src: str
-    :param dst: Destination path within the specified bucket. (templated)
-    :type dst: str
-    :param bucket: The bucket to upload to. (templated)
-    :type bucket: str
-    :param google_cloud_storage_conn_id: The Airflow connection ID to upload with
-    :type google_cloud_storage_conn_id: str
-    :param mime_type: The mime-type string
-    :type mime_type: str
-    :param delegate_to: The account to impersonate, if any
-    :type delegate_to: str
-    :param gzip: Allows for file to be compressed and uploaded as gzip
+This operator composes the logic for this plugin. It fetches the specified Github object and saves the result in GCS or S3. The parameters it can accept include the following:
+```
+:param github_conn_id: The Github connection id.
+:type github_conn_id: string
+:param github_org: The Github organization.
+:type github_org: string
+:param github_repo: The Github repository. Required for
+    commits, commit_comments, issue_comments,
+    and issues objects.
+:type github_repo: string
+:param github_object: The desired Github object. The currently
+    supported values are:
+        - commits
+        - commit_comments
+        - issue_comments
+        - issues
+        - members
+        - organizations
+        - pull_requests
+        - repositories
+:type github_object: string
+:param payload: The associated github parameters to
+    pass into the object request as
+    keyword arguments.
+:type payload: dict
+:param destination: The final destination where the data
+    should be stored. Possible values include:
+        - GCS
+        - S3
+:type destination: string
+:param dest_conn_id: The destination connection id.
+:type dest_conn_id: string
+:param bucket: The bucket to be used to store the data.
+:type bucket: string
+:param key: The filename to be used to store the data.
+:type key: string
 ```
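
Note for reviewers: below is a minimal usage sketch that exercises the arguments documented in the updated README. The import path (`airflow.operators.github_plugin`), DAG boilerplate, connection ids, and bucket/key values are illustrative assumptions rather than part of this patch; the class name is taken from the README heading.

```python
# Illustrative only: the import path, connection ids, and bucket/key values
# below are assumptions, not part of this patch series.
from datetime import datetime

from airflow import DAG
# Assumed plugin registration path; adjust to however the plugin exposes the operator.
from airflow.operators.github_plugin import GithubtoCloudStorageOperator

dag = DAG(
    dag_id='github_commits_to_s3',            # hypothetical DAG id
    start_date=datetime(2018, 11, 1),
    schedule_interval='@daily',
)

commits_to_s3 = GithubtoCloudStorageOperator(
    task_id='commits_to_s3',
    github_conn_id='github_default',          # assumed Github connection id
    github_org='my-org',
    github_repo='my-repo',                    # required for the commits object
    github_object='commits',
    payload={'since': '2018-11-01T00:00:00Z'},  # passed to the object request as keyword arguments
    destination='s3',                         # 's3' or 'gcs'; compared case-insensitively in output_manager
    dest_conn_id='aws_default',               # assumed S3 connection id
    bucket='my-bucket',
    key='github/my-repo/commits.json',
    dag=dag,
)
```

The sketch also reflects the intent of PATCH 1/2: because `S3Hook` and `GoogleCloudStorageHook` are now imported inside `output_manager` based on `destination`, only the backend selected by the operator needs its client library available.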