From f657d3c2008d3d517c333ef4d54a714e33b4ea47 Mon Sep 17 00:00:00 2001 From: wersly Date: Wed, 23 Feb 2022 17:59:30 -0500 Subject: [PATCH] feat: add streams for group and project variables (#64) * Add support for Group Variables and Project Variables * Update README * Add config for fetching group/project variables - defaults both to False * Update README * Fixup trailing commas Co-authored-by: Warren Ersly Co-authored-by: Warren Ersly --- README.md | 10 ++++- setup.py | 2 + tap_gitlab/__init__.py | 45 +++++++++++++++++++++-- tap_gitlab/schemas/group_variables.json | 26 +++++++++++++ tap_gitlab/schemas/project_variables.json | 26 +++++++++++++ 5 files changed, 105 insertions(+), 4 deletions(-) create mode 100644 tap_gitlab/schemas/group_variables.json create mode 100644 tap_gitlab/schemas/project_variables.json diff --git a/README.md b/README.md index 7ee6833..0ca5f60 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ This tap: - [Epics](https://docs.gitlab.com/ee/api/epics.html) (only available for GitLab Ultimate and GitLab.com Gold accounts) - [Epic Issues](https://docs.gitlab.com/ee/api/epic_issues.html) (only available for GitLab Ultimate and GitLab.com Gold accounts) - [Vulnerabilities](https://docs.gitlab.com/ee/api/project_vulnerabilities.html) + - [Group Variables](https://docs.gitlab.com/ee/api/group_level_variables.html) + - [Project Variables](https://docs.gitlab.com/ee/api/project_level_variables.html) - Outputs the schema for each resource - Incrementally pulls data based on the input state @@ -68,7 +70,9 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git "start_date": "2018-01-01T00:00:00Z", "ultimate_license": true, "fetch_merge_request_commits": false, - "fetch_pipelines_extended": false + "fetch_pipelines_extended": false, + "fetch_group_variables": false, + "fetch_project_variables": false } ``` @@ -80,6 +84,10 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git If `fetch_pipelines_extended` is true (defaults to false), 
then for every Pipeline fetched with `sync_pipelines` (which returns N pages containing all pipelines per project), also fetch extended details of each of these pipelines with `sync_pipelines_extended`. Similar concerns as those related to `fetch_merge_request_commits` apply here - every pipeline fetched with `sync_pipelines_extended` requires a separate API call. + If `fetch_group_variables` is true (defaults to false), then Group-level CI/CD variables will be retrieved for each available / specified group. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Group-level CI/CD variables. + + If `fetch_project_variables` is true (defaults to false), then Project-level CI/CD variables will be retrieved for each available / specified project. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Project-level CI/CD variables. + 4. [Optional] Create the initial state file You can provide JSON file that contains a date for the API endpoints diff --git a/setup.py b/setup.py index 9d8a23b..43e6415 100644 --- a/setup.py +++ b/setup.py @@ -37,6 +37,8 @@ "tags.json", "releases.json", "vulnerabilities.json", + "project_variables.json", + "group_variables.json" ], }, include_package_data=True, diff --git a/tap_gitlab/__init__.py b/tap_gitlab/__init__.py index a5cc279..eede72c 100644 --- a/tap_gitlab/__init__.py +++ b/tap_gitlab/__init__.py @@ -23,7 +23,9 @@ 'groups': '', 'ultimate_license': False, 'fetch_merge_request_commits': False, - 'fetch_pipelines_extended': False + 'fetch_pipelines_extended': False, + 'fetch_group_variables': False, + 'fetch_project_variables': False, } STATE = {} CATALOG = None @@ -191,10 +193,27 @@ def load_schema(entity): 'key_properties': ['id'], 'replication_method': 'FULL_TABLE', }, + 'project_variables': { + 'url': '/projects/{id}/variables', + 'schema': load_schema('project_variables'), + 'key_properties': ['project_id', 
def sync_variables(entity, element="project"):
    """Write the CI/CD variables of one project or group as Singer records.

    Args:
        entity: Parent record of the variables; only ``entity['id']`` is read.
        element: Prefix selecting the ``<element>_variables`` stream and
            resource, e.g. ``"project"`` (the default) or ``"group"``.

    Returns:
        None. Nothing is requested or written when the stream is missing
        from the catalog or is not selected.
    """
    resource = "{}_variables".format(element)

    catalog_entry = CATALOG.get_stream(resource)
    if catalog_entry is None:
        return
    if not catalog_entry.is_selected():
        return

    stream_metadata = metadata.to_map(catalog_entry.metadata)
    parent_id = entity['id']
    endpoint = get_url(entity=resource, id=parent_id)

    with Transformer(pre_hook=format_timestamp) as transformer:
        for variable in gen_request(endpoint):
            # Stamp the parent id onto the row; the stream's key_properties
            # use it together with 'key'.
            variable[element + '_id'] = parent_id
            record = transformer.transform(
                variable,
                RESOURCES[resource]["schema"],
                stream_metadata,
            )
            singer.write_record(resource, record, time_extracted=utils.now())
sync_pipelines(data) sync_vulnerabilities(data) + sync_variables(data) if not stream.is_selected(): return @@ -891,6 +928,8 @@ def main_impl(): CONFIG['ultimate_license'] = truthy(CONFIG['ultimate_license']) CONFIG['fetch_merge_request_commits'] = truthy(CONFIG['fetch_merge_request_commits']) CONFIG['fetch_pipelines_extended'] = truthy(CONFIG['fetch_pipelines_extended']) + CONFIG['fetch_group_variables'] = truthy(CONFIG['fetch_group_variables']) + CONFIG['fetch_project_variables'] = truthy(CONFIG['fetch_project_variables']) if '/api/' not in CONFIG['api_url']: CONFIG['api_url'] += '/api/v4' diff --git a/tap_gitlab/schemas/group_variables.json b/tap_gitlab/schemas/group_variables.json new file mode 100644 index 0000000..2352ed0 --- /dev/null +++ b/tap_gitlab/schemas/group_variables.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "group_id": { + "type": ["null", "integer"] + }, + "variable_type": { + "type": ["null", "string"] + }, + "key": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "protected": { + "type": ["null", "boolean"] + }, + "masked": { + "type": ["null", "boolean"] + }, + "environment_scope": { + "type": ["null", "string"] + } + } +} diff --git a/tap_gitlab/schemas/project_variables.json b/tap_gitlab/schemas/project_variables.json new file mode 100644 index 0000000..62f0b49 --- /dev/null +++ b/tap_gitlab/schemas/project_variables.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "project_id": { + "type": ["null", "integer"] + }, + "variable_type": { + "type": ["null", "string"] + }, + "key": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "protected": { + "type": ["null", "boolean"] + }, + "masked": { + "type": ["null", "boolean"] + }, + "environment_scope": { + "type": ["null", "string"] + } + } +}