diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2c60b70..589f595 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,7 +1,25 @@
 # Change Log

+## [1.0.0](https://github.com/ome9ax/target-s3-jsonl/tree/1.0.0) (2021-08-18)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.7...1.0.0)
+
+### Closed issues:
+- release version 1.0.0 bump 🥳🥂🍾 tests & spec, `100%` complete coverage
+
+### Merged pull requests:
+- [[coverage] release version 1.0.0 bump 🥳🥂🍾 tests & spec, `100%` complete coverage](https://github.com/ome9ax/target-s3-jsonl/pull/17)
+
+## [0.0.7](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.7) (2021-08-18)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.6...0.0.7)
+
+### Closed issues:
+- Much more specs and tests, coverage increased to `98.09%`
+
+### Merged pull requests:
+- [[coverage] tests & spec, 98.09% further coverage](https://github.com/ome9ax/target-s3-jsonl/pull/16)
+
 ## [0.0.6](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.6) (2021-08-17)
-[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5.2...0.0.6)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.5.2...0.0.6)

 ### Closed issues:
 - Much more specs and tests, coverage increased to `96.91%`
@@ -10,7 +28,7 @@
 - [[coverage] bump version 0.0.6 changelog update: Much more specs and tests, coverage increased to `96.91%`](https://github.com/ome9ax/target-s3-jsonl/pull/15)

 ## [0.0.5.2](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5.2) (2021-08-13)
-[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5.1...0.0.5.2)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.5.1...0.0.5.2)

 ### New features:
 - replace `io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')` with `sys.stdin` as it's already natively defined as `<_io.TextIOWrapper name='' mode='r' encoding='utf-8'>`
@@ -19,7 +37,7 @@
 - [[readlines] replace `io.TextIOWrapper(sys.stdin.buffer, encoding='utf-8')` with `sys.stdin`](https://github.com/ome9ax/target-s3-jsonl/pull/13)

 ## [0.0.5.1](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5.1) (2021-08-12)
-[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5...0.0.5.1)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.5...0.0.5.1)

 ### Fixed bugs:
 - Issue to decompress archived files
@@ -31,7 +49,7 @@
 - [[compression] fix compression management](https://github.com/ome9ax/target-s3-jsonl/pull/12)

 ## [0.0.5](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.5) (2021-08-12)
-[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.4...0.0.5)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.4...0.0.5)

 ### New features:
 - I now store the rows in an Array on memory, and unload the Array into the file by batches. By default the batch size is 64Mb configurable with the `memory_buffer` config option.
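
The 0.0.5 entry above describes the buffering behaviour this release still relies on: serialised records are collected in an in-memory list per stream and flushed to a temporary file once the list grows past the `memory_buffer` threshold (64 MB by default). The snippet below is a simplified sketch of that idea under assumed data shapes, not the package's exact code; `file_data`, `save_file` and `open_func` only mirror names that appear in the `target_s3_jsonl/__init__.py` hunk further down.

```python
import json
import sys


def buffer_record(file_data, stream, record, config, save_file, open_func):
    # Append the serialised record to the stream's in-memory buffer
    file_data[stream]['file_data'].append(json.dumps(record) + '\n')

    # Flush the buffer into the stream's temporary file once it grows past the
    # threshold (64e6 bytes unless the `memory_buffer` config option overrides it)
    if sys.getsizeof(file_data[stream]['file_data']) > config.get('memory_buffer', 64e6):
        save_file(file_data[stream], open_func)
```

Note that `sys.getsizeof` on the list is only a rough proxy for the buffered volume; that is enough here because the goal is a coarse flush trigger rather than an exact byte count.
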
@@ -46,7 +64,7 @@
 - [[Metadata] manage tap Metadata _sdc columns according to the stitch documentation](https://github.com/ome9ax/target-s3-jsonl/pull/9)

 ## [0.0.4](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.4) (2021-08-09)
-[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/tree/0.0.0...0.0.4)
+[Full Changelog](https://github.com/ome9ax/target-s3-jsonl/compare/0.0.0...0.0.4)

 ### New features:
 - Initial release
diff --git a/README.md b/README.md
index eeabb95..dabe8bd 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,11 @@
 # target-s3-jsonl
-
-
 ![GitHub - License](https://img.shields.io/github/license/ome9ax/target-s3-jsonl)
 [![Python package builder](https://github.com/ome9ax/target-s3-jsonl/workflows/Python%20package/badge.svg)](https://github.com/ome9ax/target-s3-jsonl)
 [![codecov](https://codecov.io/gh/ome9ax/target-s3-jsonl/branch/main/graph/badge.svg?token=KV0cn4jKs2)](https://codecov.io/gh/ome9ax/target-s3-jsonl)
 [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/target-s3-jsonl.svg)](https://pypi.org/project/target-s3-jsonl/)
 [![PyPI version](https://badge.fury.io/py/target-s3-jsonl.svg)](https://badge.fury.io/py/target-s3-jsonl)
 [![PyPi project installs](https://img.shields.io/pypi/dm/target-s3-jsonl.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypi.org/project/target-s3-jsonl)
-
 [Singer](https://www.singer.io/) target that uploads loads data to S3 in JSONL format
 following the [Singer spec](https://github.com/singer-io/getting-started/blob/master/docs/SPEC.md).
@@ -17,8 +14,6 @@ following the [Singer spec](https://github.com/singer-io/getting-started/blob/ma

 `target-s3-jsonl` is a [Singer](https://singer.io) Target which intend to work with regular [Singer](https://singer.io) Tap. It take the output of the tap and export it as a [JSON Lines](http://jsonlines.org/) files.

-It re-uses [PipelineWise `pipelinewise-target-s3-csv`](https://transferwise.github.io/pipelinewise) s3 client logic to upload the JSONL file directly to S3.
-
 ## Install

 First, make sure Python 3 is installed on your system or follow these
@@ -104,7 +99,7 @@ Full list of options in `config.json`:

 Run pytest
 ```bash
-python -m pytest -p no:cacheprovider
+pytest -p no:cacheprovider
 ```

 ## License
diff --git a/requirements.txt b/requirements.txt
index 53de860..3009b7a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 jsonschema==3.2.0
-boto3==1.18.16
+boto3==1.18.22
 backoff==1.11.1
diff --git a/setup.cfg b/setup.cfg
index b31fe4b..8bb46fd 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,7 +2,6 @@
 name = target-s3-jsonl
 version = attr: target_s3_jsonl.__version__
 description = Singer.io target for writing JSON Line files and upload to S3
-# long_description = file: README.md, CHANGELOG.md, LICENSE
 long_description = file: README.md
 long_description_content_type = text/markdown
 author = Eddy ∆
@@ -11,14 +10,13 @@ url = https://github.com/ome9ax/target-s3-jsonl
 keywords = target-s3-jsonl, target-s3-json, singer, singer.io, tap, target, etl, json, jsonl, aws, s3
 license = Apache License 2.0
 classifiers =
-    Development Status :: 4 - Beta
+    Development Status :: 5 - Production/Stable
     Operating System :: OS Independent
     License :: OSI Approved :: Apache Software License
     Programming Language :: Python :: 3.8
     Programming Language :: Python :: 3.9

 [options]
-# zip_safe = False
 packages = find:
 py_modules = target_s3_jsonl
 python_requires = >=3.8
@@ -60,12 +58,7 @@
 show_missing = True
 skip_covered = False
 [flake8]
-# filename = .
 extend-exclude = venv
-# exclude = tests/*
-# ignore = E226,E302,E41
-# ignore = E9,F63,F7,F82
-# ignore = E731,E402,W503,E203,F401,F821
 ignore = C901
 max-line-length = 160
 max-complexity = 10
diff --git a/target_s3_jsonl/__init__.py b/target_s3_jsonl/__init__.py
index 078dfc6..6377a1b 100644
--- a/target_s3_jsonl/__init__.py
+++ b/target_s3_jsonl/__init__.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3

-__version__ = '0.0.7'
+__version__ = '1.0.0'

 import argparse
 import gzip
@@ -58,15 +58,16 @@ def add_metadata_values_to_record(record_message, schema_message, timestamp):
 def remove_metadata_values_from_record(record_message):
     '''Removes every metadata _sdc column from a given record message
     '''
-    keys = {
+    for key in {
         '_sdc_batched_at',
         '_sdc_deleted_at',
         '_sdc_extracted_at',
         '_sdc_primary_key',
         '_sdc_received_at',
         '_sdc_sequence',
-        '_sdc_table_version'}
-    for key in keys:
+        '_sdc_table_version'
+    }:
+
         record_message['record'].pop(key, None)

     return record_message['record']
@@ -76,7 +77,7 @@ def emit_state(state):
     if state is not None:
         line = json.dumps(state)
         LOGGER.debug('Emitting state {}'.format(line))
-        sys.stdout.write("{}\n".format(line))
+        sys.stdout.write('{}\n'.format(line))
         sys.stdout.flush()


@@ -193,7 +194,10 @@ def persist_lines(messages, config):
                 validators[stream].validate(float_to_decimal(record_to_load))
             except Exception as ex:
                 # NOTE: let anything but 'InvalidOperation' raised Exception slip by
-                if type(ex).__name__ == "InvalidOperation":  # TODO pragma: no cover
+                # And actual references of the validator logic can be found
+                # at https://github.com/Julian/jsonschema/blob/main/jsonschema/_validators.py
+                # logic covered in the 'jsonschema' package
+                if type(ex).__name__ == "InvalidOperation":  # pragma: no cover
                     LOGGER.error(
                         "Data validation failed and cannot load to destination. RECORD: {}\n" "'multipleOf' validations that allows long precisions are not supported"
                         .format(record_to_load))
@@ -208,8 +212,7 @@
             file_data[stream]['file_data'].append(json.dumps(record_to_load) + '\n')

-            # NOTE: write temporary file
-            # Use 64Mb default memory buffer
+            # NOTE: write the lines into the temporary file when received data over 64Mb default memory buffer
             if sys.getsizeof(file_data[stream]['file_data']) > config.get('memory_buffer', 64e6):
                 save_file(file_data[stream], open_func)

@@ -262,8 +265,8 @@ def main():
     parser.add_argument('-c', '--config', help='Config file', required=True)
     args = parser.parse_args()

-    with open(args.config) as input_json:
-        config = json.load(input_json)
+    with open(args.config) as input_file:
+        config = json.load(input_file)

     missing_params = {'s3_bucket'} - set(config.keys())
     if missing_params:
diff --git a/target_s3_jsonl/s3.py b/target_s3_jsonl/s3.py
index 053ee4b..a0acc35 100644
--- a/target_s3_jsonl/s3.py
+++ b/target_s3_jsonl/s3.py
@@ -41,7 +41,7 @@ def create_client(config):
         )
     # AWS Profile based authentication
     else:
-        aws_session = boto3.session.Session(profile_name=aws_profile)  # TODO pragma: no cover
+        aws_session = boto3.session.Session(profile_name=aws_profile)

     if aws_endpoint_url:
         s3 = aws_session.client('s3', endpoint_url=aws_endpoint_url)
@@ -60,23 +60,22 @@ def upload_file(filename, s3_client, bucket, s3_key,
         # No encryption config (defaults to settings on the bucket):
         encryption_desc = ""
         encryption_args = None
-    else:
-        if encryption_type.lower() == "kms":
-            encryption_args = {"ServerSideEncryption": "aws:kms"}
-            if encryption_key:
-                encryption_desc = (
-                    " using KMS encryption key ID '{}'"
-                    .format(encryption_key)
-                )
-                encryption_args["SSEKMSKeyId"] = encryption_key
-            else:
-                encryption_desc = " using default KMS encryption"
-        else:
-            raise NotImplementedError(
-                "Encryption type '{}' is not supported. "
-                "Expected: 'none' or 'KMS'"
-                .format(encryption_type)
+    elif encryption_type.lower() == "kms":
+        encryption_args = {"ServerSideEncryption": "aws:kms"}
+        if encryption_key:
+            encryption_desc = (
+                " using KMS encryption key ID '{}'"
+                .format(encryption_key)
             )
+            encryption_args["SSEKMSKeyId"] = encryption_key
+        else:
+            encryption_desc = " using default KMS encryption"
+    else:
+        raise NotImplementedError(
+            "Encryption type '{}' is not supported. "
+            "Expected: 'none' or 'KMS'"
+            .format(encryption_type)
+        )
     LOGGER.info(
         "Uploading {} to bucket {} at {}{}"
         .format(filename, bucket, s3_key, encryption_desc)
diff --git a/tests/resources/aws_credentials b/tests/resources/aws_credentials
index 22b62f0..01c6fcd 100644
--- a/tests/resources/aws_credentials
+++ b/tests/resources/aws_credentials
@@ -1,3 +1,5 @@
 [dummy]
-aws_access_key_id = testing
-aws_secret_access_key = testing
+aws_access_key_id = a_key
+aws_secret_access_key = no_big_secret
+aws_security_token = testing
+aws_session_token = testing
diff --git a/tests/test_init.py b/tests/test_init.py
index 982ad20..c24bb28 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -351,8 +351,8 @@ def test_persist_lines(caplog, config, input_data, input_multi_stream_data, inva
     dummy_type = '{"type": "DUMMY", "value": {"currently_syncing": "tap_dummy_test-test_table_one"}}'
     output_state, output_file_metadata = persist_lines([dummy_type] + input_multi_stream_data, config)

-    assert 'WARNING root:__init__.py:252 Unknown message type "{}" in message "{}"'.format(
-        json.loads(dummy_type)['type'], dummy_type.replace('"', "'")) + '\n' == caplog.text
+    assert caplog.text == 'WARNING root:__init__.py:255 Unknown message type "{}" in message "{}"'.format(
+        json.loads(dummy_type)['type'], dummy_type.replace('"', "'")) + '\n'

     with raises(NotImplementedError):
         config_copy = deepcopy(config)
@@ -417,6 +417,25 @@

     clear_dir(Path(config['temp_dir']))

+    # schema = {
+    #     "type": "SCHEMA", "stream": "users", "key_properties": ["id"],
+    #     "schema": {
+    #         "required": ["id"], "type": "object",
+    #         "properties": {"id": {"type": "integer"}}}}
+
+    # record = {"type": "RECORD", "stream": "users", "record": {"id": 1, "name": "X"}}
+
+    # with raises(Exception):
+    #     dummy_input_data = deepcopy(input_data)
+    #     dummy_schema = deepcopy(schema)
+    #     # dummy_schema['schema']['properties']['id']['minimum'] = -2147483648
+    #     # dummy_schema['schema']['properties']['id']['maximum'] = 2147483647
+    #     dummy_schema['schema']['properties']['id']['multipleOf'] = 64.0
+    #     dummy_record = deepcopy(record)
+    #     dummy_record['record']['id'] = 9007199254740996e646
+    #     dummy_input_data.insert(1, json.dumps(dummy_schema))
+    #     output_state, output_file_metadata = persist_lines(dummy_input_data, config)
+

 @mock_s3
 def test_main(monkeypatch, capsys, patch_datetime, patch_argument_parser, input_multi_stream_data, config, state, file_metadata):
diff --git a/tests/test_s3.py b/tests/test_s3.py
index f4b290d..409382e 100644
--- a/tests/test_s3.py
+++ b/tests/test_s3.py
@@ -28,6 +28,9 @@ def aws_credentials():
     moto_credentials_file_path = Path('tests', 'resources', 'aws_credentials')
     os.environ['AWS_SHARED_CREDENTIALS_FILE'] = str(moto_credentials_file_path)

+    os.environ['AWS_ACCESS_KEY_ID'] = 'that_key'
+    os.environ['AWS_SECRET_ACCESS_KEY'] = 'no_big_secret'
+

 @mock_s3
 def test_create_client(aws_credentials, config):
@@ -51,6 +54,20 @@ def test_create_client(aws_credentials, config):
     client.put_object(Bucket=config_copy['s3_bucket'], Key='Eddy is', Body='awesome!')
     body = conn.Object(config_copy['s3_bucket'], 'Eddy is').get()['Body'].read().decode("utf-8")

+    # NOTE: AWS Profile based authentication
+    config_copy = deepcopy(config)
+    config_copy['aws_profile'] = 'dummy'
+    config_copy.pop('aws_access_key_id')
+    config_copy.pop('aws_secret_access_key')
+    os.environ.pop('AWS_ACCESS_KEY_ID')
+    os.environ.pop('AWS_SECRET_ACCESS_KEY')
+
+    client = create_client(config_copy)
+    client.put_object(Bucket=config_copy['s3_bucket'], Key='Look!', Body='No access key!')
+    body = conn.Object(config_copy['s3_bucket'], 'Look!').get()['Body'].read().decode("utf-8")
+
+    assert body == 'No access key!'
+

 @mock_s3
 def test_upload_file(config):
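
The block added to `test_create_client` above exercises the AWS-profile branch of `create_client`: it drops the key-based credentials and lets boto3 resolve the `[dummy]` profile from `tests/resources/aws_credentials` under moto's S3 mock. As a standalone illustration of the same pattern, the sketch below uses plain boto3 instead of the package's `create_client`; the bucket name is hypothetical and the relative credentials path assumes the script runs from the repository root.

```python
import os

import boto3
from moto import mock_s3


@mock_s3
def exercise_profile_based_auth():
    # Resolve credentials from the [dummy] profile rather than explicit access keys
    os.environ['AWS_SHARED_CREDENTIALS_FILE'] = 'tests/resources/aws_credentials'
    os.environ.pop('AWS_ACCESS_KEY_ID', None)
    os.environ.pop('AWS_SECRET_ACCESS_KEY', None)

    session = boto3.session.Session(profile_name='dummy')
    client = session.client('s3', region_name='us-east-1')

    # moto keeps the bucket and object in memory, so no real AWS account is touched
    client.create_bucket(Bucket='dummy-bucket')
    client.put_object(Bucket='dummy-bucket', Key='Look!', Body='No access key!')
    body = client.get_object(Bucket='dummy-bucket', Key='Look!')['Body'].read().decode('utf-8')
    assert body == 'No access key!'


exercise_profile_based_auth()
```
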
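Further up, the reworked `upload_file` branching in `target_s3_jsonl/s3.py` only builds `encryption_desc` and `encryption_args`; the S3 call itself sits outside the hunk. The sketch below shows how such an args dict is typically handed to boto3's managed transfer through `ExtraArgs`. The final `upload_file` call and the `s3_client` parameter are assumptions for illustration, since this diff does not show how the package wires them up.

```python
def upload_with_optional_kms(s3_client, filename, bucket, s3_key, encryption_type=None, encryption_key=None):
    # Same branching as the refactored s3.py hunk: bucket defaults, SSE-KMS, or an explicit error
    if encryption_type is None or encryption_type.lower() == "none":
        encryption_args = None  # defer to the encryption settings configured on the bucket
    elif encryption_type.lower() == "kms":
        encryption_args = {"ServerSideEncryption": "aws:kms"}
        if encryption_key:
            encryption_args["SSEKMSKeyId"] = encryption_key
    else:
        raise NotImplementedError(
            "Encryption type '{}' is not supported. "
            "Expected: 'none' or 'KMS'".format(encryption_type))

    # Assumption: the server-side encryption settings are passed to boto3 via ExtraArgs
    s3_client.upload_file(filename, bucket, s3_key, ExtraArgs=encryption_args)


# Hypothetical usage (needs AWS credentials and a default region configured):
# upload_with_optional_kms(boto3.client('s3'), 'data.jsonl.gz', 'my-bucket', 'exports/data.jsonl.gz', 'KMS')
```
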