diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 6f7888d..44b0375 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -68,7 +68,7 @@ jobs: python -m venv venv || virtualenv venv . venv/bin/activate pip install --upgrade pip # setuptools - pip install .[test,lint,dist] + pip install tox - name: Get pip cache dir id: pip-cache @@ -87,11 +87,11 @@ jobs: . venv/bin/activate # stop the build if there are Python syntax errors or undefined names # exit-zero treats all errors as warnings. The GitHub editor is 255 chars wide - flake8 --show-source --statistics --count + TOX_PARALLEL_NO_SPINNER=1 tox -e lint - name: Test run: | . venv/bin/activate - pytest + TOX_PARALLEL_NO_SPINNER=1 tox -e py - name: Upload coverage test results to Codecov uses: codecov/codecov-action@v2 if: | @@ -109,7 +109,8 @@ jobs: - name: Build distribution package run: | . venv/bin/activate - python setup.py sdist bdist_wheel + pip install build + python -m build ls -l dist - name: Publish distribution package to TestPyPI if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') diff --git a/README.md b/README.md index f067c3e..6337d68 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ # target-s3-jsonl -![GitHub - License](https://img.shields.io/github/license/ome9ax/target-s3-jsonl) +[![GitHub - License](https://img.shields.io/github/license/ome9ax/target-s3-jsonl)](https://github.com/ome9ax/target-s3-jsonl/blob/main/LICENSE) [![Python package builder](https://github.com/ome9ax/target-s3-jsonl/workflows/Python%20package/badge.svg)](https://github.com/ome9ax/target-s3-jsonl/actions) [![codecov](https://codecov.io/gh/ome9ax/target-s3-jsonl/branch/main/graph/badge.svg?token=KV0cn4jKs2)](https://codecov.io/gh/ome9ax/target-s3-jsonl) -[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/target-s3-jsonl.svg)](https://pypi.org/project/target-s3-jsonl/) +[![PyPI - Python 
Version](https://img.shields.io/pypi/pyversions/target-s3-jsonl.svg)](https://pypi.org/project/target-s3-jsonl) [![PyPI version](https://badge.fury.io/py/target-s3-jsonl.svg)](https://badge.fury.io/py/target-s3-jsonl) [![PyPi project installs](https://img.shields.io/pypi/dm/target-s3-jsonl.svg?maxAge=2592000&label=installs&color=%2327B1FF)](https://pypistats.org/packages/target-s3-jsonl) @@ -12,7 +12,7 @@ following the [Singer spec](https://github.com/singer-io/getting-started/blob/ma ## How to use it -`target-s3-jsonl` is a [Singer](https://singer.io) Target which intend to work with regular [Singer](https://singer.io) Tap. It take the output of the tap and export it as a [JSON Lines](http://jsonlines.org/) files. +`target-s3-jsonl` is a [Singer](https://singer.io) Target which intends to work with a regular [Singer](https://singer.io) Tap. It takes the output of the tap and exports it as [JSON Lines](http://jsonlines.org/) files into an AWS S3 bucket. ## Install diff --git a/pyproject.toml b/pyproject.toml index 374b58c..e587f24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,50 @@ [build-system] requires = [ - "setuptools>=42", + "setuptools", "wheel" ] build-backend = "setuptools.build_meta" + +[tool.mypy] +show_error_context = true +ignore_missing_imports = true + +files = "." 
+ +exclude = [ + "setup.py", + "docs.*", + "docs.conf.*", + "build", + "venv" +] + +# 'strict = true' is equivalent to the following: +check_untyped_defs = true +disallow_incomplete_defs = true +disallow_subclassing_any = true +disallow_untyped_decorators = true +disallow_untyped_defs = true +warn_redundant_casts = true +warn_unused_configs = true +warn_unused_ignores = true +disallow_untyped_calls = true +no_implicit_reexport = true +strict_equality = true + +# The following need to have changes made to be able to enable them: +# disallow_any_generics = true +# no_implicit_optional = true +# warn_return_any = true + +[[tool.mypy.overrides]] # Overrides for currently untyped modules +module = [ + "target_s3_jsonl.*" +] + +[[tool.mypy.overrides]] # Overrides for currently untyped modules +ignore_errors = true + +module = [ + "tests.*" +] diff --git a/setup.cfg b/setup.cfg index f3c428c..081d729 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,10 +16,17 @@ classifiers = Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 +project_urls = + # Documentation = https://ome9ax.github.io/target-s3-jsonl + Releases = https://github.com/ome9ax/target-s3-jsonl/releases + Changelog = https://github.com/ome9ax/target-s3-jsonl/blob/main/CHANGELOG.rst + Issue Tracker = https://github.com/ome9ax/target-s3-jsonl/issues [options] packages = find: -py_modules = target_s3_jsonl +package_dir = + = src +# py_modules = target_s3_jsonl python_requires = >=3.8 # install_requires = file: requirements.txt # install_requires = @@ -27,40 +34,153 @@ python_requires = >=3.8 # boto3==1.24.52 # backoff==2.1.2 include_package_data = True +platforms = any + +[options.entry_points] +console_scripts = + target-s3-jsonl = target_s3_jsonl:main [options.package_data] target_s3_jsonl = logging.conf +[options.packages.find] +where = src +exclude = + tests + [options.extras_require] test = pytest-cov moto[s3,sts] lint = flake8 -dist = wheel - 
-[options.packages.find] -exclude = - tests - -[options.entry_points] -console_scripts = - target-s3-jsonl = target_s3_jsonl:main +static = mypy +dist = build +deploy = twine [tool:pytest] addopts = -v --cov=target_s3_jsonl --cov-fail-under 95 --cov-report annotate --cov-report xml --cov-report term --cov-report html:htmlcov --doctest-modules testpaths = tests +asyncio_mode = auto [coverage:run] branch = True omit = + ./setup.py + tests/.* venv/* [coverage:report] show_missing = True skip_covered = False +exclude_lines = + if __name__ == .__main__.: [flake8] +count = True +show-source = True +statistics = True extend-exclude = venv + build ignore = C901 max-line-length = 160 max-complexity = 10 + +[build_sphinx] +builder = html +warning-is-error = true +# keep-going = true +project = 'Target S3 Jsonl' +version = attr: target_s3_jsonl.__version__ +release = attr: target_s3_jsonl.__version__ +source-dir = 'docs' + +[tox:tox] +passenv = TOXENV TOX_* CI_* GITLAB_* +# requires = tox-pipenv +envlist = py{38,39,310} +# labels = +# test = py{39,310,pi} +# static = flake8, mypy +# envlist = .virtualenvs/target-s3-jsonl +isolated_build = True +# skipsdist = false +# parallel_show_output=True + +# requires = tox-pip-extensions +# tox_pip_extensions_ext_venv_update = true + +[testenv] +usedevelop = True +extras = test +commands = pytest {posargs} + # publish: python setup.py sdist upload --sign -r pypi + # publish: echo Publish that + +[testenv:lint] +usedevelop = True +skip_install = true +deps = flake8 +commands = flake8 {posargs} + +[testenv:static] +usedevelop = True +skip_install = true +deps = mypy +commands = mypy {posargs} + +[testenv:coverage] +usedevelop = True +passenv = CODECOV_TOKEN CI_* +skip_install = true +deps = codecov +# allowlist_externals = gpg +# install_command = echo Install codecov {packages} +# curl https://keybase.io/codecovsecurity/pgp_keys.asc | gpg --no-default-keyring --keyring trustedkeys.gpg --import # One-time step +# curl -Os 
https://uploader.codecov.io/latest/linux/codecov +# curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM +# curl -Os https://uploader.codecov.io/latest/linux/codecov.SHA256SUM.sig +# gpgv codecov.SHA256SUM.sig codecov.SHA256SUM +# shasum -a 256 -c codecov.SHA256SUM +# chmod +x ./codecov +commands = + codecov \ + --file "{toxinidir}/coverage.xml" \ + --name "codecov-$CI_PROJECT_NAME" \ + --branch "$CI_COMMIT_BRANCH" \ + --commit "$CI_COMMIT_SHA" \ + --tag "$CI_COMMIT_TAG" \ + --flags "unittests" {posargs} || echo 'Codecov upload failed' + +[testenv:docs] +# https://packaging-guide.openastronomy.org/en/latest/docs.html +# Init +# sphinx-quickstart docs; cd docs +# edit index.rst >>> add modules +# sphinx-apidoc -o docs . +# sphinx-apidoc -o /source/_modules src +# sphinx-build docs docs/_build/html -W -j auto --color -b html +description = Invoke sphinx-build to build the HTML docs +usedevelop = True +extras = docs +# commands_pre = sphinx-build docs/source "{toxworkdir}/docs_out" -d "{toxworkdir}/docs_doctree" -b doctest {posargs:-E} +# commands = sphinx-build docs docs/_build/html -W -j auto --color -Ea -b html {posargs} +commands = sphinx-build docs/source "{toxworkdir}/docs_out" -d "{toxworkdir}/docs_doctree" -W -j auto --color -b html {posargs} +commands_post = python -c 'import pathlib; print("documentation available under file://\{0\}".format(pathlib.Path(r"{toxworkdir}") / "docs_out" / "index.html"))' + # sphinx-build docs/source "{toxworkdir}/docs_out" -d "{toxworkdir}/docs_doctree" -b linkcheck {posargs:-E} + +[testenv:dist] +deps = build +commands = python -m build + +[testenv:deploy] +usedevelop = True +skip_install = true +# depends = dist +passenv = TWINE_* +deps = + build + twine +commands_pre = + python -m build + twine check dist/* +commands = twine upload --skip-existing {posargs} dist/* diff --git a/setup.py b/setup.py index f278d32..d7de081 100644 --- a/setup.py +++ b/setup.py @@ -4,6 +4,7 @@ setup( install_requires=[ + 
'adjust-precision-for-schema', 'jsonschema==4.14.0', 'boto3==1.24.62', 'backoff==2.1.2' diff --git a/target_s3_jsonl/__init__.py b/src/target_s3_jsonl/__init__.py similarity index 93% rename from target_s3_jsonl/__init__.py rename to src/target_s3_jsonl/__init__.py index 2d4709f..3e464d5 100644 --- a/target_s3_jsonl/__init__.py +++ b/src/target_s3_jsonl/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -__version__ = '1.2.1' +__version__ = '1.2.2' import argparse import gzip @@ -13,7 +13,7 @@ from uuid import uuid4 from jsonschema import Draft4Validator, FormatChecker -from decimal import Decimal +from adjust_precision_for_schema import adjust_decimal_precision_for_schema from target_s3_jsonl import s3 from target_s3_jsonl.logger import get_logger @@ -82,18 +82,6 @@ def emit_state(state): sys.stdout.flush() -def float_to_decimal(value): - '''Walk the given data structure and turn all instances of float into - double.''' - if isinstance(value, float): - return Decimal(str(value)) - if isinstance(value, list): - return [float_to_decimal(child) for child in value] - if isinstance(value, dict): - return {k: float_to_decimal(v) for k, v in value.items()} - return value - - def get_target_key(stream, config, timestamp=None, prefix=None): '''Creates and returns an S3 key for the stream''' @@ -149,7 +137,7 @@ def persist_lines(messages, config, save_records=save_jsonl_file): record_to_load = o['record'] # NOTE: Validate record - validators[stream].validate(float_to_decimal(record_to_load)) + validators[stream].validate(record_to_load) if config.get('add_metadata_columns'): record_to_load = add_metadata_values_to_record(o, {}, now) @@ -170,11 +158,11 @@ def persist_lines(messages, config, save_records=save_jsonl_file): if 'stream' not in o: raise Exception("Line is missing required key 'stream': {}".format(message)) stream = o['stream'] + schemas[stream] = o['schema'] - if config.get('add_metadata_columns'): - schemas[stream] = add_metadata_columns_to_schema(o) - else: - 
schemas[stream] = float_to_decimal(o['schema']) + schemas[stream] = add_metadata_columns_to_schema(o) if config.get('add_metadata_columns') else o + + adjust_decimal_precision_for_schema(schemas[stream]) # NOTE: prevent exception *** jsonschema.exceptions.UnknownType: Unknown type 'SCHEMA' for validator. # 'type' is a key word for jsonschema validator which is different from `{'type': 'SCHEMA'}` as the message type. diff --git a/target_s3_jsonl/logger.py b/src/target_s3_jsonl/logger.py similarity index 100% rename from target_s3_jsonl/logger.py rename to src/target_s3_jsonl/logger.py diff --git a/target_s3_jsonl/logging.conf b/src/target_s3_jsonl/logging.conf similarity index 100% rename from target_s3_jsonl/logging.conf rename to src/target_s3_jsonl/logging.conf diff --git a/target_s3_jsonl/s3.py b/src/target_s3_jsonl/s3.py similarity index 100% rename from target_s3_jsonl/s3.py rename to src/target_s3_jsonl/s3.py diff --git a/tests/test_init.py b/tests/test_init.py index 5e954a8..ca51e66 100644 --- a/tests/test_init.py +++ b/tests/test_init.py @@ -11,7 +11,6 @@ # Package imports from target_s3_jsonl import ( sys, - Decimal, datetime, argparse, gzip, @@ -22,7 +21,6 @@ add_metadata_values_to_record, remove_metadata_values_from_record, emit_state, - float_to_decimal, get_target_key, save_jsonl_file, upload_files, @@ -262,22 +260,6 @@ def test_remove_metadata_values_from_record(): 'c_pk': 1, 'c_varchar': '1', 'c_int': 1, 'c_float': 1.99} -def test_float_to_decimal(): - '''TEST : simple float_to_decimal call''' - - assert float_to_decimal({ - "type": "RECORD", - "stream": "tap_dummy_test-test_table_one", - "record": { - "c_pk": 1, "c_varchar": "1", "c_int": 1, "c_float": 1.99}, - "version": 1, "time_extracted": "2019-01-31T15:51:47.465408Z"}) \ - == { - "type": "RECORD", "stream": "tap_dummy_test-test_table_one", - "record": { - "c_pk": 1, "c_varchar": "1", "c_int": 1, "c_float": Decimal('1.99')}, - "version": 1, "time_extracted": "2019-01-31T15:51:47.465408Z"} - - 
def test_get_target_key(config): '''TEST : simple get_target_key call''' @@ -375,6 +357,8 @@ def test_persist_lines(caplog, config, input_data, input_multi_stream_data, inva dummy_type = '{"type": "DUMMY", "value": {"currently_syncing": "tap_dummy_test-test_table_one"}}' output_state, output_file_metadata = persist_lines([dummy_type] + input_multi_stream_data, config) + assert 'Unknown message type "{}" in message "{}"'.format(json.loads(dummy_type)['type'], dummy_type.replace('"', "'")) in caplog.text + with raises(json.decoder.JSONDecodeError): output_state, output_file_metadata = persist_lines(invalid_row_data, config) diff --git a/tests/test_s3.py b/tests/test_s3.py index 5853903..d9bd77c 100644 --- a/tests/test_s3.py +++ b/tests/test_s3.py @@ -4,7 +4,6 @@ from pathlib import Path import json import os -import re import boto3 from moto import mock_s3, mock_sts @@ -41,9 +40,9 @@ def aws_credentials(): def test_log_backoff_attempt(caplog): '''TEST : simple upload_files call''' - log_backoff_attempt({'tries': 99}) - pat = r'INFO root:s3.py:\d{2} Error detected communicating with Amazon, triggering backoff: 99 try\n' - assert re.match(pat, caplog.text) + log_backoff_attempt({'tries': 2}) + + assert 'Error detected communicating with Amazon, triggering backoff: 2 try' in caplog.text @mock_sts