diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 927ffd3..4c0886b 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -2,5 +2,5 @@ click DataProperty>=0.12.0 logbook path.py -pytablereader>=0.5.2 -SimpleSQLite>=0.6.3 +pytablereader>=0.6.0 +SimpleSQLite>=0.6.4 diff --git a/sqlitebiter/__init__.py b/sqlitebiter/__init__.py index f8c8a10..6cc293e 100644 --- a/sqlitebiter/__init__.py +++ b/sqlitebiter/__init__.py @@ -1 +1 @@ -VERSION = "0.3.3" +VERSION = "0.4.0" diff --git a/sqlitebiter/sqlitebiter.py b/sqlitebiter/sqlitebiter.py index 57a6a14..4fddd8b 100644 --- a/sqlitebiter/sqlitebiter.py +++ b/sqlitebiter/sqlitebiter.py @@ -38,11 +38,16 @@ def create_database(database_path): def _setup_logger_from_context(ctx, logger): log_level = ctx.obj.get("LOG_LEVEL") + if log_level == logbook.NOTSET: logger.disable() + ptr.logger.disable() elif log_level is None: log_level = logbook.INFO + ptr.logger.level = logbook.INFO + logger.level = log_level + ptr.logger.level = log_level def _get_format_type_from_path(file_path): @@ -70,7 +75,7 @@ def cmd(ctx, log_level): @click.pass_context def file(ctx, files, output_path): """ - Convert CSV/Excel/HTML/Markdown/JSON file(s) to a SQLite database file. + Convert CSV/Excel/HTML/JSON/Markdown file(s) to a SQLite database file. """ if dataproperty.is_empty_sequence(files): @@ -88,13 +93,10 @@ def file(ctx, files, output_path): continue try: - loader_factory = ptr.TableFileLoaderFactory(file_path) + loader = ptr.TableFileLoader(file_path) except ptr.InvalidFilePathError as e: logger.debug(e) continue - - try: - loader = loader_factory.create_from_path() except ptr.LoaderNotFoundError: logger.debug( "loader not found that coincide with '{}'".format(file_path)) @@ -102,9 +104,11 @@ def file(ctx, files, output_path): try: for tabledata in loader.load(): + sqlite_tabledata = ptr.SQLiteTableDataSanitizer( + tabledata).sanitize() + try: - con.create_table_from_tabledata( - ptr.SQLiteTableDataSanitizer(tabledata).sanitize()) + con.create_table_from_tabledata(sqlite_tabledata) result_counter.inc_success() except (ValueError, IOError) as e: logger.debug( @@ -113,7 +117,7 @@ def file(ctx, files, output_path): continue click.echo("convert '{:s}' to '{:s}' table".format( - file_path, tabledata.table_name)) + file_path, sqlite_tabledata.table_name)) except ptr.OpenError as e: logger.error("open error: file={:s}, message='{:s}'".format( file_path, str(e))) @@ -131,6 +135,62 @@ def file(ctx, files, output_path): sys.exit(result_counter.get_return_code()) +@cmd.command() +@click.argument("url", type=str) +@click.option( + "--format", "format_name", default="html", + type=click.Choice(["csv", "excel", "html", "json", "markdown"]), + help="Data format to loading (defaults to html).") +@click.option( + "-o", "--output-path", default="out.sqlite", + help="Output path of the SQLite database file") +@click.pass_context +def url(ctx, url, format_name, output_path): + """ + Fetch data from a URL and convert data to a SQLite database file. + """ + + if dataproperty.is_empty_sequence(url): + return 0 + + con = create_database(output_path) + result_counter = ResultCounter() + + logger = logbook.Logger("sqlitebiter url") + _setup_logger_from_context(ctx, logger) + + try: + loader = ptr.TableUrlLoader(url, format_name) + except ptr.LoaderNotFoundError as e: + logger.error(e) + sys.exit(1) + except ptr.HTTPError as e: + logger.error(e) + sys.exit(2) + + try: + for tabledata in loader.load(): + sqlite_tabledata = ptr.SQLiteTableDataSanitizer( + tabledata).sanitize() + + try: + con.create_table_from_tabledata(sqlite_tabledata) + result_counter.inc_success() + except (ValueError) as e: + logger.debug( + "url={}, message={}".format(url, str(e))) + result_counter.inc_fail() + continue + + click.echo("convert a table to '{:s}' table".format( + sqlite_tabledata.table_name)) + except ptr.InvalidDataError as e: + logger.error("invalid data: url={}, message={}".format(url, str(e))) + result_counter.inc_fail() + + sys.exit(result_counter.get_return_code()) + + @cmd.command() @click.argument( "credentials", type=click.Path(exists=True)) diff --git a/test/test_sqlitebiter.py b/test/test_sqlitebiter.py index fde2c85..53216a3 100644 --- a/test/test_sqlitebiter.py +++ b/test/test_sqlitebiter.py @@ -175,7 +175,8 @@ def invalid_excel_file2(): def valid_html_file(): file_path = "htmltable.html" with open(file_path, "w") as f: - f.write(""" + f.write("""testtitle +
@@ -355,9 +356,9 @@ def test_normal_multi(self): con = simplesqlite.SimpleSQLite(db_path, "r") expected_tables = [ 'singlejson_json1', 'multijson_table1', 'multijson_table2', - 'csv_a', "insert_csv", + 'csv_a', "rename_insert", 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d', - 'htmltable_tablename', 'htmltable_html2', + 'testtitle_tablename', 'testtitle_html2', 'valid_mdtable_markdown1', ] @@ -372,9 +373,9 @@ def test_normal_multi(self): "multijson_table1": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], "multijson_table2": - [(1, '4'), (2, 'NULL'), (3, '120.9')], + [(1, 4.0), (2, None), (3, 120.9)], "csv_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], - "insert_csv": + "rename_insert": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], "excel_sheet_a": [(1.0, 1.1, 'a'), (2.0, 2.2, 'bb'), (3.0, 3.3, 'cc')], @@ -382,9 +383,9 @@ def test_normal_multi(self): [(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')], "excel_sheet_d": [(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')], - "htmltable_tablename": + "testtitle_tablename": [(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')], - "htmltable_html2": + "testtitle_html2": [(1, 123.1), (2, 2.2), (3, 3.3)], "valid_mdtable_markdown1": [(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')],
caption
a