diff --git a/README.rst b/README.rst index 9d733b7..ed33b49 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ sqlitebiter Summary ------- -sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file. +sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file. Features -------- @@ -50,9 +50,8 @@ Install via pip Dependencies ============ -Python packages ---------------- - +Python packages (mandatory) +------------------------------ Dependency python packages are automatically installed during ``sqlitebiter`` installation via pip. @@ -77,6 +76,11 @@ Test dependencies - `tox `__ - `XlsxWriter `__ +Python packages (optional) +------------------------------ +- `lxml `__ (Faster HTML conversion if installed) + + Documentation ============= diff --git a/docs/pages/installation.rst b/docs/pages/installation.rst index 29046d4..b469fc7 100644 --- a/docs/pages/installation.rst +++ b/docs/pages/installation.rst @@ -15,9 +15,8 @@ Install via pip Dependencies ============ -Python packages ---------------- - +Python packages (mandatory) +------------------------------ Dependency python packages are automatically installed during ``sqlitebiter`` installation via pip. @@ -41,3 +40,8 @@ Test dependencies - `pytest-runner `__ - `tox `__ - `XlsxWriter `__ + +Python packages (optional) +------------------------------ +- `lxml `__ (Faster HTML conversion if installed) + diff --git a/docs/pages/introduction/summary.txt b/docs/pages/introduction/summary.txt index dde1c47..edba12d 100644 --- a/docs/pages/introduction/summary.txt +++ b/docs/pages/introduction/summary.txt @@ -1 +1 @@ -sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file. +sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file. 
diff --git a/requirements/requirements.txt b/requirements/requirements.txt index d93babd..cebb7d8 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -2,4 +2,4 @@ click DataProperty>=0.8.1 logbook path.py -SimpleSQLite>=0.4.8 +SimpleSQLite>=0.5.1 diff --git a/setup.py b/setup.py index bcec84b..07972d5 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,10 @@ description=summary, include_package_data=True, install_requires=install_requires, - keywords=["SQLite", "converter", "CSV", "JSON", "Excel", "Google Sheets"], + keywords=[ + "SQLite", "converter", + "CSV", "Excel", "Google Sheets", "HTML", "JSON", + ], license="MIT License", long_description=long_description, packages=setuptools.find_packages(exclude=['test*']), diff --git a/sqlitebiter/__init__.py b/sqlitebiter/__init__.py index 8d91de2..6c5007c 100644 --- a/sqlitebiter/__init__.py +++ b/sqlitebiter/__init__.py @@ -1 +1 @@ -VERSION = "0.1.7" +VERSION = "0.2.0" diff --git a/sqlitebiter/sqlitebiter.py b/sqlitebiter/sqlitebiter.py index e1627de..50396bc 100644 --- a/sqlitebiter/sqlitebiter.py +++ b/sqlitebiter/sqlitebiter.py @@ -43,6 +43,9 @@ class LoaderFactory(object): LoaderTuple( re.compile("[\.]csv$"), simplesqlite.loader.CsvTableFileLoader()), + LoaderTuple( + re.compile("[\.]html$|[\.]htm$"), + simplesqlite.loader.HtmlTableFileLoader()), LoaderTuple( re.compile("[\.]json$"), simplesqlite.loader.JsonTableFileLoader()), @@ -107,7 +110,7 @@ def cmd(ctx, log_level): @click.pass_context def file(ctx, files, output_path): """ - Convert CSV/JSON/Excel file(s) to a SQLite database file. + Convert CSV/Excel/HTML/JSON file(s) to a SQLite database file. 
""" con = create_database(output_path) diff --git a/test/test_sqlitebiter.py b/test/test_sqlitebiter.py index a378eac..77d6592 100644 --- a/test/test_sqlitebiter.py +++ b/test/test_sqlitebiter.py @@ -16,7 +16,7 @@ def valid_json_single_file(): - file_path = "json_a.json" + file_path = "singlejson.json" with open(file_path, "w") as f: f.write("""[ {"attr_b": 4, "attr_c": "a", "attr_a": 1}, @@ -40,15 +40,15 @@ def invalid_json_single_file(): def valid_json_multi_file(): - file_path = "multi.json" + file_path = "multijson.json" with open(file_path, "w") as f: f.write("""{ - "json_b" : [ + "table1" : [ {"attr_b": 4, "attr_c": "a", "attr_a": 1}, {"attr_b": 2.1, "attr_c": "bb", "attr_a": 2}, {"attr_b": 120.9, "attr_c": "ccc", "attr_a": 3} ], - "json_c" : [ + "table2" : [ {"a": 1, "b": 4}, {"a": 2 }, {"a": 3, "b": 120.9} @@ -158,6 +158,71 @@ def invalid_excel_file2(): return file_path +def valid_html_file(): + file_path = "htmltable.html" + with open(file_path, "w") as f: + f.write(""" + + + + + + + + + + + + + + + + + + + + + +
caption
abc
1123.1a
22.2bb
33.3ccc
+ + + + + + + + + + + + + + + + + +
ab
1123.1
22.2
33.3
+""") + + return file_path + + +def invalid_html_file(): + file_path = "invalid_html.html" + with open(file_path, "w") as f: + f.write(""" + + header + + + hogehoge + + +""") + + return file_path + + class Test_sqlitebiter: @pytest.mark.parametrize(["option_list", "expected"], [ @@ -186,6 +251,9 @@ def test_normal(self): valid_excel_file(), invalid_excel_file(), invalid_excel_file2(), + + valid_html_file(), + invalid_html_file(), ] result = runner.invoke(cmd, ["file"] + file_list + ["-o", db_path]) @@ -193,16 +261,24 @@ def test_normal(self): con = simplesqlite.SimpleSQLite(db_path, "r") expected_tables = [ - 'json_a', 'json_c', 'json_b', - 'csv_a', 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d', + 'singlejson_json1', 'multijson_table1', 'multijson_table2', + 'csv_a', + 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d', + 'htmltable_tablename', 'htmltable_html2', ] - assert set(con.get_table_name_list()) == set(expected_tables) - - expected_data = { - "json_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], - "json_b": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], - "json_c": [(1, '4'), (2, 'NULL'), (3, '120.9')], + message = "expected-tables={}, actual-tables={}".format( + expected_tables, con.get_table_name_list()) + assert set(con.get_table_name_list()) == set( + expected_tables), message + + expected_data_table = { + "singlejson_json1": + [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], + "multijson_table1": + [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], + "multijson_table2": + [(1, '4'), (2, 'NULL'), (3, '120.9')], "csv_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')], "excel_sheet_a": [(1.0, 1.1, 'a'), (2.0, 2.2, 'bb'), (3.0, 3.3, 'cc')], @@ -210,7 +286,16 @@ def test_normal(self): [(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')], "excel_sheet_d": [(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')], + "htmltable_tablename": + [(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')], + "htmltable_html2": + [(1, 123.1), (2, 
2.2), (3, 3.3)], } for table in con.get_table_name_list(): result = con.select("*", table_name=table) - assert expected_data.get(table) == result.fetchall() + expected_data = expected_data_table.get(table) + actual_data = result.fetchall() + + message = "table={}, expected={}, actual={}".format( + table, expected_data, actual_data) + assert expected_data == actual_data, message