diff --git a/README.rst b/README.rst
index 9d733b7..ed33b49 100644
--- a/README.rst
+++ b/README.rst
@@ -11,7 +11,7 @@ sqlitebiter
Summary
-------
-sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file.
+sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.
Features
--------
@@ -50,9 +50,8 @@ Install via pip
Dependencies
============
-Python packages
----------------
-
+Python packages (mandatory)
+------------------------------
Dependency python packages are automatically installed during
``sqlitebiter`` installation via pip.
@@ -77,6 +76,11 @@ Test dependencies
- `tox `__
- `XlsxWriter `__
+Python packages (optional)
+------------------------------
+- `lxml <https://lxml.de/>`__ (faster HTML conversion if installed)
+
+
Documentation
=============
diff --git a/docs/pages/installation.rst b/docs/pages/installation.rst
index 29046d4..b469fc7 100644
--- a/docs/pages/installation.rst
+++ b/docs/pages/installation.rst
@@ -15,9 +15,8 @@ Install via pip
Dependencies
============
-Python packages
----------------
-
+Python packages (mandatory)
+------------------------------
Dependency python packages are automatically installed during
``sqlitebiter`` installation via pip.
@@ -41,3 +40,8 @@ Test dependencies
- `pytest-runner `__
- `tox `__
- `XlsxWriter `__
+
+Python packages (optional)
+------------------------------
+- `lxml <https://lxml.de/>`__ (faster HTML conversion if installed)
+
diff --git a/docs/pages/introduction/summary.txt b/docs/pages/introduction/summary.txt
index dde1c47..edba12d 100644
--- a/docs/pages/introduction/summary.txt
+++ b/docs/pages/introduction/summary.txt
@@ -1 +1 @@
-sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file.
+sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index d93babd..cebb7d8 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -2,4 +2,4 @@ click
DataProperty>=0.8.1
logbook
path.py
-SimpleSQLite>=0.4.8
+SimpleSQLite>=0.5.1
diff --git a/setup.py b/setup.py
index bcec84b..07972d5 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,10 @@
description=summary,
include_package_data=True,
install_requires=install_requires,
- keywords=["SQLite", "converter", "CSV", "JSON", "Excel", "Google Sheets"],
+ keywords=[
+ "SQLite", "converter",
+ "CSV", "Excel", "Google Sheets", "HTML", "JSON",
+ ],
license="MIT License",
long_description=long_description,
packages=setuptools.find_packages(exclude=['test*']),
diff --git a/sqlitebiter/__init__.py b/sqlitebiter/__init__.py
index 8d91de2..6c5007c 100644
--- a/sqlitebiter/__init__.py
+++ b/sqlitebiter/__init__.py
@@ -1 +1 @@
-VERSION = "0.1.7"
+VERSION = "0.2.0"
diff --git a/sqlitebiter/sqlitebiter.py b/sqlitebiter/sqlitebiter.py
index e1627de..50396bc 100644
--- a/sqlitebiter/sqlitebiter.py
+++ b/sqlitebiter/sqlitebiter.py
@@ -43,6 +43,9 @@ class LoaderFactory(object):
LoaderTuple(
re.compile("[\.]csv$"),
simplesqlite.loader.CsvTableFileLoader()),
+ LoaderTuple(
+ re.compile("[\.]html$|[\.]htm$"),
+ simplesqlite.loader.HtmlTableFileLoader()),
LoaderTuple(
re.compile("[\.]json$"),
simplesqlite.loader.JsonTableFileLoader()),
@@ -107,7 +110,7 @@ def cmd(ctx, log_level):
@click.pass_context
def file(ctx, files, output_path):
"""
- Convert CSV/JSON/Excel file(s) to a SQLite database file.
+ Convert CSV/Excel/HTML/JSON file(s) to a SQLite database file.
"""
con = create_database(output_path)
diff --git a/test/test_sqlitebiter.py b/test/test_sqlitebiter.py
index a378eac..77d6592 100644
--- a/test/test_sqlitebiter.py
+++ b/test/test_sqlitebiter.py
@@ -16,7 +16,7 @@
def valid_json_single_file():
- file_path = "json_a.json"
+ file_path = "singlejson.json"
with open(file_path, "w") as f:
f.write("""[
{"attr_b": 4, "attr_c": "a", "attr_a": 1},
@@ -40,15 +40,15 @@ def invalid_json_single_file():
def valid_json_multi_file():
- file_path = "multi.json"
+ file_path = "multijson.json"
with open(file_path, "w") as f:
f.write("""{
- "json_b" : [
+ "table1" : [
{"attr_b": 4, "attr_c": "a", "attr_a": 1},
{"attr_b": 2.1, "attr_c": "bb", "attr_a": 2},
{"attr_b": 120.9, "attr_c": "ccc", "attr_a": 3}
],
- "json_c" : [
+ "table2" : [
{"a": 1, "b": 4},
{"a": 2 },
{"a": 3, "b": 120.9}
@@ -158,6 +158,71 @@ def invalid_excel_file2():
return file_path
+def valid_html_file():  # Test fixture: write two HTML tables to "htmltable.html" and return that path.
+    file_path = "htmltable.html"  # NOTE(review): markup below is reconstructed; id="tablename" presumably yields table "htmltable_tablename" - confirm
+    with open(file_path, "w") as f:  # relative path: the file is created in the current working directory
+        f.write("""<table id="tablename">
+    <caption>caption</caption>
+    <tr>
+      <th>a</th>
+      <th>b</th>
+      <th>c</th>
+    </tr>
+    <tr>
+      <td>1</td>
+      <td>123.1</td>
+      <td>a</td>
+    </tr>
+    <tr>
+      <td>2</td>
+      <td>2.2</td>
+      <td>bb</td>
+    </tr>
+    <tr>
+      <td>3</td>
+      <td>3.3</td>
+      <td>ccc</td>
+    </tr>
+</table>
+<table>
+    <tr>
+      <th>a</th>
+      <th>b</th>
+    </tr>
+    <tr>
+      <td>1</td>
+      <td>123.1</td>
+    </tr>
+    <tr>
+      <td>2</td>
+      <td>2.2</td>
+    </tr>
+    <tr>
+      <td>3</td>
+      <td>3.3</td>
+    </tr>
+</table>
+""")
+
+    return file_path
+
+
+def invalid_html_file():  # Test fixture: write an HTML document with no <table> to "invalid_html.html" and return that path.
+    file_path = "invalid_html.html"  # NOTE(review): markup below is reconstructed from tag-stripped text - confirm against the real fixture
+    with open(file_path, "w") as f:  # relative path: the file is created in the current working directory
+        f.write("""<html>
+  <head>
+    header
+  </head>
+  <body>
+    hogehoge
+  </body>
+</html>
+""")
+
+    return file_path
+
+
class Test_sqlitebiter:
@pytest.mark.parametrize(["option_list", "expected"], [
@@ -186,6 +251,9 @@ def test_normal(self):
valid_excel_file(),
invalid_excel_file(),
invalid_excel_file2(),
+
+ valid_html_file(),
+ invalid_html_file(),
]
result = runner.invoke(cmd, ["file"] + file_list + ["-o", db_path])
@@ -193,16 +261,24 @@ def test_normal(self):
con = simplesqlite.SimpleSQLite(db_path, "r")
expected_tables = [
- 'json_a', 'json_c', 'json_b',
- 'csv_a', 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d',
+ 'singlejson_json1', 'multijson_table1', 'multijson_table2',
+ 'csv_a',
+ 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d',
+ 'htmltable_tablename', 'htmltable_html2',
]
- assert set(con.get_table_name_list()) == set(expected_tables)
-
- expected_data = {
- "json_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
- "json_b": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
- "json_c": [(1, '4'), (2, 'NULL'), (3, '120.9')],
+ message = "expected-tables={}, actual-tables={}".format(
+ expected_tables, con.get_table_name_list())
+ assert set(con.get_table_name_list()) == set(
+ expected_tables), message
+
+ expected_data_table = {
+ "singlejson_json1":
+ [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
+ "multijson_table1":
+ [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
+ "multijson_table2":
+ [(1, '4'), (2, 'NULL'), (3, '120.9')],
"csv_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"excel_sheet_a":
[(1.0, 1.1, 'a'), (2.0, 2.2, 'bb'), (3.0, 3.3, 'cc')],
@@ -210,7 +286,16 @@ def test_normal(self):
[(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')],
"excel_sheet_d":
[(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')],
+ "htmltable_tablename":
+ [(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')],
+ "htmltable_html2":
+ [(1, 123.1), (2, 2.2), (3, 3.3)],
}
for table in con.get_table_name_list():
result = con.select("*", table_name=table)
- assert expected_data.get(table) == result.fetchall()
+ expected_data = expected_data_table.get(table)
+ actual_data = result.fetchall()
+
+ message = "table={}, expected={}, actual={}".format(
+ table, expected_data, actual_data)
+ assert expected_data == actual_data, message