Skip to content

Commit

Permalink
Merge pull request #14 from thombashi/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
thombashi authored Sep 18, 2016
2 parents 01768b2 + d22fcc7 commit de822d0
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 25 deletions.
12 changes: 8 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ sqlitebiter
Summary
-------

sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file.
sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.

Features
--------
Expand Down Expand Up @@ -50,9 +50,8 @@ Install via pip
Dependencies
============

Python packages
---------------

Python packages (mandatory)
------------------------------
Dependency python packages are automatically installed during
``sqlitebiter`` installation via pip.

Expand All @@ -77,6 +76,11 @@ Test dependencies
- `tox <https://testrun.org/tox/latest/>`__
- `XlsxWriter <http://xlsxwriter.readthedocs.io/>`__

Python packages (optional)
------------------------------
- `lxml <http://lxml.de/installation.html>`__ (Faster HTML conversion if installed)


Documentation
=============

Expand Down
10 changes: 7 additions & 3 deletions docs/pages/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ Install via pip
Dependencies
============

Python packages
---------------

Python packages (mandatory)
------------------------------
Dependency python packages are automatically installed during
``sqlitebiter`` installation via pip.

Expand All @@ -41,3 +40,8 @@ Test dependencies
- `pytest-runner <https://pypi.python.org/pypi/pytest-runner>`__
- `tox <https://testrun.org/tox/latest/>`__
- `XlsxWriter <http://xlsxwriter.readthedocs.io/>`__

Python packages (optional)
------------------------------
- `lxml <http://lxml.de/installation.html>`__ (Faster HTML conversion if installed)

2 changes: 1 addition & 1 deletion docs/pages/introduction/summary.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sqlitebiter is a CLI tool to convert CSV/JSON/Excel/Google-Sheets to a SQLite database file.
sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.
2 changes: 1 addition & 1 deletion requirements/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ click
DataProperty>=0.8.1
logbook
path.py
SimpleSQLite>=0.4.8
SimpleSQLite>=0.5.1
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@
description=summary,
include_package_data=True,
install_requires=install_requires,
keywords=["SQLite", "converter", "CSV", "JSON", "Excel", "Google Sheets"],
keywords=[
"SQLite", "converter",
"CSV", "Excel", "Google Sheets", "HTML", "JSON",
],
license="MIT License",
long_description=long_description,
packages=setuptools.find_packages(exclude=['test*']),
Expand Down
2 changes: 1 addition & 1 deletion sqlitebiter/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = "0.1.7"
VERSION = "0.2.0"
5 changes: 4 additions & 1 deletion sqlitebiter/sqlitebiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ class LoaderFactory(object):
LoaderTuple(
re.compile("[\.]csv$"),
simplesqlite.loader.CsvTableFileLoader()),
LoaderTuple(
re.compile("[\.]html$|[\.]htm$"),
simplesqlite.loader.HtmlTableFileLoader()),
LoaderTuple(
re.compile("[\.]json$"),
simplesqlite.loader.JsonTableFileLoader()),
Expand Down Expand Up @@ -107,7 +110,7 @@ def cmd(ctx, log_level):
@click.pass_context
def file(ctx, files, output_path):
"""
Convert CSV/JSON/Excel file(s) to a SQLite database file.
Convert CSV/Excel/HTML/JSON file(s) to a SQLite database file.
"""

con = create_database(output_path)
Expand Down
111 changes: 98 additions & 13 deletions test/test_sqlitebiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


def valid_json_single_file():
file_path = "json_a.json"
file_path = "singlejson.json"
with open(file_path, "w") as f:
f.write("""[
{"attr_b": 4, "attr_c": "a", "attr_a": 1},
Expand All @@ -40,15 +40,15 @@ def invalid_json_single_file():


def valid_json_multi_file():
file_path = "multi.json"
file_path = "multijson.json"
with open(file_path, "w") as f:
f.write("""{
"json_b" : [
"table1" : [
{"attr_b": 4, "attr_c": "a", "attr_a": 1},
{"attr_b": 2.1, "attr_c": "bb", "attr_a": 2},
{"attr_b": 120.9, "attr_c": "ccc", "attr_a": 3}
],
"json_c" : [
"table2" : [
{"a": 1, "b": 4},
{"a": 2 },
{"a": 3, "b": 120.9}
Expand Down Expand Up @@ -158,6 +158,71 @@ def invalid_excel_file2():
return file_path


def valid_html_file():
    """
    Write an HTML fixture containing two tables and return its path.

    The first table carries ``id="tablename"``, a caption and the columns
    a/b/c; the second has no id and only the columns a/b. Each table has
    one header row and three data rows. The file is created as
    ``htmltable.html``, relative to the current working directory.
    """

    html_path = "htmltable.html"
    html_text = """<table id="tablename">
<caption>caption</caption>
<tr>
<th>a</th>
<th>b</th>
<th>c</th>
</tr>
<tr>
<td align="right">1</td>
<td align="right">123.1</td>
<td align="left">a</td>
</tr>
<tr>
<td align="right">2</td>
<td align="right">2.2</td>
<td align="left">bb</td>
</tr>
<tr>
<td align="right">3</td>
<td align="right">3.3</td>
<td align="left">ccc</td>
</tr>
</table>
<table>
<tr>
<th>a</th>
<th>b</th>
</tr>
<tr>
<td align="right">1</td>
<td align="right">123.1</td>
</tr>
<tr>
<td align="right">2</td>
<td align="right">2.2</td>
</tr>
<tr>
<td align="right">3</td>
<td align="right">3.3</td>
</tr>
</table>
"""

    with open(html_path, "w") as html_file:
        html_file.write(html_text)

    return html_path


def invalid_html_file():
    """
    Write an HTML fixture without any ``<table>`` element and return its path.

    The document consists only of a head and a body holding plain text.
    The file is created as ``invalid_html.html``, relative to the current
    working directory.
    """

    html_path = "invalid_html.html"
    html_text = """<html>
<head>
header
</head>
<body>
hogehoge
</body>
</html>
"""

    with open(html_path, "w") as html_file:
        html_file.write(html_text)

    return html_path


class Test_sqlitebiter:

@pytest.mark.parametrize(["option_list", "expected"], [
Expand Down Expand Up @@ -186,31 +251,51 @@ def test_normal(self):
valid_excel_file(),
invalid_excel_file(),
invalid_excel_file2(),

valid_html_file(),
invalid_html_file(),
]

result = runner.invoke(cmd, ["file"] + file_list + ["-o", db_path])
assert result.exit_code == 0

con = simplesqlite.SimpleSQLite(db_path, "r")
expected_tables = [
'json_a', 'json_c', 'json_b',
'csv_a', 'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d',
'singlejson_json1', 'multijson_table1', 'multijson_table2',
'csv_a',
'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d',
'htmltable_tablename', 'htmltable_html2',
]

assert set(con.get_table_name_list()) == set(expected_tables)

expected_data = {
"json_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"json_b": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"json_c": [(1, '4'), (2, 'NULL'), (3, '120.9')],
message = "expected-tables={}, actual-tables={}".format(
expected_tables, con.get_table_name_list())
assert set(con.get_table_name_list()) == set(
expected_tables), message

expected_data_table = {
"singlejson_json1":
[(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"multijson_table1":
[(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"multijson_table2":
[(1, '4'), (2, 'NULL'), (3, '120.9')],
"csv_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"excel_sheet_a":
[(1.0, 1.1, 'a'), (2.0, 2.2, 'bb'), (3.0, 3.3, 'cc')],
"excel_sheet_c":
[(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')],
"excel_sheet_d":
[(1.0, '1.1', 'a'), (2.0, '', 'bb'), (3.0, '3.3', '')],
"htmltable_tablename":
[(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')],
"htmltable_html2":
[(1, 123.1), (2, 2.2), (3, 3.3)],
}
for table in con.get_table_name_list():
result = con.select("*", table_name=table)
assert expected_data.get(table) == result.fetchall()
expected_data = expected_data_table.get(table)
actual_data = result.fetchall()

message = "table={}, expected={}, actual={}".format(
table, expected_data, actual_data)
assert expected_data == actual_data, message

0 comments on commit de822d0

Please sign in to comment.