Skip to content

Commit

Permalink
Merge pull request #16 from thombashi/develop
Browse files Browse the repository at this point in the history
Add support for Markdown
  • Loading branch information
thombashi authored Oct 29, 2016
2 parents a36484e + 3116021 commit 4387344
Show file tree
Hide file tree
Showing 10 changed files with 81 additions and 64 deletions.
1 change: 0 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ include README.rst
include setup.cfg
include tox.ini

recursive-include docs *
recursive-include requirements *
recursive-include test *

Expand Down
16 changes: 12 additions & 4 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,29 @@ sqlitebiter

.. image:: https://img.shields.io/pypi/pyversions/sqlitebiter.svg
:target: https://pypi.python.org/pypi/sqlitebiter
.. image:: https://travis-ci.org/thombashi/sqlitebiter.svg?branch=master

.. image:: https://img.shields.io/travis/thombashi/sqlitebiter/master.svg?label=Linux
:target: https://travis-ci.org/thombashi/sqlitebiter
.. image:: https://ci.appveyor.com/api/projects/status/hunqrvo1inm2jjnj?svg=true
:alt: Linux CI test status

.. image:: https://img.shields.io/appveyor/ci/thombashi/sqlitebiter/master.svg?label=Windows
:target: https://ci.appveyor.com/project/thombashi/sqlitebiter
:alt: Windows CI test status

Summary
-------

sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.
sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Markdown/Google-Sheets to a SQLite database file.

Features
--------

- Create a SQLite database file from:
- CSV file(s)
- JSON file(s)
- Microsoft Excel :superscript:`TM` file(s)
- HTML file(s): extract table tag data
- JSON file(s)
- Markdown file(s): extract Markdown table
- `Google Sheets <https://www.google.com/intl/en_us/sheets/about/>`_

Usage
Expand Down Expand Up @@ -58,8 +64,10 @@ Dependency python packages are automatically installed during
- `click <http://click.pocoo.org/>`__
- `DataProperty <https://github.com/thombashi/DataProperty>`__
- `path.py <https://github.com/jaraco/path.py>`__
- `pytablereader <https://github.com/thombashi/pytablereader>`__
- `SimpleSQLite <https://github.com/thombashi/SimpleSQLite>`__


Google Sheets dependencies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
2 changes: 2 additions & 0 deletions docs/pages/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ Dependency python packages are automatically installed during
- `click <http://click.pocoo.org/>`__
- `DataProperty <https://github.com/thombashi/DataProperty>`__
- `path.py <https://github.com/jaraco/path.py>`__
- `pytablereader <https://github.com/thombashi/pytablereader>`__
- `SimpleSQLite <https://github.com/thombashi/SimpleSQLite>`__


Google Sheets dependencies
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Expand Down
8 changes: 6 additions & 2 deletions docs/pages/introduction/badges.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
.. image:: https://img.shields.io/pypi/pyversions/sqlitebiter.svg
:target: https://pypi.python.org/pypi/sqlitebiter
.. image:: https://travis-ci.org/thombashi/sqlitebiter.svg?branch=master

.. image:: https://img.shields.io/travis/thombashi/sqlitebiter/master.svg?label=Linux
:target: https://travis-ci.org/thombashi/sqlitebiter
.. image:: https://ci.appveyor.com/api/projects/status/hunqrvo1inm2jjnj?svg=true
:alt: Linux CI test status

.. image:: https://img.shields.io/appveyor/ci/thombashi/sqlitebiter/master.svg?label=Windows
:target: https://ci.appveyor.com/project/thombashi/sqlitebiter
:alt: Windows CI test status
4 changes: 3 additions & 1 deletion docs/pages/introduction/feature.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ Features

- Create a SQLite database file from:
- CSV file(s)
- JSON file(s)
- Microsoft Excel :superscript:`TM` file(s)
- HTML file(s): extract table tag data
- JSON file(s)
- Markdown file(s): extract Markdown table
- `Google Sheets <https://www.google.com/intl/en_us/sheets/about/>`_
2 changes: 1 addition & 1 deletion docs/pages/introduction/summary.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Google-Sheets to a SQLite database file.
sqlitebiter is a CLI tool to convert CSV/Excel/HTML/JSON/Markdown/Google-Sheets to a SQLite database file.
5 changes: 3 additions & 2 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
click
DataProperty>=0.9.0
DataProperty>=0.10.0
logbook
path.py
SimpleSQLite>=0.5.5
pytablereader>=0.2.0
SimpleSQLite>=0.6.0
2 changes: 1 addition & 1 deletion sqlitebiter/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
VERSION = "0.2.1"
VERSION = "0.3.0"
70 changes: 20 additions & 50 deletions sqlitebiter/sqlitebiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,14 @@
"""

from __future__ import absolute_import
import collections
import re
import sys

import click
import dataproperty
import logbook
import path
import pytablereader as ptr
import simplesqlite
from simplesqlite.loader import ValidationError
from simplesqlite.loader import InvalidDataError
from simplesqlite.loader import OpenError

from ._counter import ResultCounter

Expand All @@ -31,42 +27,6 @@
handler.push_application()


class LoaderNotFound(Exception):
    """Raised when no table loader matches a given file path."""


class LoaderFactory(object):
    """
    Select a table file loader from the extension of a file path.

    Each entry of the internal table pairs a compiled file-name pattern
    with a loader instance that is created once, when the class is defined.
    """

    LoaderTuple = collections.namedtuple(
        "LoaderTuple", "filename_regexp loader")

    # Raw strings are used so that the backslash in each pattern is passed
    # to the regex engine verbatim instead of relying on Python leaving an
    # unrecognized string escape untouched (which emits a warning on
    # recent interpreters).
    __LOADERTUPLE_LIST = [
        LoaderTuple(
            re.compile(r"[\.]csv$"),
            simplesqlite.loader.CsvTableFileLoader()),
        LoaderTuple(
            re.compile(r"[\.]html$|[\.]htm$"),
            simplesqlite.loader.HtmlTableFileLoader()),
        LoaderTuple(
            re.compile(r"[\.]json$"),
            simplesqlite.loader.JsonTableFileLoader()),
        LoaderTuple(
            re.compile(r"[\.]xlsx$|[\.]xlsm$|[\.]xls$"),
            simplesqlite.loader.ExcelTableFileLoader()),
    ]

    @classmethod
    def get_loader(cls, file_path):
        """
        Return the loader whose file-name pattern matches ``file_path``.

        The ``source`` attribute of the matching loader is set to
        ``file_path`` before the loader is returned. The loaders are
        class-level objects shared between calls, so a later call that
        selects the same loader overwrites ``source`` on the object
        returned earlier.

        :param str file_path: Path of the file to be loaded.
        :return: Loader paired with the first pattern that matches.
        :raises LoaderNotFound: If no pattern matches ``file_path``.
        """

        for loadertuple in cls.__LOADERTUPLE_LIST:
            if loadertuple.filename_regexp.search(file_path) is None:
                continue

            loadertuple.loader.source = file_path

            return loadertuple.loader

        raise LoaderNotFound(file_path)


def create_database(database_path):
db_path = path.Path(database_path)
dir_path = db_path.dirname()
Expand Down Expand Up @@ -110,9 +70,12 @@ def cmd(ctx, log_level):
@click.pass_context
def file(ctx, files, output_path):
"""
Convert CSV/Excel/HTML/JSON file(s) to a SQLite database file.
Convert CSV/Excel/HTML/Markdown/JSON file(s) to a SQLite database file.
"""

if dataproperty.is_empty_sequence(files):
return 0

con = create_database(output_path)
result_counter = ResultCounter()

Expand All @@ -125,8 +88,15 @@ def file(ctx, files, output_path):
continue

try:
loader = LoaderFactory.get_loader(file_path)
except LoaderNotFound:
loader_factory = ptr.FileLoaderFactory(file_path)
except ptr.InvalidFilePathError:
continue

try:
loader = loader_factory.create_from_file_path()
except ptr.LoaderNotFound:
logger.debug(
"loader not found that coincide with '{}'".format(file_path))
continue

try:
Expand All @@ -142,14 +112,14 @@ def file(ctx, files, output_path):

click.echo("convert '{:s}' to '{:s}' table".format(
file_path, tabledata.table_name))
except OpenError as e:
except ptr.OpenError as e:
logger.error(e)
except ValidationError as e:
except ptr.ValidationError as e:
logger.error(
"invalid {:s} data format: path={:s}, message={:s}".format(
_get_format_type_from_path(file_path), file_path, str(e)))
result_counter.inc_fail()
except InvalidDataError as e:
except ptr.InvalidDataError as e:
logger.error(
"invalid {:s} data: path={:s}, message={:s}".format(
_get_format_type_from_path(file_path), file_path, str(e)))
Expand Down Expand Up @@ -193,13 +163,13 @@ def gs(ctx, credentials, title, output_path):
try:
con.create_table_from_tabledata(tabledata)
result_counter.inc_success()
except (ValidationError, InvalidDataError):
except (ptr.ValidationError, ptr.InvalidDataError):
result_counter.inc_fail()
except OpenError as e:
except ptr.OpenError as e:
logger.error(e)
except AttributeError:
logger.error("invalid credentials data: path={:s}".format(credentials))
except (ValidationError, InvalidDataError) as e:
except (ptr.ValidationError, ptr.InvalidDataError) as e:
logger.error(
"invalid credentials data: path={:s}, message={:s}".format(
credentials, str(e)))
Expand Down
35 changes: 33 additions & 2 deletions test/test_sqlitebiter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import xlsxwriter

from sqlitebiter.sqlitebiter import cmd
from simplesqlite.loader.interface import TableLoader
from pytablereader.interface import TableLoader


def valid_json_single_file():
Expand Down Expand Up @@ -237,6 +237,19 @@ def invalid_html_file():
return file_path


def valid_markdown_file():
    """
    Write a small Markdown table to ``valid_mdtable.md`` in the current
    working directory and return that file name.
    """

    markdown_text = """ a | b | c
--:|----:|---
1|123.1|a
2| 2.2|bb
3| 3.3|ccc
"""
    target = "valid_mdtable.md"

    with open(target, "w") as stream:
        stream.write(markdown_text)

    return target


class Test_sqlitebiter:

def setup_method(self, method):
Expand Down Expand Up @@ -264,13 +277,25 @@ def test_normal_smoke(self):
valid_csv_file2(),
valid_excel_file(),
valid_html_file(),
valid_markdown_file(),
]

for file_path in file_list:
result = runner.invoke(
cmd, ["file", file_path, "-o", db_path])
assert result.exit_code == 0, file_path

# Invoking the ``file`` subcommand without any input file must exit with
# code 0 and must not leave an "out.sqlite" file behind.
def test_abnormal_empty(self):
runner = CliRunner()

# Run the command within the runner's isolated filesystem context.
with runner.isolated_filesystem():
result = runner.invoke(
cmd, ["file"])

assert result.exit_code == 0
# NOTE(review): "out.sqlite" is presumably the command's default output
# path, since no "-o" option is passed here -- confirm.
assert not path.Path(
"out.sqlite").exists(), "output file must not exist"

def test_abnormal_smoke(self):
db_path = "test.sqlite"
runner = CliRunner()
Expand Down Expand Up @@ -309,6 +334,8 @@ def test_normal_multi(self):

valid_html_file(),
invalid_html_file(),

valid_markdown_file(),
]

result = runner.invoke(cmd, ["file"] + file_list + ["-o", db_path])
Expand All @@ -320,6 +347,7 @@ def test_normal_multi(self):
'csv_a', "insert_csv",
'excel_sheet_a', 'excel_sheet_c', 'excel_sheet_d',
'htmltable_tablename', 'htmltable_html2',
'valid_mdtable_markdown1',
]

message = "expected-tables={}, actual-tables={}".format(
Expand All @@ -335,7 +363,8 @@ def test_normal_multi(self):
"multijson_table2":
[(1, '4'), (2, 'NULL'), (3, '120.9')],
"csv_a": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"insert_csv": [(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"insert_csv":
[(1, 4.0, 'a'), (2, 2.1, 'bb'), (3, 120.9, 'ccc')],
"excel_sheet_a":
[(1.0, 1.1, 'a'), (2.0, 2.2, 'bb'), (3.0, 3.3, 'cc')],
"excel_sheet_c":
Expand All @@ -346,6 +375,8 @@ def test_normal_multi(self):
[(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')],
"htmltable_html2":
[(1, 123.1), (2, 2.2), (3, 3.3)],
"valid_mdtable_markdown1":
[(1, 123.1, 'a'), (2, 2.2, 'bb'), (3, 3.3, 'ccc')],
}
for table in con.get_table_name_list():
result = con.select("*", table_name=table)
Expand Down

0 comments on commit 4387344

Please sign in to comment.