Skip to content
This repository has been archived by the owner on Mar 9, 2023. It is now read-only.

Commit

Permalink
Merge pull request #164 from WorksApplications/feature/kazuma-t/build…
Browse files Browse the repository at this point in the history
…_test_dict

Build test dictionaries on the fly
  • Loading branch information
kazuma-t authored Sep 25, 2021
2 parents b445b4d + c054fce commit d4699a3
Show file tree
Hide file tree
Showing 11 changed files with 184 additions and 43 deletions.
7 changes: 2 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,8 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Check license header
run: |
HEADER=`cat scripts/license-header.txt`
for FILE in `find setup.py sudachipy tests -name '*.py'`; do FILECONTENTS=`cat "$FILE"`; if [[ "$FILECONTENTS" != "$HEADER"* ]]; then >&2 echo "invalid license header on $FILE"; fi; done
scripts/checkheader.sh
- name: Test with unittest
run: |
cp .travis/system.dic.test tests/resources/system.dic
cp .travis/user.dic.test tests/resources/user.dic
python setup.py build_ext --inplace
python -m unittest discover tests
scripts/test.sh
Binary file removed .travis/system.dic.test
Binary file not shown.
Binary file removed .travis/user.dic.test
Binary file not shown.
10 changes: 10 additions & 0 deletions scripts/checkheader.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/usr/bin/env bash
#
# Verify that every Python source file starts with the license header.
#
# Usage: scripts/checkheader.sh   (may be invoked from any directory)
#
# Compares the leading bytes of setup.py and of every non-empty *.py file
# under sudachipy/ and tests/ against scripts/license-header.txt. Each
# offending file is reported on stderr as "invalid license header on <file>".
#
# Exit status:
#   0  every checked file starts with the header
#   1  at least one file has a missing, different or truncated header
#   2  the check itself could not be run (e.g. header file unreadable)

set -u

# All paths below are relative to the repository root (the parent of the
# directory holding this script), so move there regardless of the caller's cwd.
cd -- "$(dirname -- "${BASH_SOURCE[0]}")/.." || exit 2

HEADER=scripts/license-header.txt

# Without a readable, non-empty header nothing meaningful is compared and the
# check would pass vacuously; report that as a failure of the check itself.
if [[ ! -r "$HEADER" || ! -s "$HEADER" ]]; then
  echo "cannot read license header file $HEADER" >&2
  exit 2
fi

SIZE=$(wc -c < "$HEADER") || exit 2
# Some wc implementations pad the count with blanks; keep only the number.
SIZE=${SIZE//[[:space:]]/}

STATUS=0
# NUL-delimited file names keep paths with whitespace intact.
# Zero-byte files (-size +0 excludes them) are not checked: they passed the
# previous version of this check as well and have no content to license.
while IFS= read -r -d '' FILE; do
  # Decide on cmp's exit status, not on its output: when a file ends before
  # the header does, cmp only complains on stderr, which went unnoticed when
  # stdout alone was inspected.
  if ! cmp -s -n "$SIZE" -- "$HEADER" "$FILE"; then
    echo "invalid license header on $FILE" >&2
    STATUS=1
  fi
done < <(find setup.py sudachipy tests -type f -name '*.py' -size +0 -print0)

exit "$STATUS"
22 changes: 1 addition & 21 deletions scripts/format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,5 @@ cd $(dirname $0)
flake8 --show --config=flake8.cfg ../sudachipy
flake8 --show --config=flake8.cfg ../tests

HEADER=`cat license-header.txt`

cd ..

array=()

for FILE in `find ./sudachipy -type f -name "*.py"`; do
array+=( ${FILE} )
done

for FILE in `find ./tests -type f -name "*.py"`; do
array+=( ${FILE} )
done

array+=( ./setup.py )

for FILE in ${array[@]}; do
FILECONTENTS=`cat ${FILE}`
if [[ ${FILECONTENTS} != ${HEADER}* ]]; then
>&2 echo "invalid license header on ${FILE}"
fi
done
scripts/checkheader.sh
28 changes: 11 additions & 17 deletions scripts/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,23 @@
# You need to prepare system.dic in resources (see README).
# The test dictionaries in tests/resources are built by this script.

cd $(dirname $0)
set -e

# check system.dic
if [[ ! -f "../tests/resources/system.dic" ]]; then
cp ../.travis/system.dic.test ../tests/resources/system.dic
fi
DIFF=$(diff ../.travis/system.dic.test ../tests/resources/system.dic)
if [[ "$DIFF" != "" ]]; then
cp ../.travis/system.dic.test ../tests/resources/system.dic
# build dictionaries
if !(type sudachipy > /dev/null 2>&1); then
python setup.py develop
fi
sudachipy build -o tests/resources/system.dic -d "the system dictionary for the unit tests" -m tests/resources/dict/matrix.def tests/resources/dict/lex.csv
sudachipy ubuild -o tests/resources/user.dic -s tests/resources/system.dic tests/resources/dict/user.csv

# check user.dic
if [[ ! -f "../tests/resources/user.dic" ]]; then
cp ../.travis/user.dic.test ../tests/resources/user.dic
fi
DIFF=$(diff ../.travis/user.dic.test ../tests/resources/user.dic)
if [[ "$DIFF" != "" ]]; then
cp ../.travis/user.dic.test ../tests/resources/user.dic
fi
set +e

# unittest
RES=`cd ..; python -m unittest discover tests -p '*test*.py' 2>&1`
RES=`python -m unittest discover tests -p '*test*.py' 2>&1`
STATUS=$?
RES_TAIL=`echo "$RES" | tail -1`
if [[ $RES_TAIL != "OK" ]]; then
>&2 echo "$RES"
fi

exit $STATUS
39 changes: 39 additions & 0 deletions tests/resources/dict/lex.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
た,1,1,8729,た,助動詞,*,*,*,助動詞-タ,終止形-一般,タ,た,*,A,*,*,*,*
に,2,2,11406,に,助詞,接続助詞,*,*,*,*,ニ,に,*,A,*,*,*,*
に,3,3,4481,に,助詞,格助詞,*,*,*,*,ニ,に,*,A,*,*,*,*
京都,6,6,5293,京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5
東,7,7,4675,東,名詞,普通名詞,一般,*,*,*,ヒガシ,東,*,A,*,*,*,*
東京,6,6,2816,東京,名詞,固有名詞,地名,一般,*,*,トウキョウ,東京,*,A,*,*,*,*
東京都,6,8,5320,東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*
行く,4,4,5105,行く,動詞,非自立可能,*,*,五段-カ行,終止形-一般,イク,行く,*,A,*,*,*,*
行っ,5,5,5122,行っ,動詞,非自立可能,*,*,五段-カ行,連用形-促音便,イッ,行く,7,A,*,*,*,*
都,8,8,2914,都,名詞,普通名詞,一般,*,*,*,ト,都,*,A,*,*,*,*
アイ,7,7,4675,アイ,名詞,普通名詞,一般,*,*,*,アイ,アイ,*,A,*,*,*,*
アイウ,7,7,4675,アイウ,名詞,普通名詞,一般,*,*,*,アイウ,アイウ,*,A,*,*,*,*
アイアイウ,6,6,32766,アイウ,名詞,固有名詞,地名,一般,*,*,アイアイウ,アイアイウ,*,A,*,*,*,*
0,9,9,2478,0,名詞,数詞,*,*,*,*,ゼロ,0,*,A,*,*,*,*
1,9,9,2478,1,名詞,数詞,*,*,*,*,イチ,1,*,A,*,*,*,*
2,9,9,2478,2,名詞,数詞,*,*,*,*,ニ,2,*,A,*,*,*,*
3,9,9,2478,3,名詞,数詞,*,*,*,*,サン,3,*,A,*,*,*,*
4,9,9,2478,4,名詞,数詞,*,*,*,*,ヨン,4,*,A,*,*,*,*
5,9,9,2478,5,名詞,数詞,*,*,*,*,ゴ,5,*,A,*,*,*,*
6,9,9,2478,6,名詞,数詞,*,*,*,*,ロク,6,*,A,*,*,*,*
7,9,9,2478,7,名詞,数詞,*,*,*,*,ナナ,7,*,A,*,*,*,*
8,9,9,2478,8,名詞,数詞,*,*,*,*,ハチ,8,*,A,*,*,*,*
9,9,9,2478,9,名詞,数詞,*,*,*,*,キュウ,9,*,A,*,*,*,*
〇,9,9,2478,〇,名詞,数詞,*,*,*,*,ゼロ,〇,*,A,*,*,*,*
一,9,9,2478,一,名詞,数詞,*,*,*,*,イチ,一,*,A,*,*,*,*
二,9,9,2478,二,名詞,数詞,*,*,*,*,ニ,二,*,A,*,*,*,*
三,9,9,2478,三,名詞,数詞,*,*,*,*,サン,三,*,A,*,*,*,*
四,9,9,2478,四,名詞,数詞,*,*,*,*,ヨン,四,*,A,*,*,*,*
五,9,9,2478,五,名詞,数詞,*,*,*,*,ゴ,五,*,A,*,*,*,*
六,9,9,2478,六,名詞,数詞,*,*,*,*,ロク,六,*,A,*,*,*,*
七,9,9,2478,七,名詞,数詞,*,*,*,*,ナナ,七,*,A,*,*,*,*
八,9,9,2478,八,名詞,数詞,*,*,*,*,ハチ,八,*,A,*,*,*,*
九,9,9,2478,九,名詞,数詞,*,*,*,*,キュウ,九,*,A,*,*,*,*
六三四,6,6,0,六三四,名詞,固有名詞,地名,一般,*,*,ムサシ,六三四,*,A,*,*,*,*
いく,4,4,5105,いく,動詞,非自立可能,*,*,五段-カ行,終止形-一般,イク,行く,*,A,*,*,*,*
いっ,5,5,5122,いっ,動詞,非自立可能,*,*,五段-カ行,連用形-促音便,イッ,行く,34,A,*,*,*,*
012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,9,9,2478,012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,名詞,数詞,*,*,*,*,ゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウゼロイチニサンヨンゴロクナナハチキュウ,012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789,*,A,*,*,*,*
特a,8,8,2914,特a,名詞,普通名詞,一般,*,*,*,トクエー,特a,*,A,*,*,*,*
な。な,8,8,2914,な。な,名詞,普通名詞,一般,*,*,*,ナナ,な。な,*,A,*,*,*,*
101 changes: 101 additions & 0 deletions tests/resources/dict/matrix.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
10 10
0 0 0
0 1 863
0 2 2124
0 3 1032
0 4 591
0 5 -162
0 6 -79
0 7 887
0 8 447
0 9 -535
1 0 -3689
1 1 -3361
1 2 -7643
1 3 -3267
1 4 809
1 5 -1098
1 6 4606
1 7 4269
1 8 4567
1 9 1635
2 0 -1959
2 1 2457
2 2 811
2 3 840
2 4 903
2 5 -958
2 6 517
2 7 2037
2 8 1392
2 9 -193
3 0 -2288
3 1 1741
3 2 487
3 3 792
3 4 -1474
3 5 -3429
3 6 126
3 7 437
3 8 605
3 9 -547
4 0 -2809
4 1 -3584
4 2 -6743
4 3 -2869
4 4 -2805
4 5 -407
4 6 3422
4 7 5642
4 8 6382
4 9 2165
5 0 -509
5 1 -3665
5 2 -3882
5 3 -572
5 4 -1036
5 5 -54
5 6 2570
5 7 3319
5 8 4059
5 9 882
6 0 101
6 1 2933
6 2 2198
6 3 -2004
6 4 4392
6 5 4017
6 6 569
6 7 475
6 8 -390
6 9 852
7 0 -852
7 1 2079
7 2 1180
7 3 -3084
7 4 2010
7 5 1570
7 6 746
7 7 2341
7 8 2051
7 9 1393
8 0 -522
8 1 3354
8 2 2037
8 3 -2542
8 4 3071
8 5 2631
8 6 -352
8 7 2847
8 8 1134
8 9 1256
9 0 -975
9 1 2498
9 2 1690
9 3 -1523
9 4 3023
9 5 3139
9 6 2562
9 7 3962
9 8 418
9 9 -2490
4 changes: 4 additions & 0 deletions tests/resources/dict/user.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
ぴらる,8,8,-32768,ぴらる,名詞,普通名詞,一般,*,*,*,ピラル,ぴらる,*,A,*,*,*,*
府,8,8,2914,府,名詞,普通名詞,一般,*,*,*,フ,府,*,A,*,*,*,*
東京府,6,6,2816,東京府,名詞,固有名詞,地名,一般,*,*,トウキョウフ,東京府,*,B,5/U1,*,5/U1,1/3
すだち,6,6,2816,すだち,被子植物門,双子葉植物綱,ムクロジ目,ミカン科,ミカン属,スダチ,スダチ,すだち,*,A,*,*,*,*
2 changes: 2 additions & 0 deletions tests/resources/dict/user2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
ぴさる,8,8,-32768,ぴさる,名詞,普通名詞,一般,*,*,*,ピサル,ぴさる,*,A,*,*,*,*
かぼす,6,6,2816,かぼす,被子植物門,双子葉植物綱,ムクロジ目,ミカン科,ミカン属,カボス,カボス,かぼす,*,A,*,*,*,*
14 changes: 14 additions & 0 deletions tests/test_switchdictionary.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Copyright (c) 2019 Works Applications Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import os
import shutil
Expand Down

0 comments on commit d4699a3

Please sign in to comment.