-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #7 from kyohsuke/bump_up_ruby_2_2_x
convert ruby 2.2.x
- Loading branch information
Showing
10 changed files
with
239 additions
and
225 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -322,4 +322,5 @@ notes 辞書に転用できますし、やり方によっては SKK 以外にも | |
こうじゅつろうどく /口述朗読;‖<autogen>,名詞-サ変接続/ | ||
|
||
## 著者 | ||
三田祐介 <[email protected]> | ||
|
||
三田祐介 < clefs<span></span>@mail.goo.ne.jp > |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
#!/usr/local/bin/ruby -Ke | ||
# -*- coding: euc-jp -*- | ||
#!/usr/bin/env ruby | ||
# -*- coding: utf-8 -*- | ||
## Copyright (C) 2005 MITA Yuusuke <[email protected]> | ||
## | ||
## Author: MITA Yuusuke <[email protected]> | ||
|
@@ -32,11 +32,11 @@ | |
## | ||
## % aozora2skk.rb file-from-aozora-bunko.html > result.txt | ||
## | ||
# ○ | ||
require 'jcode' if RUBY_VERSION.to_f < 1.9 | ||
#require 'kconv' | ||
#require 'skkdictools' | ||
# ○ | ||
|
||
Encoding.default_external = "euc-jis-2004" | ||
require 'optparse' | ||
|
||
opt = OptionParser.new | ||
|
||
results = [] | ||
|
@@ -45,18 +45,20 @@ | |
opt.on('-a', 'append annotation <autogen - aozora>') { note = true } | ||
begin | ||
opt.parse!(ARGV) | ||
rescue OptionParser::InvalidOption => e | ||
rescue OptionParser::InvalidOption | ||
print "'#{$0} -h' for help.\n" | ||
exit 1 | ||
end | ||
|
||
|
||
|
||
while gets | ||
$_.encode!("utf-8") | ||
$_.gsub!(/<[^>]*>/, '') | ||
results = results + $_.scan(/([亜-熙]{2,})[ ]*[\[(([〔【]([ぁ-ん]*)[\]))〕]】]/) | ||
results = results + $_.scan(/([亜-熙]{2,})[ ]*[\[(([〔【]([ぁ-ん]*)[\]))〕]】]/) | ||
end | ||
|
||
results.uniq! | ||
results.each {|word,yomi| | ||
print "#{yomi} /#{word}#{note ? ';∥<autogen - aozora>' : ''}/\n" | ||
print "#{yomi} /#{word}#{note ? ';‖<autogen - aozora>' : ''}/\n" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
#!/usr/bin/env ruby | ||
# -*- coding: euc-jp -*- | ||
# -*- coding: utf-8 -*- | ||
# canna2skk.rb -- convert Canna dictionary to SKK-JISYO format. | ||
# | ||
# Copyright (C) 2003 NAKAJIMA Mikio <[email protected]> | ||
|
@@ -33,8 +33,9 @@ | |
# $ canna2skk.rb gcanna.t gcannaf.t > tmp.jisyo | ||
# $ skkdic-expr2 tmp.jisyo > SKK-JISYO.canna | ||
# | ||
# かん #JS*8 巻 #CNSUC2*2 間 #JS 缶 貫 #JSSUC 間 | ||
# かん #JS*8 巻 #CNSUC2*2 間 #JS 缶 貫 #JSSUC 間 | ||
|
||
Encoding.default_external = "euc-jis-2004" | ||
file = ARGV.shift | ||
open(file).each{|line| | ||
if !(line =~ /([^ ]+) (.+) *$/) | ||
|
@@ -44,9 +45,9 @@ | |
words = $2 | ||
words.split(' ').each{|word| | ||
if (word =~ /[#*a-zA-Z0-9]+/ || key == word) | ||
next | ||
next | ||
else | ||
print key, " /", word, "/\n" | ||
print key, " /", word, "/\n" | ||
end | ||
} | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
#!/usr/local/bin/ruby -Ke | ||
# -*- coding: euc-jp -*- | ||
#!/usr/bin/env ruby | ||
# -*- coding: utf-8 -*- | ||
|
||
## Copyright (C) 2005 MITA Yuusuke <[email protected]> | ||
## | ||
## Author: MITA Yuusuke <[email protected]> | ||
|
@@ -36,19 +37,15 @@ | |
## | ||
## skkdictools.rb required. | ||
## | ||
## TODO: pick up compound-verbs, eg. 「舞い散る」 | ||
## 舞い マイ 舞う 動詞-自立 五段・ワ行促音便 連用形 | ||
## 散る チル 散る 動詞-自立 五段・ラ行 基本形 | ||
## TODO: pick up compound-verbs, eg. 「舞い散る」 | ||
## 舞い マイ 舞う 動詞-自立 五段・ワ行促音便 連用形 | ||
## 散る チル 散る 動詞-自立 五段・ラ行 基本形 | ||
## | ||
require 'jcode' if RUBY_VERSION.to_f < 1.9 | ||
require 'kconv' | ||
require 'skkdictools' | ||
|
||
#require 'cgi' | ||
#require 'socket' | ||
#require 'timeout' | ||
|
||
Encoding.default_external = "euc-jis-2004" | ||
require_relative 'skkdictools' | ||
require 'optparse' | ||
|
||
opt = OptionParser.new | ||
|
||
katakana_words = false | ||
|
@@ -77,62 +74,63 @@ | |
|
||
begin | ||
opt.parse!(ARGV) | ||
rescue OptionParser::InvalidOption => e | ||
rescue OptionParser::InvalidOption | ||
print "'#{$0} -h' for help.\n" | ||
exit 1 | ||
end | ||
|
||
#keyword_pat = Regexp.compile("[亜-熙]*#{keyword}[亜-熙]*") | ||
#keyword_pat = Regexp.compile("[亜-熙]*#{keyword}[亜-熙]*") | ||
|
||
count = 0 | ||
#key = word = last_key = last_word = last_part = "" | ||
key = word = last_part = "" | ||
poisoned = terminate = false | ||
|
||
while gets | ||
midasi, yomi, root, part, conj = $_.split(" ", 5) | ||
#if midasi !~ /^[亜-熙ァ-ンヴー]+$/ || terminate | ||
if (midasi !~ /^[亜-熙ァ-ンヴー々]+$/ && | ||
(!allow_noun_chains || part !~ /名詞/ || part =~ /非自立/ || | ||
midasi !~ /^[亜-熙ァ-ンヴー々ぁ-ん]+$/ )) || terminate | ||
#if (midasi !~ /^[亜-熙ァ-ンヴー]+$/ && conj !~ /連用形/) || terminate | ||
$_.encode!("utf-8") | ||
midasi, yomi, _root, part, _conj = $_.split(" ", 5) | ||
#if midasi !~ /^[亜-熙ァ-ンヴー]+$/ || terminate | ||
if (midasi !~ /^[亜-熙ァ-ンヴー々]+$/ && | ||
(!allow_noun_chains || part !~ /名詞/ || part =~ /非自立/ || | ||
midasi !~ /^[亜-熙ァ-ンヴー々ぁ-ん]+$/ )) || terminate | ||
#if (midasi !~ /^[亜-熙ァ-ンヴー]+$/ && conj !~ /連用形/) || terminate | ||
#next if count < 1 | ||
if count < 1 | ||
next if !handle_prefix | ||
if part =~ /接頭詞/ | ||
# kludge - keep prefix w/o increasing count (cf.「ご立派」「お味噌」) | ||
key = yomi.to_hiragana | ||
word = midasi | ||
last_part = part | ||
#elsif part =~ /自立/ && conj =~ /連用形/ | ||
# hogehoge | ||
if part =~ /接頭詞/ | ||
# kludge - keep prefix w/o increasing count (cf.「ご立派」「お味噌」) | ||
key = yomi.to_hiragana | ||
word = midasi | ||
last_part = part | ||
#elsif part =~ /自立/ && conj =~ /連用形/ | ||
# hogehoge | ||
else | ||
key = word = last_part = "" | ||
key = word = last_part = "" | ||
end | ||
next | ||
end | ||
|
||
if midasi =~ /^[^亜-熙ァ-ンヴー々]+$/ && !terminate | ||
if midasi =~ /^[^亜-熙ァ-ンヴー々]+$/ && !terminate | ||
# nothing | ||
else | ||
if part =~ /接続詞|接頭詞|副詞[^可]/ | ||
# nothing - decline some parts | ||
elsif midasi =~ /並び|及び/ | ||
# nothing - (HACK) decline conjunctions that ChaSen overlooks | ||
elsif midasi =~ /^[ぁ-ん]+[亜-熙ァ-ンヴー々]+/ | ||
# nothing - this applies to quasi-words such as: | ||
# に関する ニカンスル に関する 助詞-格助詞-連語 | ||
if part =~ /接続詞|接頭詞|副詞[^可]/ | ||
# nothing - decline some parts | ||
elsif midasi =~ /並び|及び/ | ||
# nothing - (HACK) decline conjunctions that ChaSen overlooks | ||
elsif midasi =~ /^[ぁ-ん]+[亜-熙ァ-ンヴー々]+/ | ||
# nothing - this applies to quasi-words such as: | ||
# に関する ニカンスル に関する 助詞-格助詞-連語 | ||
else | ||
key += yomi.to_hiragana | ||
word += midasi | ||
last_part = part | ||
# asayaKify here? | ||
key += yomi.to_hiragana | ||
word += midasi | ||
last_part = part | ||
# asayaKify here? | ||
end | ||
end | ||
|
||
if word =~ /^[ぁ-んー]+$/ | ||
if word =~ /^[ぁ-んー]+$/ | ||
# nothing | ||
elsif !katakana_words && word =~ /^[ァ-ンヴー]+$/ | ||
elsif !katakana_words && word =~ /^[ァ-ンヴー]+$/ | ||
# nothing | ||
elsif !keyword.empty? && !word.include?(keyword) | ||
# nothing | ||
|
@@ -147,19 +145,19 @@ | |
count = 0 | ||
|
||
else | ||
if count > 0 && part =~ /接続詞|接頭詞|副詞[^可]/ | ||
if count > 0 && part =~ /接続詞|接頭詞|副詞[^可]/ | ||
terminate = true | ||
redo | ||
elsif count == 0 && part =~ /接尾/ | ||
# avoid generating 「回大会」 from 「第3回大会」 | ||
# 回 カイ 回 名詞-接尾-助数詞 | ||
elsif count == 0 && part =~ /接尾/ | ||
# avoid generating 「回大会」 from 「第3回大会」 | ||
# 回 カイ 回 名詞-接尾-助数詞 | ||
key = word = last_part = "" | ||
next | ||
end | ||
count += 1 | ||
key += yomi.to_hiragana | ||
word += midasi | ||
last_part = part | ||
poisoned = true if part =~ /未知語/ | ||
poisoned = true if part =~ /未知語/ | ||
end | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
#!/usr/bin/env ruby | ||
# -*- coding: utf-8 -*- | ||
|
||
# dic-it2skk.rb -- convert dic-it dictionary to SKK-JISYO format. | ||
# | ||
# Copyright (C) 2003 NAKAJIMA Mikio <[email protected]> | ||
|
@@ -32,6 +34,7 @@ | |
# $ dic-it2skk.rb dic-it.txt > tmp.jisyo | ||
# $ skkdic-expr2 tmp.jisyo > SKK-JISYO.dic-it | ||
# | ||
Encoding.default_external = "euc-jis-2004" | ||
file = ARGV.shift | ||
open(file).each{|line| | ||
if !(line =~ /([^ \/]+)\/([^ ]+) *$/) | ||
|
Oops, something went wrong.