diff --git a/convert2skk/README.md b/convert2skk/README.md index ad4281d..27971a1 100644 --- a/convert2skk/README.md +++ b/convert2skk/README.md @@ -322,4 +322,5 @@ notes 辞書に転用できますし、やり方によっては SKK 以外にも こうじゅつろうどく /口述朗読;‖,名詞-サ変接続/ ## 著者 -三田祐介 + +三田祐介 < clefs@mail.goo.ne.jp > diff --git a/convert2skk/aozora2skk.rb b/convert2skk/aozora2skk.rb index 3fe63f3..d3f0a09 100755 --- a/convert2skk/aozora2skk.rb +++ b/convert2skk/aozora2skk.rb @@ -1,5 +1,5 @@ -#!/usr/local/bin/ruby -Ke -# -*- coding: euc-jp -*- +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- ## Copyright (C) 2005 MITA Yuusuke ## ## Author: MITA Yuusuke @@ -32,11 +32,11 @@ ## ## % aozora2skk.rb file-from-aozora-bunko.html > result.txt ## -# -require 'jcode' if RUBY_VERSION.to_f < 1.9 -#require 'kconv' -#require 'skkdictools' +# ○ + +Encoding.default_external = "euc-jis-2004" require 'optparse' + opt = OptionParser.new results = [] @@ -45,18 +45,20 @@ opt.on('-a', 'append annotation ') { note = true } begin opt.parse!(ARGV) -rescue OptionParser::InvalidOption => e +rescue OptionParser::InvalidOption print "'#{$0} -h' for help.\n" exit 1 end + while gets + $_.encode!("utf-8") $_.gsub!(/<[^>]*>/, '') - results = results + $_.scan(/([-]{2,})[ ]*[\[(ʡΡ̡]([-]*)[\])ˡ͡ϡ]/) + results = results + $_.scan(/([亜-熙]{2,})[  ]*[\[(([〔【]([ぁ-ん]*)[\]))〕]】]/) end results.uniq! results.each {|word,yomi| - print "#{yomi} /#{word}#{note ? ';' : ''}/\n" + print "#{yomi} /#{word}#{note ? ';‖' : ''}/\n" } diff --git a/convert2skk/canna2skk.rb b/convert2skk/canna2skk.rb index 6113d9e..55dc300 100755 --- a/convert2skk/canna2skk.rb +++ b/convert2skk/canna2skk.rb @@ -1,5 +1,5 @@ #!/usr/bin/env ruby -# -*- coding: euc-jp -*- +# -*- coding: utf-8 -*- # canna2skk.rb -- convert Canna dictionary to SKK-JISYO format. # # Copyright (C) 2003 NAKAJIMA Mikio @@ -33,8 +33,9 @@ # $ canna2skk.rb gcanna.t gcannaf.t > tmp.jisyo # $ skkdic-expr2 tmp.jisyo > SKK-JISYO.canna # -# #JS*8 #CNSUC2*2 #JS #JSSUC +# かん #JS*8 巻 #CNSUC2*2 間 #JS 缶 貫 #JSSUC 間 +Encoding.default_external = "euc-jis-2004" file = ARGV.shift open(file).each{|line| if !(line =~ /([^ ]+) (.+) *$/) @@ -44,9 +45,9 @@ words = $2 words.split(' ').each{|word| if (word =~ /[#*a-zA-Z0-9]+/ || key == word) - next + next else - print key, " /", word, "/\n" + print key, " /", word, "/\n" end } end diff --git a/convert2skk/chasen2skk.rb b/convert2skk/chasen2skk.rb index a82596c..0b76e34 100755 --- a/convert2skk/chasen2skk.rb +++ b/convert2skk/chasen2skk.rb @@ -1,5 +1,6 @@ -#!/usr/local/bin/ruby -Ke -# -*- coding: euc-jp -*- +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- + ## Copyright (C) 2005 MITA Yuusuke ## ## Author: MITA Yuusuke @@ -36,19 +37,15 @@ ## ## skkdictools.rb required. ## -## TODO: pick up compound-verbs, eg. 񤤻 -## ޥ ư-Ω ʡ¥ Ϣѷ -## ư-Ω ʡ ܷ +## TODO: pick up compound-verbs, eg. 「舞い散る」 +## 舞い マイ 舞う 動詞-自立 五段・ワ行促音便 連用形 +## 散る チル 散る 動詞-自立 五段・ラ行 基本形 ## -require 'jcode' if RUBY_VERSION.to_f < 1.9 -require 'kconv' -require 'skkdictools' - -#require 'cgi' -#require 'socket' -#require 'timeout' +Encoding.default_external = "euc-jis-2004" +require_relative 'skkdictools' require 'optparse' + opt = OptionParser.new katakana_words = false @@ -77,12 +74,12 @@ begin opt.parse!(ARGV) -rescue OptionParser::InvalidOption => e +rescue OptionParser::InvalidOption print "'#{$0} -h' for help.\n" exit 1 end -#keyword_pat = Regexp.compile("[-]*#{keyword}[-]*") +#keyword_pat = Regexp.compile("[亜-熙]*#{keyword}[亜-熙]*") count = 0 #key = word = last_key = last_word = last_part = "" @@ -90,49 +87,50 @@ poisoned = terminate = false while gets - midasi, yomi, root, part, conj = $_.split(" ", 5) - #if midasi !~ /^[--]+$/ || terminate - if (midasi !~ /^[--]+$/ && - (!allow_noun_chains || part !~ /̾/ || part =~ /Ω/ || - midasi !~ /^[---]+$/ )) || terminate - #if (midasi !~ /^[--]+$/ && conj !~ /Ϣѷ/) || terminate + $_.encode!("utf-8") + midasi, yomi, _root, part, _conj = $_.split(" ", 5) + #if midasi !~ /^[亜-熙ァ-ンヴー]+$/ || terminate + if (midasi !~ /^[亜-熙ァ-ンヴー々]+$/ && + (!allow_noun_chains || part !~ /名詞/ || part =~ /非自立/ || + midasi !~ /^[亜-熙ァ-ンヴー々ぁ-ん]+$/ )) || terminate + #if (midasi !~ /^[亜-熙ァ-ンヴー]+$/ && conj !~ /連用形/) || terminate #next if count < 1 if count < 1 next if !handle_prefix - if part =~ /Ƭ/ - # kludge - keep prefix w/o increasing count (cf.֤Ωɡס֤̣) - key = yomi.to_hiragana - word = midasi - last_part = part - #elsif part =~ /Ω/ && conj =~ /Ϣѷ/ - # hogehoge + if part =~ /接頭詞/ + # kludge - keep prefix w/o increasing count (cf.「ご立派」「お味噌」) + key = yomi.to_hiragana + word = midasi + last_part = part + #elsif part =~ /自立/ && conj =~ /連用形/ + # hogehoge else - key = word = last_part = "" + key = word = last_part = "" end next end - if midasi =~ /^[^--]+$/ && !terminate + if midasi =~ /^[^亜-熙ァ-ンヴー々]+$/ && !terminate # nothing else - if part =~ /³|Ƭ|[^]/ - # nothing - decline some parts - elsif midasi =~ /¤|ڤ/ - # nothing - (HACK) decline conjonctions that ChaSen overlooks - elsif midasi =~ /^[-]+[--]+/ - # nothing - this applies to quasi-words such as: - # ˴ؤ ˥󥹥 ˴ؤ -ʽ-Ϣ + if part =~ /接続詞|接頭詞|副詞[^可]/ + # nothing - decline some parts + elsif midasi =~ /並び|及び/ + # nothing - (HACK) decline conjonctions that ChaSen overlooks + elsif midasi =~ /^[ぁ-ん]+[亜-熙ァ-ンヴー々]+/ + # nothing - this applies to quasi-words such as: + # に関する ニカンスル に関する 助詞-格助詞-連語 else - key += yomi.to_hiragana - word += midasi - last_part = part - # asayaKify here? + key += yomi.to_hiragana + word += midasi + last_part = part + # asayaKify here? end end - if word =~ /^[-]+$/ + if word =~ /^[ぁ-んー]+$/ # nothing - elsif !katakana_words && word =~ /^[-]+$/ + elsif !katakana_words && word =~ /^[ァ-ンヴー]+$/ # nothing elsif !keyword.empty? && !word.include?(keyword) # nothing @@ -147,12 +145,12 @@ count = 0 else - if count > 0 && part =~ /³|Ƭ|[^]/ + if count > 0 && part =~ /接続詞|接頭詞|副詞[^可]/ terminate = true redo - elsif count == 0 && part =~ // - # avoid generating ֲ from 裳 - # ̾-- + elsif count == 0 && part =~ /接尾/ + # avoid generating 「回大会」 from 「第3回大会」 + # 回 カイ 回 名詞-接尾-助数詞 key = word = last_part = "" next end @@ -160,6 +158,6 @@ key += yomi.to_hiragana word += midasi last_part = part - poisoned = true if part =~ /̤θ/ + poisoned = true if part =~ /未知語/ end end diff --git a/convert2skk/ctdicconv.rb b/convert2skk/ctdicconv.rb index cf5862b..4ca0019 100755 --- a/convert2skk/ctdicconv.rb +++ b/convert2skk/ctdicconv.rb @@ -1,6 +1,5 @@ -#!/usr/bin/ruby -Ke -# -*- coding: euc-jp -*- -require 'jcode' if RUBY_VERSION.to_f < 1.9 +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- # ctdicconv.rb -- convert china_taiwan.csv to SKK-JISYO dictionary format. # @@ -30,10 +29,11 @@ # # Commentary: +Encoding.default_external = "euc-jis-2004" $ANNOTATION = true ##$ANNOTATION = false -# from ֥֥Ȼظץȸrubyp121 +# from 「オブジェクト指向スクリプト言語ruby」p121 def csv_split(source, delimiter = ',') csv = [] data = "" @@ -45,8 +45,8 @@ def csv_split(source, delimiter = ',') end if /^"/ =~ data if /[^"]"$/ =~ data or '""' == data - csv << data.sub(/^"(.*)"$/, '\1').gsub(/""/, '"') - data = '' + csv << data.sub(/^"(.*)"$/, '\1').gsub(/""/, '"') + data = '' end else csv << d @@ -60,7 +60,7 @@ def csv_split(source, delimiter = ',') file = ARGV.shift if not file - print "եꤷƲ\n" + print "ファイルを指定して下さい\n" else first = true File.foreach(file) do |line| @@ -68,24 +68,24 @@ def csv_split(source, delimiter = ',') first = false next end - #,,Ѹ츫Ф,,ܸɤ,ɤߡʥʡ,Ѹɸ2,̾,̾ɤ,,ɤ,annotation - c_t,d,e_key,kanji,j_key,c_key,english,kanji_alias,kanji_alias_key,capital,capital_key,annotation= csv_split(line.chomp) + #中国・台湾,種別,英語見出し,漢字,日本語読み,中国語読み(カタカナ),英語標記2,漢字別名,漢字別名読み,省都,省都読み,annotation + _c_t, _d,e_key,kanji,j_key,c_key,_english,kanji_alias,kanji_alias_key,_capital,_capital_key,annotation= csv_split(line.chomp) if (e_key && !e_key.empty? && kanji && !kanji.empty?) e_key.strip! kanji.strip! - # Ѹ츫Ф // + # 英語見出し /漢字/ if ($ANNOTATION && annotation && !annotation.empty?) - annotation.strip! + annotation.strip! print e_key, " /", kanji, ";", annotation, "/\n" else print e_key, " /", kanji, "/\n" end - # ܸ츫Ф /Capitalized Ѹ/ + # 日本語見出し /Capitalized 英語/ if (j_key && !j_key.empty?) - j_key.strip! + j_key.strip! if ($ANNOTATION && annotation && !annotation.empty?) - annotation.strip! + annotation.strip! print j_key, " /", e_key.capitalize, ";", annotation, "/\n" else print j_key, " /", e_key.capitalize, "/\n" @@ -94,35 +94,35 @@ def csv_split(source, delimiter = ',') end if (j_key && !j_key.empty? && kanji && !kanji.empty?) - # ܸ츫Ф // + # 日本語見出し /漢字/ if ($ANNOTATION && annotation && !annotation.empty?) - annotation.strip! - print j_key, " /", kanji, ";", annotation, "/\n" + annotation.strip! + print j_key, " /", kanji, ";", annotation, "/\n" else - print j_key, " /", kanji, "/\n" + print j_key, " /", kanji, "/\n" end end if (c_key && !c_key.empty? && kanji && !kanji.empty?) c_key.strip! - c_key.tr!("-", "-") - # 츫Ф // + c_key.tr!("ァ-ン", "ぁ-ん") + # 中国語見出し /漢字/ if ($ANNOTATION && annotation && !annotation.empty?) - print c_key, " /", kanji, ";", annotation, "/\n" + print c_key, " /", kanji, ";", annotation, "/\n" else - print c_key, " /", kanji, "/\n" + print c_key, " /", kanji, "/\n" end end - # ̾Ф /̾/ + # 漢字別名見出し /漢字別名/ if (kanji_alias && kanji_alias_key && - !kanji_alias.empty? && !kanji_alias_key.empty?) + !kanji_alias.empty? && !kanji_alias_key.empty?) if ($ANNOTATION && annotation && !annotation.empty?) - print kanji_alias_key, " /", kanji_alias, ";", annotation, "/\n" + print kanji_alias_key, " /", kanji_alias, ";", annotation, "/\n" else - print kanji_alias_key, " /", kanji_alias, "/\n" + print kanji_alias_key, " /", kanji_alias, "/\n" end end - # ԸФ // + # 省都見出し /省都/ #if (capital && capital_key && # !capital.empty? && !capital_key.empty?) # print capital_key, " /", capital, "/\n" diff --git a/convert2skk/dic-it2skk.rb b/convert2skk/dic-it2skk.rb index a8bd08e..cba4b90 100755 --- a/convert2skk/dic-it2skk.rb +++ b/convert2skk/dic-it2skk.rb @@ -1,4 +1,6 @@ #!/usr/bin/env ruby +# -*- coding: utf-8 -*- + # dic-it2skk.rb -- convert dic-it dictionary to SKK-JISYO format. # # Copyright (C) 2003 NAKAJIMA Mikio @@ -32,6 +34,7 @@ # $ dic-it2skk.rb dic-it.txt > tmp.jisyo # $ skkdic-expr2 tmp.jisyo > SKK-JISYO.dic-it # +Encoding.default_external = "euc-jis-2004" file = ARGV.shift open(file).each{|line| if !(line =~ /([^ \/]+)\/([^ ]+) *$/) diff --git a/convert2skk/ipadic2skk.rb b/convert2skk/ipadic2skk.rb index bfd447c..6c3b34f 100755 --- a/convert2skk/ipadic2skk.rb +++ b/convert2skk/ipadic2skk.rb @@ -1,5 +1,6 @@ -#!/usr/local/bin/ruby -Ke -# -*- coding: euc-jp -*- +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- + ## Copyright (C) 2005 MITA Yuusuke ## ## Author: MITA Yuusuke @@ -38,10 +39,11 @@ ## ## NOTE: skkdictools.rb should be in the ruby loadpaths to have this work. ## -require 'jcode' if RUBY_VERSION.to_f < 1.9 -#require 'kconv' -require 'skkdictools' + +Encoding.default_external = "euc-jis-2004" +require_relative 'skkdictools' require 'optparse' + opt = OptionParser.new skip_identical = true skip_hira2kana = true @@ -51,60 +53,62 @@ opt.on('-a', "convert Asayake into AsayaKe") { asayake_mode = "convert" } opt.on('-A', "both Asayake and AsayaKe are output") { asayake_mode = "both" } opt.on('-g', "append grammatical annotations") { grammar = true } -opt.on('-k', "generate hiragana-to-katakana pairs (֤ͤ /ͥ/)") { skip_hira2kana = false } -opt.on('-K', "generate identical pairs (֤ͤ /ͤ/)") { skip_identical = false } +opt.on('-k', "generate hiragana-to-katakana pairs (「ねこ /ネコ/」)") { skip_hira2kana = false } +opt.on('-K', "generate identical pairs (「ねこ /ねこ/」)") { skip_identical = false } begin opt.parse!(ARGV) -rescue OptionParser::InvalidOption => e +rescue OptionParser::InvalidOption print "'#{$0} -h' for help.\n" exit 1 end while gets + $_.encode!("utf-8") + #line = $_.toeuc - next if $_ !~ /^\(ʻ \(([^)]*)\)\) \(\(Ф \(([^ ]*) [0-9]*\)\) \(ɤ ([^ ]*)\)/ - # (ʻ (̾ )) ((Ф (ز 3999)) (ɤ å) (ȯ å) ) + next if $_ !~ /^\(品詞 \(([^)]*)\)\) \(\(見出し語 \(([^ ]*) [0-9]*\)\) \(読み ([^ ]*)\)/ + # (品詞 (名詞 一般)) ((見出し語 (学課 3999)) (読み ガッカ) (発音 ガッカ) ) next if skip_hira2kana && $2 == $3 hinsi = $1 candidate = $2 - key = $3.tr('-', '-').gsub(//, '') + key = $3.tr('ァ-ン', 'ぁ-ん').gsub(/ヴ/, 'う゛') next if skip_identical && key == candidate conjugation = nil - if grammar && $_ =~ /\(ѷ ([^)]*)\) \)$/ - # (ѷ ʡ¥) ) - conjugation = $1.sub(/^(..)([-])/, '\2\1 ') + if grammar && $_ =~ /\(活用型 ([^)]*)\) \)$/ + # (活用型 五段・ワ行促音便) ) + conjugation = $1.sub(/^(..)・([ア-ン]行)/, '\2\1 ') end comment = nil if grammar comment = hinsi comment += " " + conjugation if !conjugation.nil? - if hinsi =~ /Ƭ/ - if hinsi =~ /³/ - # generate "#0"; complete-numerative.rb should do the rest - candidate += "#0" - key += "#" + if hinsi =~ /接頭詞/ + if hinsi =~ /数接続/ + # generate "#0"; complete-numerative.rb should do the rest + candidate += "#0" + key += "#" else - comment += "[>]" + comment += "[φ>]" end - elsif hinsi =~ // - if hinsi =~ // - comment += "[#]" + elsif hinsi =~ /接尾/ + if hinsi =~ /助数詞/ + comment += "[φ#]" else - comment += "[<]" + comment += "[φ<]" end end end tail = "" - if key =~ /^\{(.*)\}([-]*)$/ + if key =~ /^\{(.*)\}([ぁ-ん]*)$/ tail = $2 - # (ɤ {ͥ/ͥ}) + # (読み {チネツ/ジネツ}) keys = $1.split("/") else - keys = key + keys = [key] end keys.each do |midasi| @@ -115,40 +119,39 @@ if asayake_mode != "none" new_midasi, new_candidate, postfix = okuri_nasi_to_ari(midasi, candidate) if !new_midasi.nil? - comment_extra = "" - if grammar - comment_extra += "[iks(gm)]" if postfix == "" && hinsi =~ /ƻ/ - - comment_extra += "[wiueot(c)]" if postfix == "" && conjugation =~ /Ը/ - comment_extra += "[gi]" if postfix == "" && conjugation =~ /Ը/ - comment_extra += "[mn]" if postfix == "" && conjugation =~ /޹Ը/ - comment_extra += "[*]" if postfix == "" && conjugation =~ // - comment_extra += "[rt(cn)]" if postfix == "" && conjugation =~ /Ը/ - # this can be of problem - comment_extra += "[a-z]" if postfix == "" && conjugation =~ // - - #comment_extra += "[ki]" if postfix == "" && conjugation =~ /Ը/ - if postfix == "" && conjugation =~ /Ը/ - #if new_candidate =~ /$/ - if new_midasi =~ /k$/ - comment_extra += "[ktc]" - elsif new_midasi =~ /k$/ - comment_extra += "[k]" - else - comment_extra += "[ki]" - end - end - - comment_extra += "(-#{postfix})" - #print_orig = false if !comment_extra.empty? - print_orig = false if hinsi =~ /ư|ƻ/ - end - print_pair(new_midasi, new_candidate, nil, - comment.delete("") + comment_extra) - print_orig = false if asayake_mode != "both" + comment_extra = "" + if grammar + comment_extra += "[iks(gm)]" if postfix == "い" && hinsi =~ /形容詞/ + + comment_extra += "[wiueot(c)]" if postfix == "う" && conjugation =~ /ワ行五段/ + comment_extra += "[gi]" if postfix == "ぐ" && conjugation =~ /ガ行五段/ + comment_extra += "[mn]" if postfix == "む" && conjugation =~ /マ行五段/ + comment_extra += "[*]" if postfix == "る" && conjugation =~ /カ変/ + comment_extra += "[rt(cn)]" if postfix == "る" && conjugation =~ /ラ行五段/ + # this can be of problem + comment_extra += "[a-z]" if postfix == "る" && conjugation =~ /一段/ + + #comment_extra += "[ki]" if postfix == "く" && conjugation =~ /カ行五段/ + if postfix == "く" && conjugation =~ /カ行五段/ + #if new_candidate =~ /行$/ + if new_midasi =~ /いk$/ + comment_extra += "[ktc]" + elsif new_midasi =~ /ゆk$/ + comment_extra += "[k]" + else + comment_extra += "[ki]" + end + end + + comment_extra += "(-#{postfix})" + #print_orig = false if !comment_extra.empty? + print_orig = false if hinsi =~ /動詞|形容詞/ + end + print_pair(new_midasi, new_candidate, nil, comment.delete("φ") + comment_extra) + print_orig = false if asayake_mode != "both" else - comment += "[dn(s)]" if hinsi =~ /ư촴/ - comment += "[s]" if hinsi =~ /³/ + comment += "[φdn(s)]" if hinsi =~ /形容動詞語幹/ + comment += "[φs]" if hinsi =~ /サ変接続/ end end print_pair(midasi, candidate, nil, grammar ? comment : nil) if print_orig diff --git a/convert2skk/prime2skk.rb b/convert2skk/prime2skk.rb index da66b72..5fc2d45 100755 --- a/convert2skk/prime2skk.rb +++ b/convert2skk/prime2skk.rb @@ -1,5 +1,6 @@ -#!/usr/local/bin/ruby -Ke -# -*- coding: euc-jp -*- +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- + ## Copyright (C) 2005 MITA Yuusuke ## ## Author: MITA Yuusuke @@ -36,9 +37,9 @@ ## ## NOTE: skkdictools.rb should be in one of the ruby loadpaths. ## -require 'jcode' if RUBY_VERSION.to_f < 1.9 -#require 'kconv' -require 'skkdictools' + +Encoding.default_external = "euc-jis-2004" +require_relative 'skkdictools' require 'optparse' opt = OptionParser.new @@ -51,30 +52,32 @@ opt.on('-a', "convert Asayake into AsayaKe") { asayake_mode = "convert" } opt.on('-A', "both Asayake and AsayaKe are output") { asayake_mode = "both" } opt.on('-g', "append grammatical annotations") { grammar = true } -opt.on('-k', "generate hiragana-to-katakana pairs (֤ͤ /ͥ/)") { skip_hira2kana = false } -opt.on('-K', "generate identical pairs (֤ͤ /ͤ/)") { skip_identical = false } +opt.on('-k', "generate hiragana-to-katakana pairs (「ねこ /ネコ/」)") { skip_hira2kana = false } +opt.on('-K', "generate identical pairs (「ねこ /ねこ/」)") { skip_identical = false } opt.on('-u', "don't add original comments as annotation") { unannotate = true } begin opt.parse!(ARGV) -rescue OptionParser::InvalidOption => e +rescue OptionParser::InvalidOption print "'#{$0} -h' for help.\n" exit 1 end while gets + $_.encode!("utf-8") + #line = $_.toeuc - key, hinsi, candidate, score, notes = $_.split(" ", 5) - # 礦 ̾ 377 comment=state usage=ΤȤͻҡ־Ѳ + key, hinsi, candidate, _score, notes = $_.split(" ", 5) + # じょうたい 名詞 状態 377 comment=state usage=ものごとの様子。「状態変化」 next if skip_identical && key == candidate next if skip_hira2kana && key.to_katakana == candidate comment = nil if grammar comment = hinsi - comment += "[>]" if hinsi =~ /Ƭ/ - comment += "[#]" if hinsi =~ // - comment += "[<]" if hinsi =~ // + comment += "[φ>]" if hinsi =~ /接頭語/ + comment += "[φ#]" if hinsi =~ /助数詞/ + comment += "[φ<]" if hinsi =~ /接尾語/ end print_orig = true @@ -86,59 +89,59 @@ new_key, new_candidate, postfix = okuri_nasi_to_ari(key, candidate) if !new_key.nil? if grammar - comment_extra += "(-#{postfix})" + comment_extra += "(-#{postfix})" - if (hinsi =~ /̾/ || - hinsi =~ // || - hinsi =~ /Ϣλ/ || - hinsi =~ /θ/ ) - print_orig = true - else - print_orig = false - end + if (hinsi =~ /名詞/ || + hinsi =~ /副詞/ || + hinsi =~ /連体詞/ || + hinsi =~ /体言/ ) + print_orig = true + else + print_orig = false + end end print_pair(new_key, new_candidate, unannotate ? nil : notes, - comment.delete("") + comment_extra) + comment.delete("φ") + comment_extra) print_orig = false if asayake_mode != "both" elsif grammar # XXX XXX Unfortunately, prime-dict doesn't have data of exact # conjugation types for adjective verbs; this should yield a lot of - # unwanted okuri-ari pairs, such as ֤ɤɤn /Ʋ/(). - comment += "[dn(st)]" if hinsi =~ /ư/ - comment += "[s]" if hinsi =~ /\(\)/ + # unwanted okuri-ari pairs, such as 「どうどうn /堂々/」(タリ活用). + comment += "[φdn(st)]" if hinsi =~ /形容動詞/ + comment += "[φs]" if hinsi =~ /サ行\(する\)/ - if hinsi =~ /([-])Ը/ - okuri = GyakuhikiOkurigana.assoc($1.to_hiragana)[1] + if hinsi =~ /([ア-ン])行五段/ + okuri = GyakuhikiOkurigana.assoc($1.to_hiragana)[1] end - if hinsi =~ /ƻ/ - comment += "[iks(gm)]" - okuri = "i" - elsif hinsi =~ /Ը/ - comment += "[wiueot(c)]" - okuri = "u" - elsif hinsi =~ /Ը/ - comment += "[gi]" - elsif hinsi =~ /Ը/ - #if candidate =~ /$/ - if key =~ /$/ - comment += "[ktc]" - elsif key =~ /$/ - comment += "[k]" - else - comment += "[ki]" - end - elsif hinsi =~ /޹Ը/ - comment += "[mn]" - elsif hinsi =~ /Ը/ - comment += "[rt(cn)]" - elsif hinsi =~ /\(\)/ - comment += "[*]" - okuri = "r" - elsif hinsi =~ // - # this can be of problem - comment += "[a-z]" - okuri = "r" + if hinsi =~ /形容詞/ + comment += "[iks(gm)]" + okuri = "i" + elsif hinsi =~ /ワ行五段/ + comment += "[wiueot(c)]" + okuri = "u" + elsif hinsi =~ /ガ行五段/ + comment += "[gi]" + elsif hinsi =~ /カ行五段/ + #if candidate =~ /行$/ + if key =~ /い$/ + comment += "[ktc]" + elsif key =~ /ゆ$/ + comment += "[k]" + else + comment += "[ki]" + end + elsif hinsi =~ /マ行五段/ + comment += "[mn]" + elsif hinsi =~ /ラ行五段/ + comment += "[rt(cn)]" + elsif hinsi =~ /来\(く\)/ + comment += "[*]" + okuri = "r" + elsif hinsi =~ /一段/ + # this can be of problem + comment += "[a-z]" + okuri = "r" end end end diff --git a/convert2skk/skk-wordpicker.rb b/convert2skk/skk-wordpicker.rb index 92d75e5..2ccb444 100755 --- a/convert2skk/skk-wordpicker.rb +++ b/convert2skk/skk-wordpicker.rb @@ -1,5 +1,5 @@ -#!/usr/local/bin/ruby -Ke -# -*- coding: euc-jp -*- +#!/usr/bin/env ruby +# -*- coding: euc-jis-2004 -*- ## Copyright (C) 2005 MITA Yuusuke ## ## Author: MITA Yuusuke @@ -31,9 +31,9 @@ ## ## skkdictools.rb and KAKASI are required. ## -require 'jcode' if RUBY_VERSION.to_f < 1.9 -require 'kconv' -require 'skkdictools' + +Encoding.default_external = "euc-jis-2004" +require_relative 'skkdictools' require 'cgi' require 'socket' @@ -70,7 +70,7 @@ begin opt.parse!(ARGV) #rulesets = default_rulesets if rulesets.empty? -rescue OptionParser::InvalidOption => e +rescue OptionParser::InvalidOption print "'#{$0} -h' for help.\n" exit 1 end diff --git a/saihenkan.rb b/saihenkan.rb index 9544f24..ae926b3 100755 --- a/saihenkan.rb +++ b/saihenkan.rb @@ -1,4 +1,6 @@ -#!/usr/bin/ruby -Ke +#!/usr/bin/env ruby +# -*- coding: utf-8 -*- + # saihenkan.rb -- convert SKK normail dictionary to saihenkan dictionary. # # Copyright (C) 2003 NAKAJIMA Mikio @@ -28,28 +30,29 @@ # Commentary: # saihenkan.rb SKK-JISYO.S | nkf -e > SKK-JISYO.saihenkan # + +Encoding.default_external = "euc-jis-2004" file = ARGV.shift -if !file -else - print ';;This file was generated by saihenkan.rb at ', - Time.new(), "\n" +if file + print ";;This file was generated by saihenkan.rb at #{Time.new()}\n" print ";; okuri-ari entries.\n;; okuri-nasi entries.\n" + File.open(file).each do |line| if (line =~ /^([^ a-z].*[^a-z]) \/(.+)\/$/) candidates = $2 candidates = candidates.split('/') max_index = candidates.length - 1 if (max_index > 0) - copy = candidates - candidates.each do |can| - print can, ' /' - copy.each do |c| - if c != can - print c, '/' - end - end - print "\n" - end + copy = candidates + candidates.each do |can| + print can, ' /' + copy.each do |c| + if c != can + print c, '/' + end + end + print "\n" + end end end end