Merge pull request #7 from kyohsuke/bump_up_ruby_2_2_x

convert ruby 2.2.x
skk-dev · May 8, 2016 · 5d238e8 · 5d238e8
2 parents 57f1bbd + c99833a
commit 5d238e8
Show file tree

Hide file tree

Showing 10 changed files with 239 additions and 225 deletions.
diff --git a/convert2skk/README.md b/convert2skk/README.md
@@ -322,4 +322,5 @@ notes 辞書に転用できますし、やり方によっては SKK 以外にも
 	こうじゅつろうどく /口述朗読;‖<autogen>,名詞-サ変接続/
 
 ## 著者
-三田祐介 <[email protected]>
+
+三田祐介 < clefs<span></span>@mail.goo.ne.jp >
diff --git a/convert2skk/aozora2skk.rb b/convert2skk/aozora2skk.rb
@@ -1,5 +1,5 @@
-#!/usr/local/bin/ruby -Ke
-# -*- coding: euc-jp -*-
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
 ## Copyright (C) 2005 MITA Yuusuke <[email protected]>
 ##
 ## Author: MITA Yuusuke <[email protected]>
@@ -32,11 +32,11 @@
 ##
 ## % aozora2skk.rb file-from-aozora-bunko.html > result.txt
 ##
-# ○
-require 'jcode' if RUBY_VERSION.to_f < 1.9
-#require 'kconv'
-#require 'skkdictools'
+# ○
+
+Encoding.default_external = "euc-jis-2004"
 require 'optparse'
+
 opt = OptionParser.new
 
 results = []
@@ -45,18 +45,20 @@
 opt.on('-a', 'append annotation <autogen - aozora>') { note = true }
 begin
   opt.parse!(ARGV)
-rescue OptionParser::InvalidOption => e
+rescue OptionParser::InvalidOption
   print "'#{$0} -h' for help.\n"
   exit 1
 end
 
 
+
 while gets
+  $_.encode!("utf-8")
   $_.gsub!(/<[^>]*>/, '')
-  results = results + $_.scan(/([亜-熙]{2,})[ 　]*[\[(（［〔【]([ぁ-ん]*)[\])）〕］】]/)
+  results = results + $_.scan(/([亜-熙]{2,})[ 　]*[\[(（［〔【]([ぁ-ん]*)[\])）〕］】]/)
 end
 
 results.uniq!
 results.each {|word,yomi|
-  print "#{yomi} /#{word}#{note ? ';∥<autogen - aozora>' : ''}/\n"
+  print "#{yomi} /#{word}#{note ? ';‖<autogen - aozora>' : ''}/\n"
 }
diff --git a/convert2skk/canna2skk.rb b/convert2skk/canna2skk.rb
@@ -1,5 +1,5 @@
 #!/usr/bin/env ruby
-# -*- coding: euc-jp -*-
+# -*- coding: utf-8 -*-
 # canna2skk.rb -- convert Canna dictionary to SKK-JISYO format.
 #
 # Copyright (C) 2003 NAKAJIMA Mikio <[email protected]>
@@ -33,8 +33,9 @@
 # $ canna2skk.rb gcanna.t gcannaf.t > tmp.jisyo
 # $ skkdic-expr2 tmp.jisyo > SKK-JISYO.canna
 #
-# ¤«¤ó #JS*8 ´¬ #CNSUC2*2 ´Ö #JS ´Ì ´Ó #JSSUC ´Ö
+# かん #JS*8 巻 #CNSUC2*2 間 #JS 缶 貫 #JSSUC 間
 
+Encoding.default_external = "euc-jis-2004"
 file = ARGV.shift
 open(file).each{|line|
   if !(line =~ /([^ ]+) (.+) *$/)
@@ -44,9 +45,9 @@
     words = $2
     words.split(' ').each{|word|
       if (word =~ /[#*a-zA-Z0-9]+/ || key == word)
-	next
+          next
       else
-	print key, " /", word, "/\n"
+        print key, " /", word, "/\n"
       end
     }
   end

diff --git a/convert2skk/chasen2skk.rb b/convert2skk/chasen2skk.rb
@@ -1,5 +1,6 @@
-#!/usr/local/bin/ruby -Ke
-# -*- coding: euc-jp -*-
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+
 ## Copyright (C) 2005 MITA Yuusuke <[email protected]>
 ##
 ## Author: MITA Yuusuke <[email protected]>
@@ -36,19 +37,15 @@
 ##
 ## skkdictools.rb required.
 ##
-## TODO: pick up compound-verbs, eg. 「舞い散る」
-## 舞い    マイ    舞う    動詞-自立       五段・ワ行促音便        連用形
-## 散る    チル    散る    動詞-自立       五段・ラ行      基本形
+## TODO: pick up compound-verbs, eg. 「舞い散る」
+## 舞い    マイ    舞う    動詞-自立       五段・ワ行促音便        連用形
+## 散る    チル    散る    動詞-自立       五段・ラ行      基本形
 ##
-require 'jcode' if RUBY_VERSION.to_f < 1.9
-require 'kconv'
-require 'skkdictools'
-
-#require 'cgi'
-#require 'socket'
-#require 'timeout'
 
+Encoding.default_external = "euc-jis-2004"
+require_relative 'skkdictools'
 require 'optparse'
+
 opt = OptionParser.new
 
 katakana_words = false
@@ -77,62 +74,63 @@
 
 begin
   opt.parse!(ARGV)
-rescue OptionParser::InvalidOption => e
+rescue OptionParser::InvalidOption
   print "'#{$0} -h' for help.\n"
   exit 1
 end
 
-#keyword_pat = Regexp.compile("[亜-熙]*#{keyword}[亜-熙]*")
+#keyword_pat = Regexp.compile("[亜-熙]*#{keyword}[亜-熙]*")
 
 count = 0
 #key = word = last_key = last_word = last_part = ""
 key = word = last_part = ""
 poisoned = terminate = false
 
 while gets
-  midasi, yomi, root, part, conj = $_.split("	", 5)
-  #if midasi !~ /^[亜-熙ァ-ンヴー]+$/ || terminate
-  if (midasi !~ /^[亜-熙ァ-ンヴー々]+$/ &&
-      (!allow_noun_chains || part !~ /名詞/ || part =~ /非自立/ ||
-      midasi !~ /^[亜-熙ァ-ンヴー々ぁ-ん]+$/ )) || terminate
-  #if (midasi !~ /^[亜-熙ァ-ンヴー]+$/ && conj !~ /連用形/) || terminate
+  $_.encode!("utf-8")
+  midasi, yomi, _root, part, _conj = $_.split("	", 5)
+  #if midasi !~ /^[亜-熙ァ-ンヴー]+$/ || terminate
+  if (midasi !~ /^[亜-熙ァ-ンヴー々]+$/ &&
+      (!allow_noun_chains || part !~ /名詞/ || part =~ /非自立/ ||
+       midasi !~ /^[亜-熙ァ-ンヴー々ぁ-ん]+$/ )) || terminate
+    #if (midasi !~ /^[亜-熙ァ-ンヴー]+$/ && conj !~ /連用形/) || terminate
     #next if count < 1
     if count < 1
       next if !handle_prefix
-      if part =~ /接頭詞/
-	# kludge - keep prefix w/o increasing count (cf.「ご立派」「お味噌」)
-	key = yomi.to_hiragana
-	word = midasi
-	last_part = part
-      #elsif part =~ /自立/ && conj =~ /連用形/
-      #  hogehoge
+      if part =~ /接頭詞/
+        # kludge - keep prefix w/o increasing count (cf.「ご立派」「お味噌」)
+        key = yomi.to_hiragana
+        word = midasi
+        last_part = part
+        #elsif part =~ /自立/ && conj =~ /連用形/
+        #  hogehoge
       else
-	key = word = last_part = ""
+        key = word = last_part = ""
       end
       next
     end
 
-    if midasi =~ /^[^亜-熙ァ-ンヴー々]+$/ && !terminate
+    if midasi =~ /^[^亜-熙ァ-ンヴー々]+$/ && !terminate
       # nothing
     else
-      if part =~ /接続詞|接頭詞|副詞[^可]/
-	# nothing - decline some parts
-      elsif midasi =~ /並び|及び/
-	# nothing - (HACK) decline conjonctions that ChaSen overlooks
-      elsif midasi =~ /^[ぁ-ん]+[亜-熙ァ-ンヴー々]+/
-	# nothing - this applies to quasi-words such as:
-	# に関する        ニカンスル      に関する        助詞-格助詞-連語
+      if part =~ /接続詞|接頭詞|副詞[^可]/
+        # nothing - decline some parts
+      elsif midasi =~ /並び|及び/
+        # nothing - (HACK) decline conjunctions that ChaSen overlooks
+      elsif midasi =~ /^[ぁ-ん]+[亜-熙ァ-ンヴー々]+/
+        # nothing - this applies to quasi-words such as:
+        # に関する        ニカンスル      に関する        助詞-格助詞-連語
       else
-	key += yomi.to_hiragana
-	word += midasi
-	last_part = part
-	# asayaKify here?
+        key += yomi.to_hiragana
+        word += midasi
+        last_part = part
+        # asayaKify here?
       end
     end
 
-    if word =~ /^[ぁ-んー]+$/
+    if word =~ /^[ぁ-んー]+$/
       # nothing
-    elsif !katakana_words && word =~ /^[ァ-ンヴー]+$/
+    elsif !katakana_words && word =~ /^[ァ-ンヴー]+$/
       # nothing
     elsif !keyword.empty? && !word.include?(keyword)
       # nothing
@@ -147,19 +145,19 @@
     count = 0
 
   else
-    if count > 0 && part =~ /接続詞|接頭詞|副詞[^可]/
+    if count > 0 && part =~ /接続詞|接頭詞|副詞[^可]/
       terminate = true
       redo
-    elsif count == 0 && part =~ /接尾/
-      # avoid generating 「回大会」 from 「第３回大会」
-      # 回      カイ    回      名詞-接尾-助数詞
+    elsif count == 0 && part =~ /接尾/
+      # avoid generating 「回大会」 from 「第３回大会」
+      # 回      カイ    回      名詞-接尾-助数詞
       key = word = last_part = ""
       next
     end
     count += 1
     key += yomi.to_hiragana
     word += midasi
     last_part = part
-    poisoned = true if part =~ /未知語/
+    poisoned = true if part =~ /未知語/
   end
 end
diff --git a/convert2skk/ctdicconv.rb b/convert2skk/ctdicconv.rb
@@ -1,6 +1,5 @@
-#!/usr/bin/ruby -Ke
-# -*- coding: euc-jp -*-
-require 'jcode' if RUBY_VERSION.to_f < 1.9
+#!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
 
 # ctdicconv.rb -- convert china_taiwan.csv to SKK-JISYO dictionary format.
 #
@@ -30,10 +29,11 @@
 #
 # Commentary:
 
+Encoding.default_external = "euc-jis-2004"
 $ANNOTATION = true
 ##$ANNOTATION = false
 
-# from 「オブジェクト指向スクリプト言語ruby」p121
+# from 「オブジェクト指向スクリプト言語ruby」p121
 def csv_split(source, delimiter = ',')
   csv = []
   data = ""
@@ -45,8 +45,8 @@ def csv_split(source, delimiter = ',')
     end
     if /^"/ =~ data
       if /[^"]"$/ =~ data or '""' == data
-	csv << data.sub(/^"(.*)"$/, '\1').gsub(/""/, '"')
-	data = ''
+        csv << data.sub(/^"(.*)"$/, '\1').gsub(/""/, '"')
+        data = ''
       end
     else
       csv << d
@@ -60,32 +60,32 @@ def csv_split(source, delimiter = ',')
 file = ARGV.shift
 
 if not file
-  print "ファイルを指定して下さい\n"
+  print "ファイルを指定して下さい\n"
 else
   first = true
   File.foreach(file) do |line|
     if first
       first = false
       next
     end
-    #中国・台湾,種別,英語見出し,漢字,日本語読み,中国語読み（カタカナ）,英語標記2,漢字別名,漢字別名読み,省都,省都読み,annotation
-    c_t,d,e_key,kanji,j_key,c_key,english,kanji_alias,kanji_alias_key,capital,capital_key,annotation= csv_split(line.chomp)
+    #中国・台湾,種別,英語見出し,漢字,日本語読み,中国語読み（カタカナ）,英語標記2,漢字別名,漢字別名読み,省都,省都読み,annotation
+    _c_t, _d,e_key,kanji,j_key,c_key,_english,kanji_alias,kanji_alias_key,_capital,_capital_key,annotation= csv_split(line.chomp)
     if (e_key && !e_key.empty? && kanji && !kanji.empty?)
       e_key.strip!
       kanji.strip!
-      # 英語見出し /漢字/
+      # 英語見出し /漢字/
       if ($ANNOTATION && annotation && !annotation.empty?)
-	annotation.strip!
+        annotation.strip!
         print e_key, " /", kanji, ";", annotation, "/\n"
       else
         print e_key, " /", kanji, "/\n"
       end
 
-      # 日本語見出し /Capitalized 英語/
+      # 日本語見出し /Capitalized 英語/
       if (j_key && !j_key.empty?)
-	j_key.strip!
+        j_key.strip!
         if ($ANNOTATION && annotation && !annotation.empty?)
-	  annotation.strip!
+          annotation.strip!
           print j_key, " /", e_key.capitalize, ";", annotation, "/\n"
         else
           print j_key, " /", e_key.capitalize, "/\n"
@@ -94,35 +94,35 @@ def csv_split(source, delimiter = ',')
     end
 
     if (j_key && !j_key.empty? && kanji && !kanji.empty?)
-      # 日本語見出し /漢字/
+      # 日本語見出し /漢字/
       if ($ANNOTATION && annotation && !annotation.empty?)
-	annotation.strip!
-	print j_key, " /", kanji, ";", annotation, "/\n"
+        annotation.strip!
+        print j_key, " /", kanji, ";", annotation, "/\n"
       else
-	print j_key, " /", kanji, "/\n"
+        print j_key, " /", kanji, "/\n"
       end
     end
 
     if (c_key && !c_key.empty? && kanji && !kanji.empty?)
       c_key.strip!
-      c_key.tr!("ァ-ン", "ぁ-ん")
-      # 中国語見出し /漢字/
+      c_key.tr!("ァ-ン", "ぁ-ん")
+      # 中国語見出し /漢字/
       if ($ANNOTATION && annotation && !annotation.empty?)
-	print c_key, " /", kanji, ";", annotation, "/\n"
+        print c_key, " /", kanji, ";", annotation, "/\n"
       else
-	print c_key, " /", kanji, "/\n"
+        print c_key, " /", kanji, "/\n"
       end
     end
-    # 漢字別名見出し /漢字別名/
+    # 漢字別名見出し /漢字別名/
     if (kanji_alias && kanji_alias_key &&
-	!kanji_alias.empty? && !kanji_alias_key.empty?)
+        !kanji_alias.empty? && !kanji_alias_key.empty?)
       if ($ANNOTATION && annotation && !annotation.empty?)
-	print kanji_alias_key, " /", kanji_alias, ";", annotation, "/\n"
+        print kanji_alias_key, " /", kanji_alias, ";", annotation, "/\n"
       else
-	print kanji_alias_key, " /", kanji_alias, "/\n"
+        print kanji_alias_key, " /", kanji_alias, "/\n"
       end
     end
-    # 省都見出し /省都/
+    # 省都見出し /省都/
     #if (capital && capital_key &&
     #    !capital.empty? && !capital_key.empty?)
     #  print capital_key, " /", capital, "/\n"

diff --git a/convert2skk/dic-it2skk.rb b/convert2skk/dic-it2skk.rb
@@ -1,4 +1,6 @@
 #!/usr/bin/env ruby
+# -*- coding: utf-8 -*-
+
 # dic-it2skk.rb -- convert dic-it dictionary to SKK-JISYO format.
 #
 # Copyright (C) 2003 NAKAJIMA Mikio <[email protected]>
@@ -32,6 +34,7 @@
 # $ dic-it2skk.rb dic-it.txt > tmp.jisyo
 # $ skkdic-expr2 tmp.jisyo > SKK-JISYO.dic-it
 #
+Encoding.default_external = "euc-jis-2004"
 file = ARGV.shift
 open(file).each{|line|
   if !(line =~ /([^ \/]+)\/([^ ]+) *$/)