Skip to content

Commit

Permalink
fix bug and change version
Browse files Browse the repository at this point in the history
  • Loading branch information
fxsjy committed Apr 27, 2013
1 parent c8df565 commit 59d5d3b
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 105 deletions.
18 changes: 13 additions & 5 deletions jieba/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,16 @@ def initialize(dictionary=DICTIONARY):
trie = None
_curpath=os.path.normpath( os.path.join( os.getcwd(), os.path.dirname(__file__) ) )

print >> sys.stderr, "Building Trie..., from " + dictionary
abs_path = os.path.join(_curpath,dictionary)
print >> sys.stderr, "Building Trie..., from " + abs_path
t1 = time.time()
cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache")
if abs_path == os.path.join(_curpath,"dict.txt"): #default dictionary
cache_file = os.path.join(tempfile.gettempdir(),"jieba.cache")
else: #custom dictionary
cache_file = os.path.join(tempfile.gettempdir(),"jieba.user."+str(hash(abs_path))+".cache")

load_from_cache_fail = True
if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(os.path.join(_curpath,dictionary)):
if os.path.exists(cache_file) and os.path.getmtime(cache_file)>os.path.getmtime(abs_path):
print >> sys.stderr, "loading model from cache " + cache_file
try:
trie,FREQ,total,min_freq = marshal.load(open(cache_file,'rb'))
Expand All @@ -67,7 +72,7 @@ def initialize(dictionary=DICTIONARY):
load_from_cache_fail = True

if load_from_cache_fail:
trie,FREQ,total = gen_trie(os.path.join(_curpath, dictionary))
trie,FREQ,total = gen_trie(abs_path)
FREQ = dict([(k,log(float(v)/total)) for k,v in FREQ.iteritems()]) #normalize
min_freq = min(FREQ.itervalues())
print >> sys.stderr, "dumping model to file cache " + cache_file
Expand Down Expand Up @@ -296,5 +301,8 @@ def disable_parallel():
def set_dictionary(dictionary_path):
    """Point the module at a different dictionary file.

    ``dictionary_path`` is joined onto the current working directory and
    normalized. The module-level ``DICTIONARY`` is replaced, and
    ``initialized`` is reset to ``False``, only after the resolved path has
    been confirmed to exist; on failure both globals are left untouched.

    Args:
        dictionary_path: Path to the dictionary file, absolute or relative
            to the current working directory.

    Raises:
        Exception: If the resolved path does not exist.
    """
    global initialized, DICTIONARY
    with DICT_LOCK:
        abs_path = os.path.normpath(os.path.join(os.getcwd(), dictionary_path))
        # Validate before touching any global so a bad path cannot leave
        # DICTIONARY pointing at a file that is not there.
        if not os.path.exists(abs_path):
            raise Exception("path does not exists:" + abs_path)
        DICTIONARY = abs_path
        # Presumably makes the next use rebuild from the new dictionary --
        # confirm against initialize().
        initialized = False
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from distutils.core import setup
setup(name='jieba',
version='0.27',
version='0.28',
description='Chinese Words Segementation Utilities',
author='Sun, Junyi',
author_email='[email protected]',
Expand Down
1 change: 1 addition & 0 deletions test/foobar.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
好人 12 n
27 changes: 27 additions & 0 deletions test/test_change_dictpath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#encoding=utf-8
import sys
sys.path.append("../")
import jieba

def cuttest(test_sent):
    """Segment ``test_sent`` with ``jieba.cut`` and print the pieces joined by single spaces."""
    segments = jieba.cut(test_sent)
    joined = " ".join(segments)
    print(joined)

def testcase():
    """Run ``cuttest`` over a fixed list of sample sentences, in order."""
    samples = (
        "这是一个伸手不见五指的黑夜。我叫孙悟空,我爱北京,我爱Python和C++。",
        "我不喜欢日本和服。",
        "雷猴回归人间。",
        "工信处女干事每月经过下属科室都要亲口交代24口交换机等技术性器件的安装工作",
        "我需要廉租房",
        "永和服装饰品有限公司",
        "我爱北京天安门",
        "abc",
        "隐马尔可夫",
        "雷猴是个好网站",
    )
    for sentence in samples:
        cuttest(sentence)

if __name__ == "__main__":
    # First pass: segment the samples with whichever dictionary is active.
    testcase()
    # Switch to the dictionary file "foobar.txt", then run the same samples
    # again after a separator line.
    jieba.set_dictionary("foobar.txt")
    print("================================")
    testcase()

99 changes: 0 additions & 99 deletions test/test_pos2.py

This file was deleted.

0 comments on commit 59d5d3b

Please sign in to comment.