From 0b897d46955118c168189e9b791fa9450db1c9ee Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Fri, 16 Sep 2016 17:55:41 +1000 Subject: [PATCH 1/3] = --- src/sensecounts.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 src/sensecounts.jl diff --git a/src/sensecounts.jl b/src/sensecounts.jl new file mode 100644 index 0000000..fb1ee2f --- /dev/null +++ b/src/sensecounts.jl @@ -0,0 +1,12 @@ +export sensecount, sensecounts + +function sensecount(db::DB, ss::Synset, lem::Lemma) + get(db.counts, sensekey(db, ss, lem), 0) + # zero is default for senses that are not found in CNTLIST + # note: this will still error for senses that don't have a sense key + # that is a good thing. +end + +function sensecounts(db::DB, lem::Lemma) + Dict([ss=>sensecount(db, ss, lem) for ss in synsets(db, lem)]) +end From 0bd8c6162db6730855c2927fc240ed7f537dbf48 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Fri, 16 Sep 2016 17:57:43 +1000 Subject: [PATCH 2/3] =add sense counts --- src/WordNet.jl | 1 + src/db.jl | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/WordNet.jl b/src/WordNet.jl index 7bb0723..6a5c58d 100644 --- a/src/WordNet.jl +++ b/src/WordNet.jl @@ -9,5 +9,6 @@ include("synset.jl") include("db.jl") include("operations.jl") include("sensekeys.jl") +include("sensecounts.jl") end diff --git a/src/db.jl b/src/db.jl index 0af4c45..e3c2c19 100644 --- a/src/db.jl +++ b/src/db.jl @@ -4,13 +4,15 @@ immutable DB lemmas::Dict{Char, Dict{AbstractString, Lemma}} synsets::Dict{Char, Dict{Int, Synset}} sensekeys::Dict{Tuple{Int, AbstractString}, AbstractString} + counts::Dict{AbstractString, Int} end function DB(base_dir::AbstractString) DB( load_lemmas(base_dir), load_synsets(base_dir), - load_sensekeys(base_dir) + load_sensekeys(base_dir), + load_counts(base_dir) ) end @@ -80,6 +82,19 @@ function load_sensekeys(basedir) end +function load_counts(basedir) + path=joinpath(basedir, "dict", "cntlist") + counts =Dict{AbstractString, 
Int}() + + for line in eachline(path) + tag_cnt, sense_key, sense_number = split(line) + counts[sense_key] = parse(Int, tag_cnt) + end + + counts +end + + function path_to_data_file(base_dir, pos) joinpath(base_dir, "dict", "data.$(SYNSET_TYPES[pos])") end From 31ebf003e201314b86fd44429b9333e3b6d6c4c2 Mon Sep 17 00:00:00 2001 From: Lyndon White Date: Mon, 24 Oct 2016 23:28:30 +1100 Subject: [PATCH 3/3] =Added missing field to mockdb (This should still fail tests as I have not committed an example cnt file) --- test/test_db.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_db.jl b/test/test_db.jl index d03d8f9..f35ff83 100644 --- a/test/test_db.jl +++ b/test/test_db.jl @@ -2,8 +2,9 @@ facts("DB") do const mock_db = DB( Dict{Char, Dict{AbstractString, Lemma}}(), Dict{Char, Dict{Int, Synset}}(), - Dict{Tuple{Int,AbstractString}, AbstractString}() - ) + Dict{Tuple{Int,AbstractString}, AbstractString}(), + Dict{AbstractString, Int}() + ) context("path_to_data_file") do expected = joinpath("MockDB", "dict", "data.verb")