Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add Sensecounts #4

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/WordNet.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@ include("synset.jl")
include("db.jl")
include("operations.jl")
include("sensekeys.jl")
include("sensecounts.jl")

end
17 changes: 16 additions & 1 deletion src/db.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@ immutable DB
lemmas::Dict{Char, Dict{AbstractString, Lemma}}
synsets::Dict{Char, Dict{Int, Synset}}
sensekeys::Dict{Tuple{Int, AbstractString}, AbstractString}
counts::Dict{AbstractString, Int}
end

"""
    DB(base_dir::AbstractString)

Build a `DB` by running each loader against `base_dir`.

The loader results are passed positionally in the order lemmas, synsets,
sense keys, counts.
"""
function DB(base_dir::AbstractString)
    # Exactly one argument per loader: a leftover duplicate
    # `load_sensekeys(base_dir)` line (without a trailing comma) made this
    # call unparsable.
    return DB(
        load_lemmas(base_dir),
        load_synsets(base_dir),
        load_sensekeys(base_dir),
        load_counts(base_dir)
    )
end

Expand Down Expand Up @@ -80,6 +82,19 @@ function load_sensekeys(basedir)
end


"""
    load_counts(basedir)

Read `<basedir>/dict/cntlist` and return a `Dict{AbstractString, Int}` mapping
each sense key to its tag count.

Each non-blank line is split on whitespace; the first field is parsed as the
integer count and the second field is used as the sense key. Any further
fields are ignored. Blank or whitespace-only lines are skipped. A non-blank
line with fewer than two fields, or with a non-integer first field, still
raises an error.
"""
function load_counts(basedir)
    path = joinpath(basedir, "dict", "cntlist")
    counts = Dict{AbstractString, Int}()

    for line in eachline(path)
        fields = split(line)
        # A blank line (e.g. a trailing empty line) has no fields to unpack.
        isempty(fields) && continue

        tag_cnt, sense_key = fields
        counts[sense_key] = parse(Int, tag_cnt)
    end

    return counts
end


"""
    path_to_data_file(base_dir, pos)

Return the path `<base_dir>/dict/data.<suffix>`, where `<suffix>` is the entry
stored in `SYNSET_TYPES` under the key `pos`.
"""
function path_to_data_file(base_dir, pos)
    filename = string("data.", SYNSET_TYPES[pos])
    return joinpath(base_dir, "dict", filename)
end
Expand Down
12 changes: 12 additions & 0 deletions src/sensecounts.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
export sensecount, sensecounts

"""
    sensecount(db::DB, ss::Synset, lem::Lemma)

Return the count stored in `db.counts` for the sense key of `lem` within `ss`.

A sense key that has no entry in the count table (i.e. was not found in
CNTLIST) yields `0`. The sense key itself is obtained via `sensekey` first;
as noted by the original author, that lookup still errors for senses that
do not have a sense key, and this is intentional.
"""
function sensecount(db::DB, ss::Synset, lem::Lemma)
    key = sensekey(db, ss, lem)
    return get(db.counts, key, 0)
end

"""
    sensecounts(db::DB, lem::Lemma)

Return a `Dict` that maps every synset returned by `synsets(db, lem)` to the
value of `sensecount(db, synset, lem)`.
"""
function sensecounts(db::DB, lem::Lemma)
    entries = [ss => sensecount(db, ss, lem) for ss in synsets(db, lem)]
    return Dict(entries)
end
5 changes: 3 additions & 2 deletions test/test_db.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@ facts("DB") do
const mock_db = DB(
Dict{Char, Dict{AbstractString, Lemma}}(),
Dict{Char, Dict{Int, Synset}}(),
Dict{Tuple{Int,AbstractString}, AbstractString}()
)
Dict{Tuple{Int,AbstractString}, AbstractString}(),
Dict{AbstractString, Int}()
)

context("path_to_data_file") do
expected = joinpath("MockDB", "dict", "data.verb")
Expand Down