Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Decoder words probability #23

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ruby-gemset
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
pocketsphinx-ruby
1 change: 1 addition & 0 deletions .ruby-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ruby-2.1.1
3 changes: 2 additions & 1 deletion Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ begin
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)
task :default => [:spec]
rescue LoadError
rescue LoadError => ex
puts ex
end
21 changes: 16 additions & 5 deletions lib/pocketsphinx/decoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def initialize(string, path_score, posterior_prob = nil)
end
end

# Value object describing one recognized word segment; instances are built by #words.
# NOTE(review): Word is assigned twice below (a 3-field and a 7-field form). This looks like
# the removed and added sides of a diff shown together -- confirm only the 7-field form remains.
Word = Struct.new(:word, :start_frame, :end_frame)
Word = Struct.new(:word, :start_frame, :end_frame, :acoustic_score, :language_score, :backoff_mode, :posterior_prob)

attr_writer :ps_api
attr_accessor :configuration
Expand Down Expand Up @@ -132,19 +132,30 @@ def hypothesis
#
# @return [Array<Word>] Words with start/end frame values (10msec/frame), acoustic and language scores, backoff mode and posterior probability
def words
mp_path_score = FFI::MemoryPointer.new(:int32, 1)
start_frame = FFI::MemoryPointer.new(:int32, 1)
end_frame = FFI::MemoryPointer.new(:int32, 1)
mp_path_score = FFI::MemoryPointer.new(:int32, 1)
start_frame = FFI::MemoryPointer.new(:int32, 1)
end_frame = FFI::MemoryPointer.new(:int32, 1)

acoustic_score = FFI::MemoryPointer.new(:int32, 1)
language_score = FFI::MemoryPointer.new(:int32, 1)
backoff_mode = FFI::MemoryPointer.new(:int32, 1)

seg_iter = ps_api.ps_seg_iter(ps_decoder, mp_path_score)
words = []

until seg_iter.null? do
ps_api.ps_seg_frames(seg_iter, start_frame, end_frame)

posterior_prob = ps_api.ps_seg_prob(seg_iter, acoustic_score, language_score, backoff_mode)

words << Pocketsphinx::Decoder::Word.new(
ps_api.ps_seg_word(seg_iter),
start_frame.get_int32(0),
end_frame.get_int32(0)
end_frame.get_int32(0),
log_prob_to_linear(acoustic_score.get_int32(0)),
log_prob_to_linear(language_score.get_int32(0)),
backoff_mode.get_int32(0),
log_prob_to_linear(posterior_prob)
)
seg_iter = ps_api.ps_seg_next(seg_iter)
end
Expand Down
1 change: 1 addition & 0 deletions pocketsphinx-ruby.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ Gem::Specification.new do |spec|
spec.add_development_dependency "rake"
spec.add_development_dependency "rspec", "~> 3.1.0"
spec.add_development_dependency "coveralls"
spec.add_development_dependency "byebug"
end
4 changes: 2 additions & 2 deletions spec/configuration_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@

# Checks how many setting names the configuration subject reports.
# NOTE(review): two conflicting expected counts (114 and 112) follow; they cannot both pass.
# This looks like the removed and added lines of a diff shown together -- confirm which applies.
describe '#setting_names' do
it 'contains the names of all possible system settings' do
expect(subject.setting_names.count).to eq(114)
expect(subject.setting_names.count).to eq(112)
end
end

Expand All @@ -84,7 +84,7 @@
it 'gives details for all settings when no name is specified' do
details = subject.details

expect(details.count).to eq(114)
expect(details.count).to eq(112)
expect(details.first).to eq({
name: "agc",
type: :string,
Expand Down
34 changes: 29 additions & 5 deletions spec/decoder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@
end
end

describe '#words' do
context '#words' do
let(:iterator) { FFI::MemoryPointer.from_string("") }

it 'calls libpocketsphinx' do
Expand All @@ -177,21 +177,45 @@
end_frame.put_int16(0, 20)
end

expect(ps_api).to receive(:ps_seg_word).ordered.and_return("one")
expect(ps_api).to receive(:ps_seg_prob).ordered do |seg_iter, acoustic_score, language_score, backoff_mode|
acoustic_score.put_int32(0, 1)
language_score.put_int32(0, 2)
backoff_mode.put_int32(0, 3)
end.and_return(0.5)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 1).ordered.and_return(0.1)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 2).ordered.and_return(0.2)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 0.5).ordered.and_return(0.51)

expect(ps_api).to receive(:ps_seg_word).and_return("one")
expect(ps_api).to receive(:ps_seg_next).ordered.and_return(iterator)

expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame|
start_frame.put_int16(0, 30)
end_frame.put_int16(0, 40)
end

expect(ps_api).to receive(:ps_seg_word).ordered.and_return("two")
expect(ps_api).to receive(:ps_seg_prob).ordered do |seg_iter, acoustic_score, language_score, backoff_mode|
acoustic_score.put_int32(0, 4)
language_score.put_int32(0, 5)
backoff_mode.put_int32(0, 6)
end.and_return(0.6)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 4).ordered.and_return(0.4)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 5).ordered.and_return(0.5)
expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath)
expect(ps_api).to receive(:logmath_exp).with(:logmath, 0.6).ordered.and_return(0.62)

expect(ps_api).to receive(:ps_seg_word).and_return("two")
expect(ps_api).to receive(:ps_seg_next).ordered.and_return(FFI::Pointer::NULL)

words = subject.words

expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20))
expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40))
expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20, 0.1, 0.2, 3, 0.51))
expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40, 0.4, 0.5, 6, 0.62))
end
end

Expand Down
18 changes: 14 additions & 4 deletions spec/integration/decoder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')
expect(subject.hypothesis).to eq("go forward ten meters")

expect(subject.hypothesis.path_score).to eq(0.4651996053749572)
expect(subject.hypothesis.posterior_prob).to eq( 0.0018953977306176936)
expect(subject.hypothesis.path_score).to be_within(0.01).of(0.4651996053749572)
expect(subject.hypothesis.posterior_prob).to be_within(0.01).of(0.0018953977306176936)
end

# FIXME: This test illustrates a current issue discussed in:
Expand All @@ -44,8 +44,18 @@
subject.decode File.open('spec/assets/audio/goforward.raw', 'rb')

expect(subject.words.map(&:word)).to eq(["<s>", "go", "forward", "ten", "meters", "</s>"])
expect(subject.words.map(&:start_frame)).to eq([0, 46, 64, 117, 153, 212])
expect(subject.words.map(&:end_frame)).to eq([45, 63, 116, 152, 211, 260])
expect(subject.words.map(&:start_frame)).to eq([2, 48, 66, 119, 155, 214])
expect(subject.words.map(&:end_frame)).to eq([47, 65, 118, 154, 213, 262])

expected_pps = [1.0, 0.9, 0.9, 0.1, 0.29, 1.0]
subject.words.map(&:posterior_prob).each_with_index do |pp, index|
expect(pp).to be_within(0.1).of(expected_pps[index])
end

expected_ls = [1.0, 0.95, 0.95, 0.94, 0.95, 0.98]
subject.words.map(&:language_score).each_with_index do |ls, index|
expect(ls).to be_within(0.1).of(expected_ls[index])
end
end
end
end