# Patch summary: expose per-word scores from Pocketsphinx::Decoder#words.
#
# NOTE: in this copy the patch's line structure is collapsed onto a few long
# lines; the diff content below is left untouched.
#
# Files touched:
#   .ruby-gemset, .ruby-version
#     New files containing "pocketsphinx-ruby" and "ruby-2.1.1".
#   Rakefile
#     The LoadError rescued around the RSpec rake task setup is now bound to
#     `ex` and printed with `puts` instead of being ignored.
#   lib/pocketsphinx/decoder.rb
#     Word struct gains :acoustic_score, :language_score, :backoff_mode and
#     :posterior_prob. #words now calls ps_seg_prob for every segment; the
#     acoustic score, language score and the ps_seg_prob return value are passed
#     through log_prob_to_linear, while backoff_mode is stored as the raw int32.
#   pocketsphinx-ruby.gemspec
#     Adds "byebug" as a development dependency.
#   spec/configuration_spec.rb
#     Expected number of settings changes from 114 to 112.
#   spec/decoder_spec.rb
#     Mocks ps_seg_prob, ps_get_logmath and logmath_exp for the new Word fields.
#   spec/integration/decoder_spec.rb
#     Exact float comparisons become be_within checks, expected start/end frames
#     shift by 2, and posterior_prob / language_score checks are added.
#
# NOTE(review): the "@return [Array] Array of words with start/end frame values"
#   doc line above #words is unchanged context, although Word now carries four
#   more fields - consider updating it in the same change.
# NOTE(review): `.ordered` is dropped from both ps_seg_word expectations in
#   spec/decoder_spec.rb while the neighbouring expectations stay ordered -
#   confirm this is intentional.
# NOTE(review): `describe '#words'` becomes `context '#words'` - confirm this is
#   intentional.
# NOTE(review): be_within(0.01).of(0.0018953977306176936) uses a tolerance larger
#   than the expected value, so it accepts anything from about -0.008 to 0.012.
# NOTE(review): nothing in this diff requires or calls byebug - presumably a
#   debugging aid; confirm it belongs in the gemspec.
# NOTE(review): the unit spec has ps_seg_prob return 0.5 (a Float) and expects
#   logmath_exp to receive 0.5, while the other scores are read with get_int32 -
#   presumably the real call returns an integer log probability; verify.
# NOTE(review): the 114 -> 112 setting count and the +2 frame shift are not
#   explained in the diff - presumably they track a different libpocketsphinx
#   build; TODO confirm.
diff --git a/.ruby-gemset b/.ruby-gemset new file mode 100644 index 0000000..5deeff3 --- /dev/null +++ b/.ruby-gemset @@ -0,0 +1 @@ +pocketsphinx-ruby diff --git a/.ruby-version b/.ruby-version new file mode 100644 index 0000000..314a6ed --- /dev/null +++ b/.ruby-version @@ -0,0 +1 @@ +ruby-2.1.1 diff --git a/Rakefile b/Rakefile index 9aeea46..cdab22a 100644 --- a/Rakefile +++ b/Rakefile @@ -5,5 +5,6 @@ begin require 'rspec/core/rake_task' RSpec::Core::RakeTask.new(:spec) task :default => [:spec] -rescue LoadError +rescue LoadError => ex + puts ex end diff --git a/lib/pocketsphinx/decoder.rb b/lib/pocketsphinx/decoder.rb index 54df8ad..c0a106d 100644 --- a/lib/pocketsphinx/decoder.rb +++ b/lib/pocketsphinx/decoder.rb @@ -16,7 +16,7 @@ def initialize(string, path_score, posterior_prob = nil) end end - Word = Struct.new(:word, :start_frame, :end_frame) + Word = Struct.new(:word, :start_frame, :end_frame, :acoustic_score, :language_score, :backoff_mode, :posterior_prob) attr_writer :ps_api attr_accessor :configuration @@ -132,19 +132,30 @@ def hypothesis # # @return [Array] Array of words with start/end frame values (10msec/frame) def words - mp_path_score = FFI::MemoryPointer.new(:int32, 1) - start_frame = FFI::MemoryPointer.new(:int32, 1) - end_frame = FFI::MemoryPointer.new(:int32, 1) + mp_path_score = FFI::MemoryPointer.new(:int32, 1) + start_frame = FFI::MemoryPointer.new(:int32, 1) + end_frame = FFI::MemoryPointer.new(:int32, 1) + + acoustic_score = FFI::MemoryPointer.new(:int32, 1) + language_score = FFI::MemoryPointer.new(:int32, 1) + backoff_mode = FFI::MemoryPointer.new(:int32, 1) seg_iter = ps_api.ps_seg_iter(ps_decoder, mp_path_score) words = [] until seg_iter.null?
do ps_api.ps_seg_frames(seg_iter, start_frame, end_frame) + + posterior_prob = ps_api.ps_seg_prob(seg_iter, acoustic_score, language_score, backoff_mode) + words << Pocketsphinx::Decoder::Word.new( ps_api.ps_seg_word(seg_iter), start_frame.get_int32(0), - end_frame.get_int32(0) + end_frame.get_int32(0), + log_prob_to_linear(acoustic_score.get_int32(0)), + log_prob_to_linear(language_score.get_int32(0)), + backoff_mode.get_int32(0), + log_prob_to_linear(posterior_prob) ) seg_iter = ps_api.ps_seg_next(seg_iter) end diff --git a/pocketsphinx-ruby.gemspec b/pocketsphinx-ruby.gemspec index 9c4cd71..255b48f 100644 --- a/pocketsphinx-ruby.gemspec +++ b/pocketsphinx-ruby.gemspec @@ -24,4 +24,5 @@ Gem::Specification.new do |spec| spec.add_development_dependency "rake" spec.add_development_dependency "rspec", "~> 3.1.0" spec.add_development_dependency "coveralls" + spec.add_development_dependency "byebug" end diff --git a/spec/configuration_spec.rb b/spec/configuration_spec.rb index 9185a4e..4e45c18 100644 --- a/spec/configuration_spec.rb +++ b/spec/configuration_spec.rb @@ -65,7 +65,7 @@ describe '#setting_names' do it 'contains the names of all possible system settings' do - expect(subject.setting_names.count).to eq(114) + expect(subject.setting_names.count).to eq(112) end end @@ -84,7 +84,7 @@ it 'gives details for all settings when no name is specified' do details = subject.details - expect(details.count).to eq(114) + expect(details.count).to eq(112) expect(details.first).to eq({ name: "agc", type: :string, diff --git a/spec/decoder_spec.rb b/spec/decoder_spec.rb index 90bcabe..8b1df76 100644 --- a/spec/decoder_spec.rb +++ b/spec/decoder_spec.rb @@ -166,7 +166,7 @@ end end - describe '#words' do + context '#words' do let(:iterator) { FFI::MemoryPointer.from_string("") } it 'calls libpocketsphinx' do @@ -177,7 +177,19 @@ end_frame.put_int16(0, 20) end - expect(ps_api).to receive(:ps_seg_word).ordered.and_return("one") + expect(ps_api).to receive(:ps_seg_prob).ordered do
|seg_iter, acoustic_score, language_score, backoff_mode| + acoustic_score.put_int32(0, 1) + language_score.put_int32(0, 2) + backoff_mode.put_int32(0, 3) + end.and_return(0.5) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 1).ordered.and_return(0.1) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 2).ordered.and_return(0.2) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 0.5).ordered.and_return(0.51) + + expect(ps_api).to receive(:ps_seg_word).and_return("one") expect(ps_api).to receive(:ps_seg_next).ordered.and_return(iterator) expect(ps_api).to receive(:ps_seg_frames).ordered do |seg_iter, start_frame, end_frame| @@ -185,13 +197,25 @@ end_frame.put_int16(0, 40) end - expect(ps_api).to receive(:ps_seg_word).ordered.and_return("two") + expect(ps_api).to receive(:ps_seg_prob).ordered do |seg_iter, acoustic_score, language_score, backoff_mode| + acoustic_score.put_int32(0, 4) + language_score.put_int32(0, 5) + backoff_mode.put_int32(0, 6) + end.and_return(0.6) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 4).ordered.and_return(0.4) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 5).ordered.and_return(0.5) + expect(ps_api).to receive(:ps_get_logmath).with(subject.ps_decoder).ordered.and_return(:logmath) + expect(ps_api).to receive(:logmath_exp).with(:logmath, 0.6).ordered.and_return(0.62) + + expect(ps_api).to receive(:ps_seg_word).and_return("two") expect(ps_api).to receive(:ps_seg_next).ordered.and_return(FFI::Pointer::NULL) words =
subject.words - expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20)) - expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40)) + expect(words[0]).to eq(Pocketsphinx::Decoder::Word.new("one", 10, 20, 0.1, 0.2, 3, 0.51)) + expect(words[1]).to eq(Pocketsphinx::Decoder::Word.new("two", 30, 40, 0.4, 0.5, 6, 0.62)) end end diff --git a/spec/integration/decoder_spec.rb b/spec/integration/decoder_spec.rb index 4ab098e..a0b2c06 100644 --- a/spec/integration/decoder_spec.rb +++ b/spec/integration/decoder_spec.rb @@ -20,8 +20,8 @@ subject.decode File.open('spec/assets/audio/goforward.raw', 'rb') expect(subject.hypothesis).to eq("go forward ten meters") - expect(subject.hypothesis.path_score).to eq(0.4651996053749572) - expect(subject.hypothesis.posterior_prob).to eq( 0.0018953977306176936) + expect(subject.hypothesis.path_score).to be_within(0.01).of(0.4651996053749572) + expect(subject.hypothesis.posterior_prob).to be_within(0.01).of(0.0018953977306176936) end # FIXME: This test illustrates a current issue discussed in: @@ -44,8 +44,18 @@ subject.decode File.open('spec/assets/audio/goforward.raw', 'rb') expect(subject.words.map(&:word)).to eq(["", "go", "forward", "ten", "meters", ""]) - expect(subject.words.map(&:start_frame)).to eq([0, 46, 64, 117, 153, 212]) - expect(subject.words.map(&:end_frame)).to eq([45, 63, 116, 152, 211, 260]) + expect(subject.words.map(&:start_frame)).to eq([2, 48, 66, 119, 155, 214]) + expect(subject.words.map(&:end_frame)).to eq([47, 65, 118, 154, 213, 262]) + + expected_pps = [1.0, 0.9, 0.9, 0.1, 0.29, 1.0] + subject.words.map(&:posterior_prob).each_with_index do |pp, index| + expect(pp).to be_within(0.1).of(expected_pps[index]) + end + + expected_ls = [1.0, 0.95, 0.95, 0.94, 0.95, 0.98] + subject.words.map(&:language_score).each_with_index do |ls, index| + expect(ls).to be_within(0.1).of(expected_ls[index]) + end end end end