Skip to content

Commit

Permalink
Add experimental Pocketsphinx::CMNDecoder (relates to #10)
Browse files Browse the repository at this point in the history
  • Loading branch information
watsonbox committed Mar 19, 2015
1 parent ef2322d commit 0238ebb
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 0 deletions.
2 changes: 2 additions & 0 deletions lib/pocketsphinx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
require "pocketsphinx/version"

# Pocketsphinx FFI API
require "pocketsphinx/api/sphinxbase/cmn"
require "pocketsphinx/api/sphinxbase"
require "pocketsphinx/api/sphinxad"
require "pocketsphinx/api/pocketsphinx"
Expand All @@ -22,6 +23,7 @@
require "pocketsphinx/audio_file"
require "pocketsphinx/microphone"
require "pocketsphinx/decoder"
require "pocketsphinx/cmn_decoder"
require "pocketsphinx/speech_recognizer"
require "pocketsphinx/live_speech_recognizer"
require "pocketsphinx/audio_file_speech_recognizer"
Expand Down
6 changes: 6 additions & 0 deletions lib/pocketsphinx/api/pocketsphinx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def self.ps_init(*args)
attach_function :ps_unset_search, [:decoder, :string], :int
attach_function :ps_get_search, [:decoder], :string
attach_function :ps_set_search, [:decoder, :string], :int
# Binds ps_get_feat: takes a :decoder argument and returns a raw pointer.
# get_cmn_values (further down in this file) wraps that pointer in
# Sphinxbase::Feature to read the CMN fields.
attach_function :ps_get_feat, [:decoder], :pointer

typedef :pointer, :seg_iter

Expand All @@ -35,6 +36,11 @@ def self.ps_init(*args)
attach_function :ps_seg_frames, [:seg_iter, :pointer, :pointer], :void
attach_function :ps_seg_prob, [:seg_iter, :pointer, :pointer, :pointer], :int32
attach_function :ps_seg_free, [:seg_iter], :void

# Reads the CMN mean vector currently held by a decoder.
#
# The pointer returned by ps_get_feat is viewed as a Sphinxbase::Feature; its
# :cmn_struct field supplies both the :cmn_mean buffer and the element count
# (:veclen) used to read that buffer as 32-bit floats.
#
# @param ps_decoder the decoder handle handed straight to ps_get_feat
# @return [Array<Float>] :veclen values read from the start of :cmn_mean
def self.get_cmn_values(ps_decoder)
  cmn = Sphinxbase::Feature.new(ps_get_feat(ps_decoder))[:cmn_struct]
  cmn[:cmn_mean].get_array_of_float32(0, cmn[:veclen])
end
end
end
end
2 changes: 2 additions & 0 deletions lib/pocketsphinx/api/sphinxbase.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ module Sphinxbase
extend FFI::Library
ffi_lib "libsphinxbase"

include Cmn

class Argument < FFI::Struct
layout :name, :string,
:type, :int,
Expand Down
40 changes: 40 additions & 0 deletions lib/pocketsphinx/api/sphinxbase/cmn.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
module Pocketsphinx
module API
module Sphinxbase
# FFI enum and struct declarations used to read CMN state out of a decoder's
# feature handle. Sphinxbase mixes this module in via `include Cmn`.
module Cmn
extend FFI::Library

# :none is pinned to 0; the symbols after it take the following consecutive
# values (standard FFI enum numbering).
# NOTE(review): presumably mirrors sphinxbase's cmn_type_t / agc_type_t -
# confirm member order against the installed headers.
enum :cmn_type, [:none, 0, :current, :prior]
enum :agc_type, [:none, 0, :max, :emax, :noise]

# Struct reached through Feature's :cmn_struct field.
# Pocketsphinx.get_cmn_values reads :veclen float32 values from :cmn_mean.
# NOTE(review): :cmn_var and :sum are exposed as raw pointers and are not
# read anywhere in this change; presumably they are buffers of the same
# length as :cmn_mean - confirm before using them.
class CmnData < FFI::Struct
layout :cmn_mean, :pointer,
:cmn_var, :pointer,
:sum, :pointer,
:nframe, :int32,
:veclen, :int32
end

# View over the pointer returned by ps_get_feat.
#
# FFI derives field offsets from the declaration order below, so the order
# and widths must match the native struct exactly; do not reorder fields.
# NOTE(review): presumably mirrors sphinxbase's feat_t - confirm against the
# installed headers, especially the enum-typed fields (:cmn, :agc).
# NOTE(review): the :mfcc_t key looks like a C type name rather than a field
# name (the matching feat_t slot appears to be called sv_buf) - verify
# before relying on this key.
# :cmn_struct is declared with CmnData.ptr, so reading it yields a CmnData
# instance rather than a bare pointer; :agc_struct stays a raw pointer.
class Feature < FFI::Struct
layout :refcount, :int,
:name, :string,
:cepsize, :int32,
:n_stream, :int32,
:stream_len, :pointer,
:window_size, :int32,
:n_sv, :int32,
:sv_len, :pointer,
:subvecs, :pointer,
:mfcc_t, :pointer,
:sv_dim, :int32,
:cmn, :cmn_type,
:varnorm, :int32,
:agc, :agc_type,
:compute_feat, :pointer,
:cmn_struct, CmnData.ptr,
:agc_struct, :pointer
end
end
end
end
end
29 changes: 29 additions & 0 deletions lib/pocketsphinx/cmn_decoder.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
module Pocketsphinx
  # Experimental decoder that runs a raw decode a second time when the
  # decoder's CMN values moved by more than a configurable tolerance during
  # the first pass.
  class CMNDecoder < Decoder
    # Tolerance used when none has been assigned through #cmn_tolerance=.
    CMN_TOLERANCE_DEFAULT = 20

    attr_writer :cmn_tolerance

    # @return the assigned tolerance, or CMN_TOLERANCE_DEFAULT when unset
    def cmn_tolerance
      return CMN_TOLERANCE_DEFAULT unless @cmn_tolerance

      @cmn_tolerance
    end

    # Decodes a raw audio stream via the parent implementation, repeating the
    # decode once if the CMN values shifted by more than #cmn_tolerance.
    #
    # @param audio_file the raw audio stream, forwarded unchanged to super
    # @param max_samples forwarded unchanged to super
    # @return whatever the (possibly repeated) parent decode returns
    def decode_raw(audio_file, max_samples = 2048)
      repeat_if_cmn_sum_exceeds do
        super
      end
    end

    private

    # Runs the block, comparing CMN values sampled before and after it. When
    # the summed absolute change is greater than +tolerance+ the block is run
    # again and that second result is returned; otherwise the first result is.
    def repeat_if_cmn_sum_exceeds(tolerance = cmn_tolerance)
      cmn_before = cmn_values
      first_pass = yield
      drift = cmn_sum(cmn_before, cmn_values)

      return first_pass unless drift > tolerance

      yield
    end

    # Sum of the absolute element-wise differences between two CMN vectors.
    def cmn_sum(before, after)
      before.zip(after).inject(0) do |total, (old_value, new_value)|
        total + (new_value - old_value).abs
      end
    end
  end
end
5 changes: 5 additions & 0 deletions lib/pocketsphinx/decoder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ def decode(audio_path_or_file, max_samples = 2048)
# @param [IO] audio_file The raw audio stream to decode as a single utterance
# @param [Fixnum] max_samples The maximum samples to process from the stream on each iteration
def decode_raw(audio_file, max_samples = 2048)
audio_file.rewind
start_utterance

FFI::MemoryPointer.new(:int16, max_samples) do |buffer|
Expand Down Expand Up @@ -147,6 +148,10 @@ def words
words
end

# Current CMN values for this decoder, as reported by the API layer.
#
# @return the result of ps_api.get_cmn_values for this decoder's handle
def cmn_values
  api = ps_api
  api.get_cmn_values(ps_decoder)
end

# Adds new search using JSGF model.
#
# Convenience method to parse JSGF model from string and create a search.
Expand Down

0 comments on commit 0238ebb

Please sign in to comment.