secretsauceai · skewballfox · Jan 8, 2023 · Jan 2, 2023 · Jan 2, 2023 · Jan 5, 2023
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,6 @@
 /target
 Cargo.lock
 /py-speechsauce/.venv
+py-speechsauce/speechsauce/*.abi3.so
+py-speechsauce/src/lib.rs
+py-speechsauce/**/__pycache__/*
diff --git a/py-speechsauce/Cargo.toml b/py-speechsauce/Cargo.toml
@@ -5,9 +5,12 @@ edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [lib]
-name = "speechsauce"
+name = "speechsauce_python"
 crate-type = ["cdylib"]
 
+[package.metadata.maturin]
+name = "speechsauce._internal"
+
 [dependencies]
 ndarray="^0.15"
 pyo3 = {version= "0.16.5", features=["extension-module","abi3-py37"]}

diff --git a/py-speechsauce/speechsauce/__init__.py b/py-speechsauce/speechsauce/__init__.py
@@ -0,0 +1,82 @@
+from functools import lru_cache
+from ._internal import mfcc as internal_mfcc, _speech_config, cmvn, preemphasis
+
+__all__ = ["mfcc", "preemphasis", "cmvn"]
+
+
+@lru_cache(maxsize=32)
+def _get_speech_config(
+    sampling_frequency,
+    frame_length=0.020,
+    frame_stride=0.01,
+    num_cepstral=13,
+    num_filters=40,
+    fft_length=512,
+    low_frequency=0,
+    high_frequency=None,
+    dc_elimination=True,
+):
+    """pay no attention to the man behind the curtain
+
+    this function returns a config object to be used by the rust code, avoids recomputing elements where possible
+    """
+    return _speech_config(
+        sampling_frequency,
+        frame_length,
+        frame_stride,
+        num_cepstral,
+        num_filters,
+        fft_length,
+        low_frequency,
+        high_frequency,
+        dc_elimination,
+    )
+
+
+def mfcc(
+    signal,
+    sampling_frequency,
+    frame_length=0.020,
+    frame_stride=0.01,
+    num_cepstral=13,
+    num_filters=40,
+    fft_length=512,
+    low_frequency=0,
+    high_frequency=None,
+    dc_elimination=True,
+):
+    """Compute MFCC features from an audio signal.
+    Args:
+         signal (array): the audio signal from which to compute features.
+             Should be an N x 1 array
+         sampling_frequency (int): the sampling frequency of the signal
+             we are working with.
+         frame_length (float): the length of each frame in seconds.
+             Default is 0.020s
+         frame_stride (float): the step between successive frames in seconds.
+             Default is 0.01s (50% overlap with the default frame length)
+         num_filters (int): the number of filters in the filterbank,
+             default 40.
+         fft_length (int): number of FFT points. Default is 512.
+         low_frequency (float): lowest band edge of mel filters.
+             In Hz, default is 0.
+         high_frequency (float): highest band edge of mel filters.
+             In Hz, default is samplerate/2
+         num_cepstral (int): Number of cepstral coefficients.
+         dc_elimination (bool): If the first dc component should
+             be eliminated or not.
+    Returns:
+        array: A numpy array of size (num_frames x num_cepstral) containing mfcc features.
+    """
+    config = _get_speech_config(
+        sampling_frequency,
+        frame_length,
+        frame_stride,
+        num_cepstral,
+        num_filters,
+        fft_length,
+        low_frequency,
+        high_frequency,
+        dc_elimination,
+    )
+    return internal_mfcc(signal, config)
diff --git a/py-speechsauce/src/lib.rs b/py-speechsauce/src/lib.rs
@@ -1,10 +1,28 @@
+use std::sync::Arc;
 
-use pyo3::prelude::*;
-use numpy::{IntoPyArray, PyReadonlyArray1, PyReadonlyArray2, PyArray2, PyArray1};
-use speechsauce::{feature,processing};
+use numpy::{IntoPyArray, PyArray1, PyArray2, PyReadonlyArray1, PyReadonlyArray2, ToPyArray};
+use pyo3::{callback::IntoPyCallbackOutput, prelude::*};
+use speechsauce::{config::SpeechConfig, feature, processing};
+#[pyclass]
+#[repr(transparent)]
+#[derive(Clone)]
+pub struct PySpeechConfig(SpeechConfig);
+
+impl IntoPyCallbackOutput<Self> for PySpeechConfig {
+    fn convert(self, py: Python<'_>) -> PyResult<Self> {
+        Ok(self)
+    }
+}
+
+impl IntoPy<SpeechConfig> for PySpeechConfig {
+    fn into_py(self, py: Python<'_>) -> SpeechConfig {
+        self.0
+    }
+}
 
 #[pymodule]
-fn speechsauce(_py: Python<'_>, m: &PyModule) -> PyResult<()>{
+fn speechsauce(_py: Python<'_>, m: &PyModule) -> PyResult<()> {
+    m.add_class::<PySpeechConfig>()?;
     /// Compute MFCC features from an audio signal.
     ///     Args:
     ///          signal : the audio signal from which to compute features.
@@ -29,37 +47,64 @@ fn speechsauce(_py: Python<'_>, m: &PyModule) -> PyResult<()>{
     ///         array: A numpy array of size (num_frames x num_cepstral) containing mfcc features.
     #[pyfn(m)]
     fn mfcc<'py>(
-        py: Python<'py>, 
+        py: Python<'py>,
         signal: PyReadonlyArray1<f64>,
-        sampling_frequency: usize,
-        frame_length: f64,           // =0.020,
-        frame_stride: f64,           // =0.01,
-        num_cepstral: usize,         // =13,
-        num_filters: usize,          // =40,
-        fft_length: usize,           // =512,
-        low_frequency: f64,          // =0,
-        high_frequency: Option<f64>, // =None,
-        dc_elimination: bool,        //True
-    ) -> &'py PyArray2<f64>{
-        feature::mfcc(signal.as_array(), sampling_frequency, frame_length, frame_stride, num_cepstral, num_filters, fft_length, low_frequency, high_frequency, dc_elimination).into_pyarray(py)
+        config: Py<PySpeechConfig>,
+    ) -> &'py PyArray2<f64> {
+        let cell = config.as_ref(py);
+        let obj_ref = cell.borrow();
+        let speech_config = &obj_ref.0;
+        feature::mfcc(signal.as_array(), &speech_config).to_pyarray(py)
     }
-    
+
     //TODO: #14 make signal a mutable borrow (PyReadWriteArray) once the next version of numpy-rust is released
     #[pyfn(m)]
     fn preemphasis<'py>(
-        py: Python<'py>, 
-        signal: PyReadonlyArray1<f64>, 
-        shift: isize, 
-        cof: f64 
-    ) -> &'py PyArray1<f64>{
+        py: Python<'py>,
+        signal: PyReadonlyArray1<f64>,
+        shift: isize,
+        cof: f64,
+    ) -> &'py PyArray1<f64> {
         processing::preemphasis(signal.as_array().to_owned(), shift, cof).into_pyarray(py)
     }
 
     #[pyfn(m)]
-    fn cmvn<'py>(py: Python<'py>, vec: PyReadonlyArray2<f64>, variance_normalization: bool)-> &'py PyArray2<f64>
-    {
+    fn cmvn<'py>(
+        py: Python<'py>,
+        vec: PyReadonlyArray2<f64>,
+        variance_normalization: bool,
+    ) -> &'py PyArray2<f64> {
         processing::cmvn(vec.as_array(), variance_normalization).into_pyarray(py)
     }
-
+
+    #[pyfn(m)]
+    fn _speech_config<'py>(
+        py: Python<'py>,
+        sampling_frequency: usize,
+        frame_length: f64,           // =0.020,
+        frame_stride: f64,           // =0.01,
+        num_cepstral: usize,         // =13,
+        num_filters: usize,          // =40,
+        fft_length: usize,           // =512,
+        low_frequency: f64,          // =0,
+        high_frequency: Option<f64>, // =None,
+        dc_elimination: bool,        //True
+    ) -> Py<PySpeechConfig> {
+        Py::new(
+            py,
+            PySpeechConfig(SpeechConfig::new(
+                sampling_frequency,
+                fft_length,
+                frame_length,
+                frame_stride,
+                num_cepstral,
+                num_filters,
+                low_frequency,
+                high_frequency.unwrap_or(sampling_frequency as f64 / 2.0),
+                dc_elimination,
+            )),
+        )
+        .unwrap()
+    }
     Ok(())
-}
+}
diff --git a/speechsauce/Cargo.toml b/speechsauce/Cargo.toml
@@ -14,3 +14,5 @@ num-traits = "0.2.15"
 ndarray={version="^0.15",features=["approx"]}
 ndarray-rand = "0.14.0"
 
+
+
diff --git a/speechsauce/src/config.rs b/speechsauce/src/config.rs
@@ -4,7 +4,7 @@ use ndrustfft::{DctHandler, R2cFftHandler};
 use crate::feature::filterbanks;
 
 #[derive(Default)]
-pub struct MfccConfigBuilder {
+pub struct SpeechConfigBuilder {
     ///sampling frequency of the signal
     sample_rate: usize,
     /// number of FFT points.
@@ -25,9 +25,9 @@ pub struct MfccConfigBuilder {
     dc_elimination: bool,
 }
 
-impl MfccConfigBuilder {
-    fn new(sample_rate: usize) -> MfccConfigBuilder {
-        MfccConfigBuilder {
+impl SpeechConfigBuilder {
+    pub fn new(sample_rate: usize) -> SpeechConfigBuilder {
+        SpeechConfigBuilder {
             sample_rate,
             fft_points: 512,
             frame_length: 0.02,
@@ -40,43 +40,43 @@ impl MfccConfigBuilder {
         }
     }
 
-    pub fn high_freq(mut self, high_frequency: f64) -> MfccConfigBuilder {
+    pub fn high_freq(mut self, high_frequency: f64) -> SpeechConfigBuilder {
         self.high_frequency = high_frequency;
         self
     }
 
-    pub fn dc_elimination(mut self, dc_elimination: bool) -> MfccConfigBuilder {
+    pub fn dc_elimination(mut self, dc_elimination: bool) -> SpeechConfigBuilder {
         self.dc_elimination = dc_elimination;
         self
     }
 
-    pub fn low_freq(mut self, low_frequency: f64) -> MfccConfigBuilder {
+    pub fn low_freq(mut self, low_frequency: f64) -> SpeechConfigBuilder {
         self.low_frequency = low_frequency;
         self
     }
 
-    pub fn num_cepstral(mut self, num_cepstral: usize) -> MfccConfigBuilder {
+    pub fn num_cepstral(mut self, num_cepstral: usize) -> SpeechConfigBuilder {
         self.num_cepstral = num_cepstral;
         self
     }
 
-    pub fn frame_stride(mut self, frame_stride: f64) -> MfccConfigBuilder {
+    pub fn frame_stride(mut self, frame_stride: f64) -> SpeechConfigBuilder {
         self.frame_stride = frame_stride;
         self
     }
 
-    pub fn frame_length(mut self, frame_length: f64) -> MfccConfigBuilder {
+    pub fn frame_length(mut self, frame_length: f64) -> SpeechConfigBuilder {
         self.frame_length = frame_length;
         self
     }
 
-    pub fn fft_points(mut self, fft_points: usize) -> MfccConfigBuilder {
+    pub fn fft_points(mut self, fft_points: usize) -> SpeechConfigBuilder {
         self.fft_points = fft_points;
         self
     }
 
-    pub fn build(self) -> MfccConfig {
-        MfccConfig::new(
+    pub fn build(self) -> SpeechConfig {
+        SpeechConfig::new(
             self.sample_rate,
             self.fft_points,
             self.frame_length,
@@ -90,33 +90,34 @@ impl MfccConfigBuilder {
     }
 }
 
-pub struct MfccConfig {
+#[derive(Clone)]
+pub struct SpeechConfig {
     ///sampling frequency of the signal
-    sample_rate: usize,
+    pub sample_rate: usize,
     /// number of FFT points.
-    fft_points: usize,
+    pub fft_points: usize,
     /// the length of each frame in seconds.
-    frame_length: f64, // =0.020,
+    pub frame_length: f64, // =0.020,
     /// the step between successive frames in seconds.
-    frame_stride: f64, // =0.01,
+    pub frame_stride: f64, // =0.01,
     /// Number of cepstral coefficients.
-    num_cepstral: usize, // =13,
+    pub num_cepstral: usize, // =13,
     /// the number of filters in the filterbank
-    num_filters: usize, // =40,
+    pub num_filters: usize, // =40,
     ///lowest band edge of mel filters in Hz
-    low_frequency: f64,
+    pub low_frequency: f64,
     ///highest band edge of mel filters in Hz.
-    high_frequency: f64,
+    pub high_frequency: f64,
     /// If the first dc component should be eliminated or not
-    dc_elimination: bool,
+    pub dc_elimination: bool,
     ///for
-    dct_handler: DctHandler<f64>,
-    fft_handler: R2cFftHandler<f64>,
+    pub dct_handler: DctHandler<f64>,
+    pub fft_handler: R2cFftHandler<f64>,
     /// Mel-filterbanks
-    filter_banks: Array2<f64>,
+    pub filter_banks: Array2<f64>,
 }
 
-impl MfccConfig {
+impl SpeechConfig {
     pub fn new(
         sample_rate: usize,
         fft_points: usize,
@@ -150,7 +151,7 @@ impl MfccConfig {
         }
     }
 
-    pub fn builder() -> MfccConfigBuilder {
-        MfccConfigBuilder::default()
+    pub fn builder() -> SpeechConfigBuilder {
+        SpeechConfigBuilder::default()
     }
 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -14,3 +14,5 @@ num-traits = "0.2.15"
		ndarray={version="^0.15",features=["approx"]}
		ndarray-rand = "0.14.0"