Fix bugs related to fMLLR and add better testing suite (#12)
mmcauliffe authored Feb 1, 2024
1 parent 15be70b commit 33307be
Showing 41 changed files with 1,175 additions and 382 deletions.
7 changes: 7 additions & 0 deletions .readthedocs.yaml
@@ -10,3 +10,10 @@ sphinx:

conda:
environment: rtd_environment.yml

# This part is necessary otherwise the project is not built
python:
version: 3.9
install:
- method: pip
path: .
62 changes: 62 additions & 0 deletions extensions/ivector/ivector.cpp
@@ -499,6 +499,29 @@ void pybind_ivector_extractor(py::module &m) {
py::arg("opts"),
py::arg("extractor"),
py::call_guard<py::gil_scoped_release>())
.def(py::pickle(
[](const PyClass &p) { // __getstate__
/* Return a tuple that fully encodes the state of the object */
std::ostringstream os;
bool binary = true;
p.Write(os, binary);
return py::make_tuple(
py::bytes(os.str()));
},
[](py::tuple t) { // __setstate__
if (t.size() != 1)
throw std::runtime_error("Invalid state!");

/* Create a new C++ instance */
PyClass *p = new PyClass();

/* Assign any additional state */
std::istringstream str(t[0].cast<std::string>());
p->Read(str, true);

return p;
}
))
.def("update", [](
PyClass &stats,
IvectorExtractor &extractor,
@@ -930,6 +953,29 @@ void pybind_plda(py::module &m) {
},
py::arg("utterance_ivector"),
py::arg("transformed_enrolled_ivectors"))
.def(py::pickle(
[](const PyClass &p) { // __getstate__
/* Return a tuple that fully encodes the state of the object */
std::ostringstream os;
bool binary = true;
p.Write(os, binary);
return py::make_tuple(
py::bytes(os.str()));
},
[](py::tuple t) { // __setstate__
if (t.size() != 1)
throw std::runtime_error("Invalid state!");

/* Create a new C++ instance */
PyClass *p = new PyClass();

/* Assign any additional state */
std::istringstream str(t[0].cast<std::string>());
p->Read(str, true);

return p;
}
))
.def("TransformIvector",
py::overload_cast<const PldaConfig &,
const VectorBase<double> &,
@@ -1363,6 +1409,22 @@ void init_ivector(py::module &_m) {
py::arg("normalize") = true,
py::arg("scaleup") = true);

m.def("ivector_normalize_length",
[](
Vector<double>* ivector,
bool normalize = true,
bool scaleup = true
) {
py::gil_scoped_release gil_release;
double norm = ivector->Norm(2.0);
double ratio = norm / sqrt(ivector->Dim());
if (!scaleup) ratio = norm;
if (normalize) ivector->Scale(1.0 / ratio);
},
py::arg("ivector"),
py::arg("normalize") = true,
py::arg("scaleup") = true);

m.def("ivector_subtract_mean",
[](
std::vector<Vector<float>*> &ivectors
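
Note on the additions above: the py::pickle bindings serialize the extractor-stats and PLDA objects through their binary Write/Read methods, so they can round-trip through pickle (and therefore multiprocessing). The new ivector_normalize_length binding scales an i-vector in place so that its norm matches sqrt(dim), or exactly 1 when scaleup=False. The NumPy snippet below only mirrors that arithmetic for reference; it is an illustrative sketch, not the kalpy API.

import numpy as np

def normalize_length(ivector: np.ndarray, normalize: bool = True, scaleup: bool = True) -> np.ndarray:
    """Mirror of the C++ logic above: scale so that ||v|| == sqrt(dim) (or 1 if scaleup=False)."""
    norm = np.linalg.norm(ivector)
    ratio = norm / np.sqrt(ivector.shape[0])  # ratio of the norm to its expected value sqrt(dim)
    if not scaleup:
        ratio = norm
    if normalize:
        ivector = ivector / ratio
    return ivector

v = np.ones(4)                              # norm 2 == sqrt(4), already at the target length
print(normalize_length(v))                  # [1. 1. 1. 1.]
print(normalize_length(v, scaleup=False))   # [0.5 0.5 0.5 0.5], i.e. true unit norm
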
88 changes: 58 additions & 30 deletions extensions/transform/transform.cpp
@@ -159,6 +159,25 @@ void pybind_cmvn(py::module &m) {
py::arg("uttlist"),
py::arg("feat_reader"));

m.def("apply_cmvn",
[](
const Matrix<BaseFloat> &feats,
const Matrix<double> &cmvn_stats,
bool reverse = false,
bool norm_vars = false
){
py::gil_scoped_release release;
Matrix<BaseFloat> feat_out(feats);
if (reverse) {
ApplyCmvnReverse(cmvn_stats, norm_vars, &feat_out);
} else {
ApplyCmvn(cmvn_stats, norm_vars, &feat_out);
}

return feat_out;
},
py::arg("feats"), py::arg("cmvn_stats"), py::arg("reverse") = false, py::arg("norm_vars") = false);

m.def("ApplyCmvn",
&ApplyCmvn,
"Apply cepstral mean and variance normalization to a matrix of features. "
@@ -380,6 +399,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
py::arg("feats"))
.def("accumulate_from_alignment",
[](PyClass& spk_stats,
const TransitionModel &alignment_trans_model,
const AmDiagGmm &alignment_am_gmm,
const TransitionModel &trans_model,
const AmDiagGmm &am_gmm,
const Matrix<BaseFloat> &feats,
@@ -391,49 +412,51 @@
bool two_models = false
){
py::gil_scoped_release gil_release;
Posterior posterior;

AlignmentToPosterior(ali, &posterior);

if (distributed)
    WeightSilencePostDistributed(alignment_trans_model, silence_set,
                                 silence_scale, &posterior);
else
    WeightSilencePost(alignment_trans_model, silence_set,
                      silence_scale, &posterior);

Posterior pdf_posterior;
ConvertPosteriorToPdfs(alignment_trans_model, posterior, &pdf_posterior);

if (!two_models){
for (size_t i = 0; i < pdf_posterior.size(); i++) {
    for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
        int32 pdf_id = pdf_posterior[i][j].first;
        spk_stats.AccumulateForGmm(alignment_am_gmm.GetPdf(pdf_id),
                                   feats.Row(i),
                                   pdf_posterior[i][j].second);
    }
}
}
else{


GaussPost gpost(posterior.size());
BaseFloat tot_like_this_file = 0.0, tot_weight = 0.0;
for (size_t i = 0; i < posterior.size(); i++) {
    gpost[i].reserve(pdf_posterior[i].size());
    for (size_t j = 0; j < pdf_posterior[i].size(); j++) {
        int32 pdf_id = pdf_posterior[i][j].first;
        BaseFloat weight = pdf_posterior[i][j].second;
        const DiagGmm &gmm = alignment_am_gmm.GetPdf(pdf_id);
        Vector<BaseFloat> this_post_vec;
        BaseFloat like =
            gmm.ComponentPosteriors(feats.Row(i), &this_post_vec);
        this_post_vec.Scale(weight);
        if (rand_prune > 0.0)
            for (int32 k = 0; k < this_post_vec.Dim(); k++)
                this_post_vec(k) = RandPrune(this_post_vec(k),
                                             rand_prune);
        if (!this_post_vec.IsZero())
            gpost[i].push_back(std::make_pair(pdf_id, this_post_vec));
        tot_like_this_file += like * weight;
        tot_weight += weight;
}
Expand All @@ -450,6 +473,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
}
}
},
py::arg("alignment_trans_model"),
py::arg("alignment_am_gmm"),
py::arg("trans_model"),
py::arg("am_gmm"),
py::arg("feats"),
Expand All @@ -461,6 +486,8 @@ void pybind_fmllr_diag_gmm(py::module &m) {
py::arg("two_models") = false)
.def("accumulate_from_lattice",
[](PyClass* spk_stats,
const TransitionModel &alignment_trans_model,
const AmDiagGmm &alignment_am_gmm,
const TransitionModel &trans_model,
const AmDiagGmm &am_gmm,
const Matrix<BaseFloat> &feats,
@@ -490,13 +517,13 @@ void pybind_fmllr_diag_gmm(py::module &m) {
Posterior post;
double lat_like = LatticeForwardBackward(lat, &post);
if (distributed)
WeightSilencePostDistributed(alignment_trans_model, silence_set,
silence_scale, &post);
else
WeightSilencePost(alignment_trans_model, silence_set,
silence_scale, &post);
Posterior pdf_post;
ConvertPosteriorToPdfs(alignment_trans_model, post, &pdf_post);
if (!two_models){
for (size_t i = 0; i < post.size(); i++) {
for (size_t j = 0; j < pdf_post[i].size(); j++) {
@@ -517,7 +544,7 @@
for (size_t j = 0; j < pdf_post[i].size(); j++) {
int32 pdf_id = pdf_post[i][j].first;
BaseFloat weight = pdf_post[i][j].second;
const DiagGmm &gmm = alignment_am_gmm.GetPdf(pdf_id);
Vector<BaseFloat> this_post_vec;
BaseFloat like =
gmm.ComponentPosteriors(feats.Row(i), &this_post_vec);
@@ -542,6 +569,8 @@
}
}
},
py::arg("alignment_trans_model"),
py::arg("alignment_am_gmm"),
py::arg("trans_model"),
py::arg("am_gmm"),
py::arg("feats"),
@@ -567,13 +596,12 @@
.def("compute_transform",
[](PyClass& f, const AmDiagGmm &am_gmm,
const FmllrOptions &fmllr_opts){
BaseFloat impr, tot_t;
Matrix<BaseFloat> transform(am_gmm.Dim(), am_gmm.Dim()+1);
{
    transform.SetUnit();
    f.Update(fmllr_opts, &transform, &impr, &tot_t);
    return py::make_tuple(transform, impr, tot_t);
}
},
py::arg("am_gmm"),
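
The fMLLR changes above are the core of this commit: accumulate_from_alignment and accumulate_from_lattice now take the alignment model separately from the model being adapted, apply_cmvn returns a normalized copy of the features, and compute_transform returns its diagnostics together with the transform. A rough Python-side sketch follows; the import path, the FmllrDiagGmmAccs class name and constructor, and all placeholder variables are assumptions rather than guaranteed kalpy API.

from _kalpy import transform

# apply_cmvn returns a new, normalized feature matrix instead of modifying its input:
normalized_feats = transform.apply_cmvn(feats, cmvn_stats, norm_vars=True)

# Two-model fMLLR: silence weighting and the transition-id -> pdf conversion use the
# model that produced the alignment, while statistics are accumulated for the model
# being adapted.
spk_stats = transform.FmllrDiagGmmAccs(am_gmm.Dim())  # class name/constructor assumed
spk_stats.accumulate_from_alignment(
    alignment_trans_model, alignment_am_gmm,  # arguments added by this commit
    trans_model, am_gmm, normalized_feats,
    # ... remaining arguments (alignment, silence set/scale, two_models, ...) as bound above
)

# compute_transform now reports the objective-function improvement and frame count:
fmllr_mat, objf_impr, tot_frames = spk_stats.compute_transform(
    am_gmm, transform.FmllrOptions()
)

Using the alignment model for the posterior side is what makes two-model fMLLR correct: when two_models=True, the posteriors must come from the model that generated the alignment, even though the transform is estimated against the target model.
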
4 changes: 3 additions & 1 deletion kalpy/data.py
@@ -33,7 +33,9 @@ class Segment:
channel: typing.Optional[int] = 0

def load_audio(self):
duration = None
if self.end is not None and self.begin is not None:
    duration = self.end - self.begin
y, _ = librosa.load(
self.file_path,
sr=16000,
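
The Segment fix above means a segment without explicit boundaries now loads the whole file, since librosa.load accepts duration=None. A small sketch; the field names and defaults are inferred from the snippet above rather than the full class definition.

from kalpy.data import Segment

whole_file = Segment("audio.wav")                    # begin/end not set: load everything
excerpt = Segment("audio.wav", begin=1.5, end=3.0)   # 1.5 s excerpt, resampled to 16 kHz

audio = whole_file.load_audio()  # previously raised a TypeError computing end - begin with None
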
19 changes: 16 additions & 3 deletions kalpy/feat/cmvn.py
@@ -90,13 +90,26 @@ def compute_cmvn_for_export(
Returns
-------
:class:`_kalpy.matrix.DoubleMatrix`
    CMVN statistics for the utterances
"""
if False:
    cmvn_stats = DoubleMatrix()
    is_init = False
    num_done = 0
    num_error = 0
    for utt in utterance_list:
        print(utt)
        feats = feature_reader.Value(utt)
        if not is_init:
            transform.InitCmvnStats(feats.NumCols(), cmvn_stats)
            is_init = True
        transform.AccCmvnStats(feats, None, cmvn_stats)
        num_done += 1
cmvn_stats, num_done, num_error = transform.calculate_cmvn(utterance_list, feature_reader)
self.num_done += num_done
self.num_error += num_error
return cmvn_stats

def export_cmvn(
self,
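
For reference, calculate_cmvn/AccCmvnStats build the standard Kaldi CMVN statistics: a 2 x (dim + 1) double matrix whose first row holds the per-dimension feature sums plus the frame count in its last column, and whose second row holds the per-dimension sums of squares. The self-contained NumPy sketch below illustrates that layout and the normalization apply_cmvn performs; it is illustrative only, not the kalpy API.

import numpy as np

def acc_cmvn_stats(feats: np.ndarray) -> np.ndarray:
    """Accumulate Kaldi-style CMVN statistics for a (frames x dim) feature matrix."""
    dim = feats.shape[1]
    stats = np.zeros((2, dim + 1))
    stats[0, :dim] = feats.sum(axis=0)         # per-dimension sums
    stats[0, dim] = feats.shape[0]             # frame count
    stats[1, :dim] = (feats ** 2).sum(axis=0)  # per-dimension sums of squares
    return stats

def apply_cmvn(feats: np.ndarray, stats: np.ndarray, norm_vars: bool = False) -> np.ndarray:
    """Subtract the accumulated mean and, optionally, divide by the standard deviation."""
    dim = feats.shape[1]
    count = stats[0, dim]
    mean = stats[0, :dim] / count
    out = feats - mean
    if norm_vars:
        var = stats[1, :dim] / count - mean ** 2
        out = out / np.sqrt(var)
    return out

feats = np.random.randn(200, 13) * 3.0 + 5.0
normalized = apply_cmvn(feats, acc_cmvn_stats(feats), norm_vars=True)
print(normalized.mean(axis=0).round(6))  # ~0 in every dimension
print(normalized.var(axis=0).round(6))   # ~1 in every dimension
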