
Merge pull request #1 from Mobile-Artificial-Intelligence/new-training
New training
danemadsen authored Aug 24, 2024
2 parents e37918c + 7a3b80c commit 4007966
Showing 11 changed files with 382 additions and 138 deletions.
192 changes: 192 additions & 0 deletions .github/workflows/build-python.yml
@@ -0,0 +1,192 @@
name: Build Python

on:
  push:
    branches:
      - main
    paths:
      - '.github/workflows/build-python.yml'
      - 'wrappers/**'
      - 'src/**'
      - 'include/**'
      - 'example/**'
      - 'CMakeLists.txt'

jobs:
  build-linux:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Install CMake
        run: sudo apt-get update && sudo apt-get install -y cmake

      - name: Determine CPU Cores
        id: cpu-info
        run: echo "CPU_CORES=$(nproc)" >> $GITHUB_ENV

      - name: Configure CMake
        run: cmake -B build -DCMAKE_BUILD_TYPE=Release

      - name: Build
        run: cmake --build build --config Release -- -j${{ env.CPU_CORES }}

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v3
        with:
          name: linux-x86_64
          path: lib/

  build-macos:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [macos-13, macos-14]

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Install Homebrew
        run: /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"

      - name: Update CMake
        run: brew install cmake

      - name: Determine CPU Cores
        id: cpu-info
        run: echo "CPU_CORES=$(sysctl -n hw.ncpu)" >> $GITHUB_ENV

      - name: Configure CMake
        run: cmake -B build -DCMAKE_BUILD_TYPE=Release

      - name: Build
        run: cmake --build build --config Release -- -j${{ env.CPU_CORES }}

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v3
        with:
          name: ${{ matrix.os == 'macos-13' && 'macos-x86_64' || 'macos-arm64' }}
          path: lib/

  create-universal-dylibs:
    needs: build-macos
    runs-on: macos-latest
    steps:
      - name: Download x86_64 Build Artifacts
        uses: actions/download-artifact@v3
        with:
          name: macos-x86_64
          path: macos-x86_64

      - name: Download arm64 Build Artifacts
        uses: actions/download-artifact@v3
        with:
          name: macos-arm64
          path: macos-arm64

      - name: Create Universal dylibs
        run: |
          mkdir -p universal/lib
          for dylib in macos-x86_64/*.dylib; do
            dylib_name=$(basename $dylib)
            lipo -create macos-x86_64/$dylib_name macos-arm64/$dylib_name -output universal/$dylib_name
          done
      - name: Upload Universal dylibs
        uses: actions/upload-artifact@v3
        with:
          name: macos-universal
          path: universal/

  build-windows:
    runs-on: windows-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Install CMake
        run: choco install -y cmake

      - name: Configure CMake
        run: cmake -B build -DCMAKE_BUILD_TYPE=Release

      - name: Build
        run: cmake --build build --config Release -- /m:4

      - name: Copy Everything from \build\Release\ to \lib
        run: xcopy /E /Y build\Release\ lib\

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v3
        with:
          name: windows-x86_64
          path: lib/

  build-python:
    needs:
      - build-linux
      - build-windows
      - create-universal-dylibs
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v2
        with:
          submodules: recursive

      - name: Download Linux Build Artifacts
        uses: actions/download-artifact@v3
        with:
          name: linux-x86_64
          path: linux

      - name: Download Windows Build Artifacts
        uses: actions/download-artifact@v3
        with:
          name: windows-x86_64
          path: windows

      - name: Download macOS Universal Build Artifacts
        uses: actions/download-artifact@v3
        with:
          name: macos-universal
          path: macos

      - name: Create Build Directories
        run: |
          mkdir -p build/linux
          mkdir -p build/windows
          mkdir -p build/macos
          mkdir -p build/models
      - name: Copy Linux Build Artifacts
        run: cp -r linux/* build/linux

      - name: Copy Windows Build Artifacts
        run: cp -r windows/* build/windows

      - name: Copy macOS Universal Build Artifacts
        run: cp -r macos/* build/macos

      - name: Copy Models
        run: cp -r models/* build/models

      - name: Copy Python Wrapper
        run: cp wrappers/babylon.py build/__init__.py

      - name: Upload Build Artifacts
        uses: actions/upload-artifact@v3
        with:
          name: python
          path: build/
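
The build-python job assembles a single "python" artifact: the wrapper copied to build/__init__.py at the root, with per-platform shared libraries under linux/, windows/, and macos/ and the ONNX models under models/. A minimal sketch, assuming that layout, of how a wrapper could pick the matching library at import time (the file names libbabylon.so, libbabylon.dylib, and babylon.dll are assumptions; the committed wrappers/babylon.py is not shown in this diff):

import platform
from pathlib import Path

# Hypothetical helper: select the shared library that matches the host OS
# from the linux/, windows/, macos/ layout produced by the build-python job.
_PKG_DIR = Path(__file__).parent

def _library_path() -> Path:
    system = platform.system()
    if system == "Linux":
        return _PKG_DIR / "linux" / "libbabylon.so"      # assumed file name
    if system == "Darwin":
        return _PKG_DIR / "macos" / "libbabylon.dylib"   # universal dylib from create-universal-dylibs
    if system == "Windows":
        return _PKG_DIR / "windows" / "babylon.dll"      # assumed file name
    raise RuntimeError(f"Unsupported platform: {system}")
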
14 changes: 13 additions & 1 deletion example/main.cpp
@@ -13,13 +13,25 @@ int main(int argc, char** argv) {

    text = argv[1];

    DeepPhonemizer::Session dp(dp_model_path);
    DeepPhonemizer::Session dp(dp_model_path, "en_us", true);

    Vits::Session vits(vits_model_path);

    std::vector<std::string> phonemes = dp.g2p(text);

    for (const auto& phoneme : phonemes) {
        std::cout << phoneme << " ";
    }
    std::cout << std::endl;

    vits.tts(phonemes, "./babylon_output.wav");

    std::vector<int64_t> phoneme_ids = dp.g2p_tokens(text);

    for (const auto& id : phoneme_ids) {
        std::cout << id << " ";
    }
    std::cout << std::endl;

    return 0;
}
2 changes: 2 additions & 0 deletions include/babylon.h
@@ -15,6 +15,8 @@ BABYLON_EXPORT int babylon_g2p_init(const char* model_path, const char* language

BABYLON_EXPORT char* babylon_g2p(const char* text);

BABYLON_EXPORT int* babylon_g2p_tokens(const char* text);

BABYLON_EXPORT void babylon_g2p_free(void);

BABYLON_EXPORT int babylon_tts_init(const char* model_path);
20 changes: 6 additions & 14 deletions include/babylon.hpp
@@ -12,11 +12,12 @@ namespace DeepPhonemizer {
    public:
        SequenceTokenizer(const std::vector<std::string>& symbols, const std::vector<std::string>& languages, int char_repeats, bool lowercase = true, bool append_start_end = true);
        std::vector<int64_t> operator()(const std::string& sentence, const std::string& language) const;
        std::vector<std::string> decode(const std::vector<int64_t>& sequence, bool remove_special_tokens = false) const;
        std::vector<std::string> decode(const std::vector<int64_t>& sequence) const;
        std::vector<int64_t> clean(const std::vector<int64_t>& sequence) const;
        int64_t get_token(const std::string& token) const;

    private:
        std::unordered_map<std::string, int> token_to_idx;
        std::unordered_map<int, std::string> idx_to_token;
        std::vector<std::string> tokens;
        int char_repeats;
        bool lowercase;
        bool append_start_end;
@@ -25,9 +26,6 @@ namespace DeepPhonemizer {
        std::string pad_token;
        std::string end_token;
        std::unordered_set<std::string> special_tokens;

        int get_start_index(const std::string& language) const;
        std::string make_start_token(const std::string& language) const;
    };

    class Session {
@@ -36,19 +34,16 @@
        ~Session();

        std::vector<std::string> g2p(const std::string& text);
        std::vector<int64_t> g2p_tokens(const std::string& text);

    private:
        const std::array<const char *, 1> input_names = {"text"};
        const std::array<const char *, 1> output_names = {"output"};

        std::string lang;
        bool punctuation;
        Ort::Session* session;
        SequenceTokenizer* text_tokenizer;
        SequenceTokenizer* phoneme_tokenizer;
        std::unordered_map<std::string, std::vector<std::string>> dictionary;

        std::vector<std::string> g2p_internal(const std::string& text);
        std::vector<int64_t> g2p_tokens_internal(const std::string& text);
    };

    std::vector<std::string> clean_text(const std::string& text);
@@ -72,9 +67,6 @@ namespace Vits {
        void tts(const std::vector<std::string>& phonemes, const std::string& output_path);

    private:
        const std::array<const char *, 3> input_names = {"input", "input_lengths", "scales"};
        const std::array<const char *, 1> output_names = {"output"};

        int sample_rate;
        std::vector<float> scales;
Binary file modified models/deep_phonemizer.onnx
Binary file not shown.
20 changes: 2 additions & 18 deletions scripts/deep_phonemizer/dp_export.py
@@ -1,18 +1,7 @@
import torch
import onnx
from typing import Dict
from dp.model.model import AutoregressiveTransformer, ForwardTransformer, load_checkpoint

# Load and process the dictionary file
def load_and_process_dictionary(file_path: str) -> str:
    with open(file_path, 'r') as file:
        lines = file.readlines()
    processed_lines = []
    for line in lines:
        word, phonemes = line.strip().split(maxsplit=1)
        processed_lines.append(f"{word.lower()}\t{phonemes}")
    return "\n".join(processed_lines)

# Load your model checkpoint
checkpoint_path = './en_us_cmudict_ipa_forward.pt'
model, config = load_checkpoint(checkpoint_path)
@@ -82,18 +71,13 @@ def forward(self, text, phonemes=None, start_index=None):
# Verify the ONNX model
onnx_model = onnx.load(onnx_file_path)

# Load and process dictionary file
dictionary_path = 'babylon_dict.txt'
processed_dictionary = load_and_process_dictionary(dictionary_path)

# Add metadata to the ONNX model
metadata = {
    "languages": "de en_us",
    "text_symbols": "a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z ä ö ü Ä Ö Ü ß",
    "phoneme_symbols": "a b d e f g h i j k l m n o p r s t u v w x y z æ ç ð ø ŋ œ ɐ ɑ ɔ ə ɛ ɜ ɹ ɡ ɪ ʁ ʃ ʊ ʌ ʏ ʒ ʔ ' ˌ ː ̃ ̍ ̥ ̩ ̯ ͡ θ",
    "phoneme_symbols": "a b d e f g h i j k l m n o p r s t u v w x y z æ ç ð ø ŋ œ ɐ ɑ ɔ ə ɛ ɜ ɹ ɡ ɪ ʁ ʃ ʊ ʌ ʏ ʒ ʔ ' ˌ ː ̃ ̍ ̥ ̩ ̯ ͡ θ . , : ; ? ! \" ( ) -",
    "char_repeats": "3" if isinstance(model, ForwardTransformer) else "1",
    "lowercase": "1",
    "dictionary": processed_dictionary
    "lowercase": "1"
}

for key, value in metadata.items():
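
The loop that writes these entries into the model is collapsed in the diff above. For reference, a minimal sketch of the standard ONNX pattern for attaching string metadata to a ModelProto; the variable names mirror the script, but the loop body, the placeholder values, and the save call shown here are assumptions, not the committed code:

import onnx

# Assumed stand-ins for the script's onnx_file_path and metadata dict.
onnx_file_path = "deep_phonemizer.onnx"
metadata = {"languages": "de en_us", "char_repeats": "3", "lowercase": "1"}

onnx_model = onnx.load(onnx_file_path)

for key, value in metadata.items():
    entry = onnx_model.metadata_props.add()  # StringStringEntryProto on the ModelProto
    entry.key = key
    entry.value = value

onnx.save(onnx_model, onnx_file_path)
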
24 changes: 24 additions & 0 deletions src/babylon.cpp
@@ -37,6 +37,30 @@ extern "C" {
        return strdup(phonemes.c_str());
    }

    BABYLON_EXPORT int* babylon_g2p_tokens(const char* text) {
        if (dp == nullptr) {
            std::cerr << "DeepPhonemizer session not initialized." << std::endl;
            return nullptr;
        }

        std::vector<int64_t> phoneme_ids;
        try {
            phoneme_ids = dp->g2p_tokens(text);
        }
        catch (const std::exception& e) {
            std::cerr << e.what() << std::endl;
        }

        phoneme_ids.push_back(-1); // Sentinel value

        int* phoneme_ids_arr = new int[phoneme_ids.size()];
        for (size_t i = 0; i < phoneme_ids.size(); i++) {
            phoneme_ids_arr[i] = phoneme_ids[i];
        }

        return phoneme_ids_arr;
    }

    BABYLON_EXPORT void babylon_g2p_free(void) {
        delete dp;
    }
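
babylon_g2p_tokens returns a heap-allocated int array terminated by a -1 sentinel, so a caller walks the array until it hits -1. A minimal ctypes sketch, assuming a Linux build named libbabylon.so, of how a Python caller might consume it (the real wrapper lives in wrappers/babylon.py and is not shown in this diff; note that no deallocator for the returned array appears in the part of babylon.h shown above):

import ctypes

# Assumed library path; adjust for the platform-specific build artifact.
lib = ctypes.CDLL("./lib/libbabylon.so")

lib.babylon_g2p_tokens.argtypes = [ctypes.c_char_p]
lib.babylon_g2p_tokens.restype = ctypes.POINTER(ctypes.c_int)

def g2p_tokens(text: str) -> list:
    # babylon_g2p_init(...) must have been called first; its full signature
    # is truncated in the header excerpt above, so it is not shown here.
    ptr = lib.babylon_g2p_tokens(text.encode("utf-8"))
    if not ptr:  # NULL means the DeepPhonemizer session was never initialized
        return []
    ids = []
    i = 0
    while ptr[i] != -1:  # -1 is the sentinel appended on the C++ side
        ids.append(ptr[i])
        i += 1
    return ids
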
