Skip to content

Commit

Permalink
Add cori, kristin, ljspeech, claude
Browse files Browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Mar 12, 2024
1 parent 857377c commit c9ecb8f
Show file tree
Hide file tree
Showing 18 changed files with 309 additions and 1 deletion.
3 changes: 2 additions & 1 deletion _script/export-checkpoints.sh
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ find "${piper_checkpoints}" -name '*.ckpt' | sort | \

if [ ! -s "${onnx}" ]; then
# Export to onnx and optimize
python3 -m piper_train.export_onnx \
PYTHONPATH="${repo_dir}/src/python" \
python3 -m piper_train.export_onnx \
"${checkpoint}" \
"${onnx}.unoptimized";

Expand Down
69 changes: 69 additions & 0 deletions _script/generate-samples.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash
set -eo pipefail

# Generates one MP3 audio sample per speaker for every voice model found
# below a piper-voices directory.
#
# Usage:    generate-samples.sh <piper-voices>
# Requires: ffmpeg jq
#
# Each model is expected at
#   <piper-voices>/<family>/<language>/<dataset>/<quality>/<voice>.onnx
# with its JSON config next to it at <voice>.onnx.json. Samples are written to
#   <quality>/samples/speaker_<id>.mp3
# Existing samples of at least 1000 bytes are left untouched, so the script
# can be re-run to fill in only the missing (or truncated) samples.

if [ -z "$1" ]; then
  echo 'Usage: generate-samples.sh <piper-voices>' >&2
  exit 1
fi

# Fail early with a clear message instead of part-way through the voice tree.
for required_tool in ffmpeg jq; do
  if ! command -v "${required_tool}" > /dev/null; then
    echo "[ERROR] Missing required tool: ${required_tool}" >&2
    exit 1
  fi
done

this_dir="$( cd "$( dirname "$0" )" && pwd )"
repo_dir="$(realpath "${this_dir}/../../")"

# Use the project's Python virtual environment when one exists.
venv="${repo_dir}/src/python/.venv"
if [ -d "${venv}" ]; then
  source "${venv}/bin/activate"
fi

# -----------------------------------------------------------------------------

piper_voices="$1"
piper_binary="${repo_dir}/install/piper"

# IFS= keeps leading/trailing whitespace in paths intact.
find "${piper_voices}" -name '*.onnx' | sort | \
  while IFS= read -r onnx; do
    # Walk up the directory tree: quality -> dataset -> language -> family.
    voice_dir="$(dirname "${onnx}")"
    quality="$(basename "${voice_dir}")"
    dataset_dir="$(dirname "${voice_dir}")"
    dataset="$(basename "${dataset_dir}")"
    language_dir="$(dirname "${dataset_dir}")"
    language_family_dir="$(dirname "${language_dir}")"
    language_family="$(basename "${language_family_dir}")"

    test_sentences="${repo_dir}/etc/test_sentences/${language_family}.txt"
    if [ ! -s "${test_sentences}" ]; then
      echo "[ERROR] Missing ${test_sentences}" >&2
      continue
    fi

    samples_dir="${voice_dir}/samples"
    mkdir -p "${samples_dir}"

    num_speakers="$(jq --raw-output '.num_speakers' "${onnx}.json")"
    sample_rate="$(jq --raw-output '.audio.sample_rate' "${onnx}.json")"

    # jq prints "null" for a missing key; shell arithmetic would silently
    # treat that as 0 and produce no samples, so report it explicitly.
    case "${num_speakers}" in
      '' | *[!0-9]*)
        echo "[ERROR] Invalid num_speakers '${num_speakers}' in ${onnx}.json" >&2
        continue
        ;;
    esac

    # Generate a sample from the first test sentence for each speaker
    for ((speaker_id = 0; speaker_id < num_speakers; speaker_id++)); do
      sample_mp3="${samples_dir}/speaker_${speaker_id}.mp3"

      # wc -c is portable, unlike GNU-only `stat --printf`.
      sample_mp3_size=0
      if [ -s "${sample_mp3}" ]; then
        sample_mp3_size="$(wc -c < "${sample_mp3}")"
      fi

      # Anything under 1000 bytes is treated as missing or truncated.
      # Arithmetic context tolerates the padding some wc builds emit.
      if (( sample_mp3_size < 1000 )); then
        echo "Generating sample for ${dataset} (quality=${quality}, speaker=${speaker_id})"

        # Synthesize the first test sentence as raw audio with piper, then
        # encode that stream (read by ffmpeg as 16-bit mono PCM) to MP3.
        head -n1 "${test_sentences}" | \
          "${piper_binary}" --model "${onnx}" --speaker "${speaker_id}" --output_raw | \
          ffmpeg -hide_banner -loglevel warning -y \
            -sample_rate "${sample_rate}" -f s16le -ac 1 -i - \
            -codec:a libmp3lame -qscale:a 2 "${sample_mp3}"
      fi
    done
  done
17 changes: 17 additions & 0 deletions samples/en/en_GB/cori/high/MODEL_CARD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Model card for cori (high)

* Language: en_GB (English, Great Britain)
* Speakers: 1
* Quality: high
* Samplerate: 22,050Hz

## Dataset

* URL: https://librivox.org
* License: public domain

## Training

See: https://brycebeattie.com/files/tts/

UK English female voice. Single Speaker. Trained from scratch on high quality settings for 500 epochs. I put together the dataset, which ended up with about 24 hours of recordings. All recordings came from LibriVox.org.
1 change: 1 addition & 0 deletions samples/en/en_GB/cori/high/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.
Binary file added samples/en/en_GB/cori/high/speaker_0.mp3
Binary file not shown.
17 changes: 17 additions & 0 deletions samples/en/en_US/kristin/medium/MODEL_CARD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Model card for kristin (medium)

* Language: en_US (English, United States)
* Speakers: 1
* Quality: medium
* Samplerate: 22,050Hz

## Dataset

* URL: https://librivox.org
* License: public domain

## Training

See: https://brycebeattie.com/files/tts/

US English female voice. Single Speaker. Trained from scratch on medium quality settings for 2000 epochs. I put together the dataset, which ended up with about 11.5 hours of recordings. All recordings came from LibriVox.org.
1 change: 1 addition & 0 deletions samples/en/en_US/kristin/medium/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.
Binary file added samples/en/en_US/kristin/medium/speaker_0.mp3
Binary file not shown.
17 changes: 17 additions & 0 deletions samples/en/en_US/ljspeech/high/MODEL_CARD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Model card for ljspeech (high)

* Language: en_US (English, United States)
* Speakers: 1
* Quality: high
* Samplerate: 22,050Hz

## Dataset

* URL: https://keithito.com/LJ-Speech-Dataset/
* License: public domain

## Training

See: https://brycebeattie.com/files/tts/

US English female voice. Single speaker. Trained from scratch for 1000 epochs on medium quality settings using the LJ Speech dataset. I re-encoded the recordings to a sample rate of 22,050 Hz so it would match other voices released for Piper TTS.
1 change: 1 addition & 0 deletions samples/en/en_US/ljspeech/high/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.
Binary file added samples/en/en_US/ljspeech/high/speaker_0.mp3
Binary file not shown.
17 changes: 17 additions & 0 deletions samples/en/en_US/ljspeech/medium/MODEL_CARD
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Model card for ljspeech (medium)

* Language: en_US (English, United States)
* Speakers: 1
* Quality: medium
* Samplerate: 22,050Hz

## Dataset

* URL: https://keithito.com/LJ-Speech-Dataset/
* License: public domain

## Training

See: https://brycebeattie.com/files/tts/

US English female voice. Single speaker. Trained from scratch for 1000 epochs on medium quality settings using the LJ Speech dataset. I re-encoded the recordings to a sample rate of 22,050 Hz so it would match other voices released for Piper TTS.
1 change: 1 addition & 0 deletions samples/en/en_US/ljspeech/medium/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
A rainbow is a meteorological phenomenon that is caused by reflection, refraction and dispersion of light in water droplets resulting in a spectrum of light appearing in the sky.
Binary file added samples/en/en_US/ljspeech/medium/speaker_0.mp3
Binary file not shown.
15 changes: 15 additions & 0 deletions samples/es/es_MX/claude/high/MODEL_CARD
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Model card for claude (high)

* Language: es_MX (Spanish, Mexico)
* Speakers: 1
* Quality: high
* Samplerate: 22,050Hz

## Dataset

* URL: https://huggingface.co/spaces/HirCoir/Piper-TTS-Spanish
* License: apache-2.0

## Training

See URL above
1 change: 1 addition & 0 deletions samples/es/es_MX/claude/high/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Un arcoíris o arco iris es un fenómeno óptico y meteorológico que consiste en la aparición en el cielo de un arco de luz multicolor, originado por la descomposición de la luz solar en el espectro visible, la cual se produce por refracción, cuando los rayos del sol atraviesan pequeñas gotas de agua contenidas en la atmósfera terrestre.
Binary file added samples/es/es_MX/claude/high/speaker_0.mp3
Binary file not shown.
150 changes: 150 additions & 0 deletions voices.json
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,36 @@
},
"aliases": []
},
"en_GB-cori-high": {
"key": "en_GB-cori-high",
"name": "cori",
"language": {
"code": "en_GB",
"family": "en",
"region": "GB",
"name_native": "English",
"name_english": "English",
"country_english": "Great Britain"
},
"quality": "high",
"num_speakers": 1,
"speaker_id_map": {},
"files": {
"en/en_GB/cori/high/en_GB-cori-high.onnx": {
"size_bytes": 114219352,
"md5_digest": "3474a80133d9a03e6870d2ac42c18806"
},
"en/en_GB/cori/high/en_GB-cori-high.onnx.json": {
"size_bytes": 4963,
"md5_digest": "0f7d42e77a99193006aa34a34442f5e0"
},
"en/en_GB/cori/high/MODEL_CARD": {
"size_bytes": 470,
"md5_digest": "93702d22a081e5efe612625f2a9d6d51"
}
},
"aliases": []
},
"en_GB-jenny_dioco-medium": {
"key": "en_GB-jenny_dioco-medium",
"name": "jenny_dioco",
Expand Down Expand Up @@ -1498,6 +1528,36 @@
"en-us-kathleen-low"
]
},
"en_US-kristin-medium": {
"key": "en_US-kristin-medium",
"name": "kristin",
"language": {
"code": "en_US",
"family": "en",
"region": "US",
"name_native": "English",
"name_english": "English",
"country_english": "United States"
},
"quality": "medium",
"num_speakers": 1,
"speaker_id_map": {},
"files": {
"en/en_US/kristin/medium/en_US-kristin-medium.onnx": {
"size_bytes": 63531379,
"md5_digest": "5fed42d2296baca042e2bf74785db725"
},
"en/en_US/kristin/medium/en_US-kristin-medium.onnx.json": {
"size_bytes": 4968,
"md5_digest": "70bc97d350c796c64ea5e4d08241afac"
},
"en/en_US/kristin/medium/MODEL_CARD": {
"size_bytes": 479,
"md5_digest": "9bfb8192299b34cddf76455f04cb8cd2"
}
},
"aliases": []
},
"en_US-kusal-medium": {
"key": "en_US-kusal-medium",
"name": "kusal",
Expand Down Expand Up @@ -3549,6 +3609,66 @@
},
"aliases": []
},
"en_US-ljspeech-high": {
"key": "en_US-ljspeech-high",
"name": "ljspeech",
"language": {
"code": "en_US",
"family": "en",
"region": "US",
"name_native": "English",
"name_english": "English",
"country_english": "United States"
},
"quality": "high",
"num_speakers": 1,
"speaker_id_map": {},
"files": {
"en/en_US/ljspeech/high/en_US-ljspeech-high.onnx": {
"size_bytes": 114199011,
"md5_digest": "dad093b5d2cff6a5fda99883ceda09d1"
},
"en/en_US/ljspeech/high/en_US-ljspeech-high.onnx.json": {
"size_bytes": 4967,
"md5_digest": "1f93986f7c39647383f2a0b1471f30aa"
},
"en/en_US/ljspeech/high/MODEL_CARD": {
"size_bytes": 515,
"md5_digest": "59322a9a8d2c0e556f0be1171cd54ea7"
}
},
"aliases": []
},
"en_US-ljspeech-medium": {
"key": "en_US-ljspeech-medium",
"name": "ljspeech",
"language": {
"code": "en_US",
"family": "en",
"region": "US",
"name_native": "English",
"name_english": "English",
"country_english": "United States"
},
"quality": "medium",
"num_speakers": 1,
"speaker_id_map": {},
"files": {
"en/en_US/ljspeech/medium/en_US-ljspeech-medium.onnx": {
"size_bytes": 63531379,
"md5_digest": "109d552e9dd78d92d1169a7edd6de38d"
},
"en/en_US/ljspeech/medium/en_US-ljspeech-medium.onnx.json": {
"size_bytes": 4969,
"md5_digest": "ae02d7c3f01e9606dcac4668e58720c9"
},
"en/en_US/ljspeech/medium/MODEL_CARD": {
"size_bytes": 517,
"md5_digest": "5ad31d314786587d4f97effb0b716d61"
}
},
"aliases": []
},
"en_US-ryan-high": {
"key": "en_US-ryan-high",
"name": "ryan",
Expand Down Expand Up @@ -3834,6 +3954,36 @@
},
"aliases": []
},
"es_MX-claude-high": {
"key": "es_MX-claude-high",
"name": "claude",
"language": {
"code": "es_MX",
"family": "es",
"region": "MX",
"name_native": "Español",
"name_english": "Spanish",
"country_english": "Mexico"
},
"quality": "high",
"num_speakers": 1,
"speaker_id_map": {},
"files": {
"es/es_MX/claude/high/es_MX-claude-high.onnx": {
"size_bytes": 63122309,
"md5_digest": "cb1966e0ff20ca3aa010f6c9a0ce296a"
},
"es/es_MX/claude/high/es_MX-claude-high.onnx.json": {
"size_bytes": 4963,
"md5_digest": "36882201922fabb409f18d284af3eddd"
},
"es/es_MX/claude/high/MODEL_CARD": {
"size_bytes": 247,
"md5_digest": "bca99dc9b9a6c8f2f8563d73d52c4a3b"
}
},
"aliases": []
},
"fa_IR-amir-medium": {
"key": "fa_IR-amir-medium",
"name": "amir",
Expand Down

0 comments on commit c9ecb8f

Please sign in to comment.