diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 917da8d..0000000 --- a/.dockerignore +++ /dev/null @@ -1,11 +0,0 @@ -* -!CMakeLists.txt -!Makefile -!src/ -!setup.py -!pyproject.toml -!piper_phonemize/ -!LICENSE.md -!README.md -!MANIFEST.in -!etc/libtashkeel_model.ort diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..d1ac609 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,89 @@ +name: main + +on: + workflow_dispatch: + push: + tags: + - "*" + +jobs: + create_release: + name: Create release + runs-on: ubuntu-latest + outputs: + upload_url: ${{ steps.create_release.outputs.upload_url }} + steps: + - name: Create release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + tag_name: ${{ github.ref }} + release_name: ${{ github.ref }} + draft: false + prerelease: false + build_linux: + name: "linux build: ${{ matrix.arch }}" + runs-on: ubuntu-20.04 # use older version on purpose for GLIBC + needs: create_release # we need to know the upload URL + strategy: + fail-fast: true + matrix: + arch: [x64, aarch64, armv7] + steps: + - uses: actions/checkout@v3 + - name: configure + run: | + cmake -Bbuild -DCMAKE_INSTALL_PREFIX=_install/piper-phonemize + - name: build + run: | + cmake --build build --config Release + - name: install + run: | + cmake --install build + - name: package + run: | + cd _install && \ + tar -czf piper-phonemize_linux_${{ matrix.arch }}.tar.gz piper-phonemize/ + - name: upload + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + upload_url: ${{ needs.create_release.outputs.upload_url }} + asset_path: _install/piper-phonemize_linux_${{ matrix.arch }}.tar.gz + asset_name: piper-phonemize_linux_${{ matrix.arch }}.tar.gz + asset_content_type: application/octet-stream + build_windows: + runs-on: windows-latest + name: "windows build: ${{ matrix.arch }}" + needs: create_release 
# we need to know the upload URL + strategy: + fail-fast: true + matrix: + arch: [x64] + steps: + - uses: actions/checkout@v3 + - name: configure + run: | + cmake -Bbuild -DCMAKE_INSTALL_PREFIX=_install/piper-phonemize + - name: build + run: | + cmake --build build --config Release + - name: install + run: | + cmake --install build + - name: package + run: | + cd _install + Compress-Archive -LiteralPath piper-phonemize -DestinationPath piper-phonemize_windows_amd64.zip + - name: upload + uses: actions/upload-release-asset@v1 + env: + GITHUB_TOKEN: ${{ github.token }} + with: + upload_url: ${{ needs.create_release.outputs.upload_url }} + asset_path: _install/piper-phonemize_windows_amd64.zip + asset_name: piper-phonemize_windows_amd64.zip + asset_content_type: application/zip diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml deleted file mode 100644 index 439e3dc..0000000 --- a/.github/workflows/windows.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Windows - -on: - workflow_dispatch: - push: - branches: [ master ] - pull_request: - branches: [ master ] - -jobs: - build: - runs-on: windows-latest - name: "build: ${{ matrix.arch }} ${{ matrix.config }} ${{ matrix.link }}" - strategy: - fail-fast: false - matrix: - arch: [x64] - config: [Release] - link: [dll] - - include: - - link: dll - shlib: "ON" - steps: - - uses: actions/checkout@v3 - with: - repository: rhasspy/espeak-ng - path: espeak-ng - - name: configure-espeak - run: cd espeak-ng && cmake -Bbuild -DUSE_ASYNC:BOOL=OFF -DBUILD_SHARED_LIBS:BOOL=${{ matrix.shlib }} -A ${{ matrix.arch }} -DUSE_MBROLA:BOOL=OFF -DUSE_LIBSONIC:BOOL=OFF -DUSE_LIBPCAUDIO:BOOL=OFF -DUSE_KLATT:BOOL=OFF -DUSE_SPEECHPLAYER:BOOL=OFF -DEXTRA_cmn:BOOL=ON -DEXTRA_ru:BOOL=ON - - name: make-espeak - run: cd espeak-ng && cmake --build build --config ${{ matrix.config }} - - uses: actions/checkout@v3 - with: - path: piper-phonemize - - name: prepare-onnxruntime - run: curl -L -o onnxruntime.zip 
'https://github.com/microsoft/onnxruntime/releases/download/v1.14.1/onnxruntime-win-x64-1.14.1.zip' && unzip onnxruntime.zip && mkdir -p piper-phonemize/lib/Windows-x86_64 && mv onnxruntime-* piper-phonemize/lib/Windows-x86_64/onnxruntime - - name: configure-piper - env: - PKG_CONFIG_PATH: espeak-ng/lib/pkgconfig - run: cd piper-phonemize && cmake -Bbuild - - name: make-piper - run: cd piper-phonemize && cmake --build build --config ${{ matrix.config }} diff --git a/.gitignore b/.gitignore index 4cb8fb4..b1ec3fc 100644 --- a/.gitignore +++ b/.gitignore @@ -16,7 +16,7 @@ htmlcov /*.so /test -espeak-ng-data/ - -/espeak-ng/ /lib/ +/download/ +/cmake/ +/_install/ diff --git a/CMakeLists.txt b/CMakeLists.txt index ad6fce5..e14b798 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,86 +10,185 @@ project( LANGUAGES CXX ) -string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") -string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") +if(MSVC) + # Force compiler to use UTF-8 for IPA constants + add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>") + add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>") -# lib/Linux-x86_64 -# lib/Linux-aarch64 -set(ONNXRUNTIME_ROOTDIR ${CMAKE_CURRENT_LIST_DIR}/lib/${CMAKE_HOST_SYSTEM_NAME}-${CMAKE_HOST_SYSTEM_PROCESSOR}/onnxruntime) - -# ---- espeak-ng --- - -find_package(PkgConfig) -pkg_check_modules(ESPEAK_NG REQUIRED espeak-ng<2) - -# ---- Declare library ---- +elseif(NOT APPLE) + # Linux flags + string(APPEND CMAKE_CXX_FLAGS " -Wall -Wextra -Wl,-rpath,'$ORIGIN'") + string(APPEND CMAKE_C_FLAGS " -Wall -Wextra") +endif() add_library( piper_phonemize SHARED src/phonemize.cpp src/phoneme_ids.cpp src/tashkeel.cpp + src/shared.cpp ) set_target_properties(piper_phonemize PROPERTIES VERSION ${PROJECT_VERSION} - SOVERSION ${PROJECT_VERSION_MAJOR}) + SOVERSION ${PROJECT_VERSION_MAJOR} +) + +# ---- onnxruntime --- + +# Look for onnxruntime files in /lib +if(NOT DEFINED ONNXRUNTIME_DIR) + if(NOT DEFINED ONNXRUNTIME_VERSION) + set(ONNXRUNTIME_VERSION "1.14.1") + endif() + + 
if(WIN32) + # Windows x86-64 + set(ONNXRUNTIME_PREFIX "onnxruntime-win-x64-${ONNXRUNTIME_VERSION}") + set(ONNXRUNTIME_EXT "zip") + else() + if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64) + # Linux x86-64 + set(ONNXRUNTIME_PREFIX "onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) + # Linux ARM 64-bit + set(ONNXRUNTIME_PREFIX "onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}") + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL armv7l) + # Linux ARM 32-bit + set(ONNXRUNTIME_PREFIX "onnxruntime-linux-arm32-${ONNXRUNTIME_VERSION}") + set(ONNXRUNTIME_URL "https://github.com/synesthesiam/prebuilt-apps/releases/download/v1.0/onnxruntime-linux-arm32-${ONNXRUNTIME_VERSION}.tgz") + else() + message(FATAL_ERROR "Unsupported architecture for onnxruntime") + endif() + + set(ONNXRUNTIME_EXT "tgz") + endif() + + if(NOT DEFINED ONNXRUNTIME_URL) + set(ONNXRUNTIME_URL "https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/${ONNXRUNTIME_PREFIX}.${ONNXRUNTIME_EXT}") + endif() + + set(ONNXRUNTIME_FILENAME "${ONNXRUNTIME_PREFIX}.${ONNXRUNTIME_EXT}") + set(ONNXRUNTIME_DIR "${CMAKE_CURRENT_LIST_DIR}/lib/${ONNXRUNTIME_PREFIX}") + + if(NOT EXISTS "${ONNXRUNTIME_DIR}") + if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/download/${ONNXRUNTIME_FILENAME}") + # Download onnxruntime release next to this file (absolute path: if(EXISTS) is only well-defined for full paths) + message("Downloading ${ONNXRUNTIME_URL}") + file(DOWNLOAD "${ONNXRUNTIME_URL}" "${CMAKE_CURRENT_LIST_DIR}/download/${ONNXRUNTIME_FILENAME}") + endif() + + # Extract .zip or .tgz to a directory like lib/onnxruntime-linux-x64-1.14.1/ + file(ARCHIVE_EXTRACT INPUT "${CMAKE_CURRENT_LIST_DIR}/download/${ONNXRUNTIME_FILENAME}" DESTINATION "${CMAKE_CURRENT_LIST_DIR}/lib") + endif() +endif() + +# ---- espeak-ng --- + +if(NOT DEFINED ESPEAK_NG_DIR) + set(ESPEAK_NG_DIR "${CMAKE_CURRENT_BINARY_DIR}/ei") + + include(ExternalProject) + ExternalProject_Add( + espeak_ng_external + PREFIX "${CMAKE_CURRENT_BINARY_DIR}/e" + URL "https://github.com/rhasspy/espeak-ng/archive/refs/heads/master.zip" + CMAKE_ARGS 
-DCMAKE_INSTALL_PREFIX:PATH=${ESPEAK_NG_DIR} + CMAKE_ARGS -DUSE_ASYNC:BOOL=OFF + CMAKE_ARGS -DBUILD_SHARED_LIBS:BOOL=ON + CMAKE_ARGS -DUSE_MBROLA:BOOL=OFF + CMAKE_ARGS -DUSE_LIBSONIC:BOOL=OFF + CMAKE_ARGS -DUSE_LIBPCAUDIO:BOOL=OFF + CMAKE_ARGS -DUSE_KLATT:BOOL=OFF + CMAKE_ARGS -DUSE_SPEECHPLAYER:BOOL=OFF + CMAKE_ARGS -DEXTRA_cmn:BOOL=ON + CMAKE_ARGS -DEXTRA_ru:BOOL=ON + ) + add_dependencies(piper_phonemize espeak_ng_external) +endif() + + +# ---- Declare library ---- target_include_directories( piper_phonemize PUBLIC "$" - ${ESPEAK_NG_INCLUDE_DIRS} - ${ONNXRUNTIME_ROOTDIR}/include + ${ESPEAK_NG_DIR}/include + ${ONNXRUNTIME_DIR}/include ) target_link_directories( piper_phonemize PUBLIC - ${ESPEAK_NG_LIBRARY_DIRS} - ${ONNXRUNTIME_ROOTDIR}/lib + ${ESPEAK_NG_DIR}/lib + ${ONNXRUNTIME_DIR}/lib ) target_link_libraries( piper_phonemize - ${ESPEAK_NG_LIBRARIES} + espeak-ng onnxruntime ) -target_compile_options( - piper_phonemize PUBLIC - ${ESPEAK_NG_CFLAGS_OTHER} -) - target_compile_features(piper_phonemize PUBLIC cxx_std_17) # ---- Declare executable ---- -add_executable(piper_phonemize_exe src/main.cpp) -set_property(TARGET piper_phonemize_exe PROPERTY OUTPUT_NAME piper_phonemize) +add_executable(piper_phonemize_exe src/main.cpp src/phoneme_ids.cpp) + +if(NOT WIN32) + set_property(TARGET piper_phonemize_exe PROPERTY OUTPUT_NAME piper_phonemize) +endif() target_compile_features(piper_phonemize_exe PUBLIC cxx_std_17) target_include_directories( piper_phonemize_exe PUBLIC "$" - ${ESPEAK_NG_INCLUDE_DIRS} + ${ESPEAK_NG_DIR}/include ) target_link_directories( piper_phonemize_exe PUBLIC - ${ESPEAK_NG_LIBRARY_DIRS} + ${ESPEAK_NG_DIR}/lib ) target_link_libraries(piper_phonemize_exe PUBLIC - piper_phonemize - ${ESPEAK_NG_LIBRARIES}) + piper_phonemize + espeak-ng +) -target_compile_options( - piper_phonemize_exe PUBLIC - ${ESPEAK_NG_CFLAGS_OTHER} +# ---- Declare test ---- + +include(CTest) +enable_testing() +add_executable(test_piper_phonemize src/test.cpp src/phoneme_ids.cpp) 
+add_test( + NAME test_piper_phonemize + COMMAND test_piper_phonemize "${ESPEAK_NG_DIR}/share/espeak-ng-data" "${CMAKE_SOURCE_DIR}/etc/libtashkeel_model.ort" +) + +target_compile_features(test_piper_phonemize PUBLIC cxx_std_17) + +target_include_directories( + test_piper_phonemize PUBLIC + "$" + ${ESPEAK_NG_DIR}/include +) + +target_link_directories( + test_piper_phonemize PUBLIC + ${ESPEAK_NG_DIR}/lib +) + +target_link_libraries(test_piper_phonemize PUBLIC + piper_phonemize + espeak-ng ) # ---- Declare install targets ---- +include(GNUInstallDirs) + install( TARGETS piper_phonemize LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) @@ -104,3 +203,20 @@ install( install( TARGETS piper_phonemize_exe ARCHIVE DESTINATION ${CMAKE_INSTALL_BINDIR}) + +install( + FILES ${CMAKE_SOURCE_DIR}/etc/libtashkeel_model.ort + TYPE DATA) + +# Dependencies (relative DESTINATION is resolved against the install prefix, so --prefix and DESTDIR are honoured) +install( + DIRECTORY ${ESPEAK_NG_DIR}/ + DESTINATION .) + +install( + DIRECTORY ${ONNXRUNTIME_DIR}/include/ + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) + +install( + DIRECTORY ${ONNXRUNTIME_DIR}/lib/ + DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 9724023..0000000 --- a/Dockerfile +++ /dev/null @@ -1,125 +0,0 @@ -FROM quay.io/pypa/manylinux_2_28_x86_64 as build-amd64 -ARG ONNXRUNTIME_VERSION='1.14.1' -ENV ONNXRUNTIME_URL="https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/onnxruntime-linux-x64-${ONNXRUNTIME_VERSION}.tgz" - -FROM quay.io/pypa/manylinux_2_28_aarch64 as build-arm64 -ARG ONNXRUNTIME_VERSION='1.14.1' -ENV ONNXRUNTIME_URL="https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRUNTIME_VERSION}/onnxruntime-linux-aarch64-${ONNXRUNTIME_VERSION}.tgz" - -FROM debian:bullseye as build-armv7 -ARG ONNXRUNTIME_VERSION='1.14.1' -ENV ONNXRUNTIME_URL="https://github.com/synesthesiam/prebuilt-apps/releases/download/v1.0/onnxruntime-linux-arm32-${ONNXRUNTIME_VERSION}.tgz" - -ENV LANG C.UTF-8 -ENV 
DEBIAN_FRONTEND=noninteractive - -RUN apt-get update && \ - apt-get install --yes \ - build-essential cmake curl ca-certificates autoconf automake libtool pkg-config - -# ----------------------------------------------------------------------------- - -ARG TARGETARCH -ARG TARGETVARIANT -FROM build-${TARGETARCH}${TARGETVARIANT} as build -ARG TARGETARCH -ARG TARGETVARIANT - -ENV LANG C.UTF-8 -ENV DEBIAN_FRONTEND=noninteractive - -WORKDIR /build - -RUN mkdir -p "lib/Linux-$(uname -m)" - -# Download and extract onnxruntime -RUN curl -L "${ONNXRUNTIME_URL}" | \ - tar -C "lib/Linux-$(uname -m)" -xzvf - && \ - mv "lib/Linux-$(uname -m)"/onnxruntime-* \ - "lib/Linux-$(uname -m)/onnxruntime" - -# Build minimal version of espeak-ng -RUN curl -L "https://github.com/rhasspy/espeak-ng/archive/refs/heads/master.tar.gz" | \ - tar -xzf - - -RUN cd espeak-ng-master && \ - export CFLAGS='-D_FILE_OFFSET_BITS=64' && \ - ./autogen.sh && \ - ./configure \ - --without-pcaudiolib \ - --without-klatt \ - --without-speechplayer \ - --without-mbrola \ - --without-sonic \ - --with-extdict-cmn \ - --with-extdict-ru \ - --prefix=/usr && \ - make -j8 src/espeak-ng src/speak-ng && \ - make && \ - make install - -# For future, not currently working -# cmake -Bbuild \ -# -DCMAKE_INSTALL_PREFIX=/usr \ -# -DCMAKE_BUILD_TYPE=Release \ -# -DBUILD_SHARED_LIBS=ON \ -# -DUSE_MBROLA=OFF \ -# -DUSE_LIBSONIC=OFF \ -# -DUSE_LIBPCAUDIO=OFF \ -# -DUSE_KLATT=OFF \ -# -DUSE_SPEECHPLAYER=OFF \ -# -DUSE_ASYNC=OFF \ -# -DEXTRA_cmn=ON \ -# -DEXTRA_ru=ON && \ - -# Build libpiper_phonemize.so -COPY etc/libtashkeel_model.ort ./etc/ -COPY CMakeLists.txt Makefile ./ -COPY src/ ./src/ - -# Sanity check -RUN make test - -# Build libpiper_phonemize.so -RUN mkdir build && \ - cd build && \ - cmake -DCMAKE_BUILD_TYPE=Release .. 
&& \ - make - -# Package libpiper_phonemize.so and piper_phonemize -RUN mkdir -p /dist/lib && \ - cd /dist && \ - cp /build/build/libpiper_phonemize.so* /build/build/piper_phonemize ./lib/ && \ - find /usr -name 'libespeak-ng*.so*' -exec cp -a {} ./lib/ \; && \ - find /usr -type d -name 'espeak-ng-data' -exec cp -R {} ./lib/ \; && \ - mkdir -p ./include && \ - cp -R /usr/include/espeak-ng ./include/ && \ - cp /build/src/phonemize.hpp /build/src/phoneme_ids.hpp /build/src/tashkeel.hpp ./include/ && \ - cp -a "/build/lib/Linux-$(uname -m)/onnxruntime/lib"/libonnxruntime*.so* ./lib/ && \ - cp -R "/build/lib/Linux-$(uname -m)/onnxruntime/include"/* ./include/ && \ - cp -R /build/etc ./ && \ - tar -czf libpiper_phonemize.tar.gz * - -# Build piper_phonemize Python package -COPY setup.py pyproject.toml MANIFEST.in README.md LICENSE.md ./ -COPY piper_phonemize/ ./piper_phonemize/ -RUN find /usr -type d -name 'espeak-ng-data' -exec cp -R {} ./piper_phonemize/ \; && \ - cp /build/etc/libtashkeel_model.ort ./piper_phonemize/ - -RUN mkdir -p wheelhouse -RUN if [ "$(which auditwheel)" ]; then \ - /opt/python/cp39-cp39/bin/pip wheel . && \ - /opt/python/cp310-cp310/bin/pip wheel . && \ - /opt/python/cp311-cp311/bin/pip wheel . 
&& \ - cp -a "/build/lib/Linux-$(uname -m)/onnxruntime/lib"/libonnxruntime*.so* /usr/lib/ && \ - auditwheel repair *.whl; \ - fi - -# ----------------------------------------------------------------------------- - -FROM scratch -ARG TARGETARCH -ARG TARGETVARIANT - -COPY --from=build /dist/libpiper_phonemize.tar.gz ./libpiper_phonemize-${TARGETARCH}${TARGETVARIANT}.tar.gz -COPY --from=build /build/wheelhouse/ ./ diff --git a/Makefile b/Makefile index cb4f784..79e96c3 100644 --- a/Makefile +++ b/Makefile @@ -1,33 +1,10 @@ -.PHONY: release test python python-test docker +.PHONY: all clean -LIB_DIR := lib/Linux-$(shell uname -m) -DOCKER_PLATFORM ?= linux/amd64,linux/arm64,linux/arm/v7 -VENV ?= .venv +all: + cmake -Bbuild -DCMAKE_INSTALL_PREFIX=install + cmake --build build --config Release + cd build && ctest --config Release + cmake --install build -release: - mkdir -p build - cd build && PKG_CONFIG_PATH='../espeak-ng/build/lib/pkgconfig' cmake .. -DCMAKE_BUILD_TYPE=Release && make - cp -a espeak-ng/build/lib/libespeak*.so* build/ - cp -R espeak-ng/build/share/espeak-ng-data build/ - cp -a $(LIB_DIR)/onnxruntime/lib/libonnxruntime*.so* build/ - cp etc/libtashkeel_model.ort build/ - -test: - g++ -Wall -o test -Iespeak-ng/build/include -Lespeak-ng/build/lib -I$(LIB_DIR)/onnxruntime/include -L$(LIB_DIR)/onnxruntime/lib -Isrc -std=c++17 src/test.cpp src/phonemize.cpp src/phoneme_ids.cpp src/tashkeel.cpp -lespeak-ng -lonnxruntime - LD_LIBRARY_PATH="espeak-ng/build/lib:$(LIB_DIR)/onnxruntime/lib:${LD_LIBRARY_PATH}" ./test 'espeak-ng/build/share/espeak-ng-data' - -python: - cp -R espeak-ng/build/share/espeak-ng-data piper_phonemize/ - cp etc/libtashkeel_model.ort piper_phonemize/ - LD_LIBRARY_PATH="espeak-ng/build/lib:${LD_LIBRARY_PATH}" "$(VENV)/bin/pip3" install -e . 
- -python-test: - LD_LIBRARY_PATH="espeak-ng/build/lib:$(LIB_DIR)/onnxruntime/lib:${LD_LIBRARY_PATH}" "$(VENV)/bin/python3" src/python_test.py - -python-wheel: - cp -R espeak-ng/build/share/espeak-ng-data piper_phonemize/ - cp etc/libtashkeel_model.ort piper_phonemize/ - LD_LIBRARY_PATH="espeak-ng/build/lib:${LD_LIBRARY_PATH}" "$(VENV)/bin/python3" setup.py bdist_wheel - -docker: - docker buildx build . --platform "$(DOCKER_PLATFORM)" --output 'type=local,dest=dist' +clean: + rm -rf build install diff --git a/src/main.cpp b/src/main.cpp index 46799f7..be1e233 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -7,6 +7,12 @@ #include +#ifdef _WIN32 +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX +#include <windows.h> +#endif + #include "json.hpp" #include "phoneme_ids.hpp" #include "phonemize.hpp" @@ -18,7 +24,7 @@ using json = nlohmann::json; enum PhonemeType { eSpeakPhonemes, TextPhonemes }; struct RunConfig { - std::string language; + std::string language = ""; PhonemeType phonemeType = eSpeakPhonemes; std::optional eSpeakDataPath; std::optional tashkeelModelPath; @@ -40,6 +46,11 @@ int main(int argc, char *argv[]) { RunConfig runConfig; parseArgs(argc, argv, runConfig); +#ifdef _WIN32 + // Required on Windows to show IPA symbols + SetConsoleOutputCP(CP_UTF8); +#endif + piper::eSpeakPhonemeConfig eSpeakConfig; piper::CodepointsPhonemeConfig codepointsConfig; piper::PhonemeIdConfig idConfig; diff --git a/src/phoneme_ids.cpp b/src/phoneme_ids.cpp index 6372dc0..b6a0d40 100644 --- a/src/phoneme_ids.cpp +++ b/src/phoneme_ids.cpp @@ -6,202 +6,10 @@ namespace piper { -const size_t MAX_PHONEMES = 256; -PhonemeIdMap DEFAULT_PHONEME_ID_MAP = { - {U'_', {0}}, - {U'^', {1}}, - {U'$', {2}}, - {U' ', {3}}, - {U'!', {4}}, - {U'\'', {5}}, - {U'(', {6}}, - {U')', {7}}, - {U',', {8}}, - {U'-', {9}}, - {U'.', {10}}, - {U':', {11}}, - {U';', {12}}, - {U'?', {13}}, - {U'a', {14}}, - {U'b', {15}}, - {U'c', {16}}, - {U'd', {17}}, - {U'e', {18}}, - {U'f', {19}}, - {U'h', {20}}, - {U'i', {21}}, - 
{U'j', {22}}, - {U'k', {23}}, - {U'l', {24}}, - {U'm', {25}}, - {U'n', {26}}, - {U'o', {27}}, - {U'p', {28}}, - {U'q', {29}}, - {U'r', {30}}, - {U's', {31}}, - {U't', {32}}, - {U'u', {33}}, - {U'v', {34}}, - {U'w', {35}}, - {U'x', {36}}, - {U'y', {37}}, - {U'z', {38}}, - {U'æ', {39}}, - {U'ç', {40}}, - {U'ð', {41}}, - {U'ø', {42}}, - {U'ħ', {43}}, - {U'ŋ', {44}}, - {U'œ', {45}}, - {U'ǀ', {46}}, - {U'ǁ', {47}}, - {U'ǂ', {48}}, - {U'ǃ', {49}}, - {U'ɐ', {50}}, - {U'ɑ', {51}}, - {U'ɒ', {52}}, - {U'ɓ', {53}}, - {U'ɔ', {54}}, - {U'ɕ', {55}}, - {U'ɖ', {56}}, - {U'ɗ', {57}}, - {U'ɘ', {58}}, - {U'ə', {59}}, - {U'ɚ', {60}}, - {U'ɛ', {61}}, - {U'ɜ', {62}}, - {U'ɞ', {63}}, - {U'ɟ', {64}}, - {U'ɠ', {65}}, - {U'ɡ', {66}}, - {U'ɢ', {67}}, - {U'ɣ', {68}}, - {U'ɤ', {69}}, - {U'ɥ', {70}}, - {U'ɦ', {71}}, - {U'ɧ', {72}}, - {U'ɨ', {73}}, - {U'ɪ', {74}}, - {U'ɫ', {75}}, - {U'ɬ', {76}}, - {U'ɭ', {77}}, - {U'ɮ', {78}}, - {U'ɯ', {79}}, - {U'ɰ', {80}}, - {U'ɱ', {81}}, - {U'ɲ', {82}}, - {U'ɳ', {83}}, - {U'ɴ', {84}}, - {U'ɵ', {85}}, - {U'ɶ', {86}}, - {U'ɸ', {87}}, - {U'ɹ', {88}}, - {U'ɺ', {89}}, - {U'ɻ', {90}}, - {U'ɽ', {91}}, - {U'ɾ', {92}}, - {U'ʀ', {93}}, - {U'ʁ', {94}}, - {U'ʂ', {95}}, - {U'ʃ', {96}}, - {U'ʄ', {97}}, - {U'ʈ', {98}}, - {U'ʉ', {99}}, - {U'ʊ', {100}}, - {U'ʋ', {101}}, - {U'ʌ', {102}}, - {U'ʍ', {103}}, - {U'ʎ', {104}}, - {U'ʏ', {105}}, - {U'ʐ', {106}}, - {U'ʑ', {107}}, - {U'ʒ', {108}}, - {U'ʔ', {109}}, - {U'ʕ', {110}}, - {U'ʘ', {111}}, - {U'ʙ', {112}}, - {U'ʛ', {113}}, - {U'ʜ', {114}}, - {U'ʝ', {115}}, - {U'ʟ', {116}}, - {U'ʡ', {117}}, - {U'ʢ', {118}}, - {U'ʲ', {119}}, - {U'ˈ', {120}}, - {U'ˌ', {121}}, - {U'ː', {122}}, - {U'ˑ', {123}}, - {U'˞', {124}}, - {U'β', {125}}, - {U'θ', {126}}, - {U'χ', {127}}, - {U'ᵻ', {128}}, - {U'ⱱ', {129}}, - - // tones - {U'0', {130}}, - {U'1', {131}}, - {U'2', {132}}, - {U'3', {133}}, - {U'4', {134}}, - {U'5', {135}}, - {U'6', {136}}, - {U'7', {137}}, - {U'8', {138}}, - {U'9', {139}}, - {U'\u0327', {140}}, // combining cedilla - {U'\u0303', 
{141}}, // combining tilde - {U'\u032a', {142}}, // combining bridge below - {U'\u032f', {143}}, // combining inverted breve below - {U'\u0329', {144}}, // combining vertical line below - {U'ʰ', {145}}, - {U'ˤ', {146}}, - {U'ε', {147}}, - {U'↓', {148}}, - {U'#', {149}}, // Icelandic - {U'\"', {150}}, // Russian - - {U'↑', {151}}, - - // Basque - {U'\u033a', {152}}, - {U'\u033b', {153}}, - - // Luxembourgish - {U'g', {154}}, - {U'ʦ', {155}}, - {U'X', {156}}, - - // Czech - {U'\u031d', {157}}, - {U'\u030a', {158}}, -}; - -// language -> phoneme -> [id, ...] -std::map DEFAULT_ALPHABET = { - // Ukrainian - {"uk", - { - {U'_', {0}}, {U'^', {1}}, {U'$', {2}}, {U' ', {3}}, - {U'!', {4}}, {U'\'', {5}}, {U',', {6}}, {U'-', {7}}, - {U'.', {8}}, {U':', {9}}, {U';', {10}}, {U'?', {11}}, - {U'а', {12}}, {U'б', {13}}, {U'в', {14}}, {U'г', {15}}, - {U'ґ', {16}}, {U'д', {17}}, {U'е', {18}}, {U'є', {19}}, - {U'ж', {20}}, {U'з', {21}}, {U'и', {22}}, {U'і', {23}}, - {U'ї', {24}}, {U'й', {25}}, {U'к', {26}}, {U'л', {27}}, - {U'м', {28}}, {U'н', {29}}, {U'о', {30}}, {U'п', {31}}, - {U'р', {32}}, {U'с', {33}}, {U'т', {34}}, {U'у', {35}}, - {U'ф', {36}}, {U'х', {37}}, {U'ц', {38}}, {U'ч', {39}}, - {U'ш', {40}}, {U'щ', {41}}, {U'ь', {42}}, {U'ю', {43}}, - {U'я', {44}}, {U'\u0301', {45}}, {U'\u0306', {46}}, {U'\u0308', {47}}, - {U'—', {48}}, - }}}; - -void phonemes_to_ids(const std::vector &phonemes, - PhonemeIdConfig &config, - std::vector &phonemeIds, - std::map &missingPhonemes) { +PIPERPHONEMIZE_EXPORT void +phonemes_to_ids(const std::vector &phonemes, PhonemeIdConfig &config, + std::vector &phonemeIds, + std::map &missingPhonemes) { auto phonemeIdMap = std::make_shared(DEFAULT_PHONEME_ID_MAP); if (config.phonemeIdMap) { diff --git a/src/phoneme_ids.hpp b/src/phoneme_ids.hpp index 2ac2dc8..8978df0 100644 --- a/src/phoneme_ids.hpp +++ b/src/phoneme_ids.hpp @@ -6,6 +6,7 @@ #include #include "phonemize.hpp" +#include "shared.hpp" namespace piper { @@ -31,14 +32,202 @@ struct 
PhonemeIdConfig { std::shared_ptr phonemeIdMap; }; -extern const size_t MAX_PHONEMES; -extern PhonemeIdMap DEFAULT_PHONEME_ID_MAP; -extern std::map DEFAULT_ALPHABET; +static const size_t MAX_PHONEMES = 256; +inline PhonemeIdMap DEFAULT_PHONEME_ID_MAP = { + {U'_', {0}}, + {U'^', {1}}, + {U'$', {2}}, + {U' ', {3}}, + {U'!', {4}}, + {U'\'', {5}}, + {U'(', {6}}, + {U')', {7}}, + {U',', {8}}, + {U'-', {9}}, + {U'.', {10}}, + {U':', {11}}, + {U';', {12}}, + {U'?', {13}}, + {U'a', {14}}, + {U'b', {15}}, + {U'c', {16}}, + {U'd', {17}}, + {U'e', {18}}, + {U'f', {19}}, + {U'h', {20}}, + {U'i', {21}}, + {U'j', {22}}, + {U'k', {23}}, + {U'l', {24}}, + {U'm', {25}}, + {U'n', {26}}, + {U'o', {27}}, + {U'p', {28}}, + {U'q', {29}}, + {U'r', {30}}, + {U's', {31}}, + {U't', {32}}, + {U'u', {33}}, + {U'v', {34}}, + {U'w', {35}}, + {U'x', {36}}, + {U'y', {37}}, + {U'z', {38}}, + {U'æ', {39}}, + {U'ç', {40}}, + {U'ð', {41}}, + {U'ø', {42}}, + {U'ħ', {43}}, + {U'ŋ', {44}}, + {U'œ', {45}}, + {U'ǀ', {46}}, + {U'ǁ', {47}}, + {U'ǂ', {48}}, + {U'ǃ', {49}}, + {U'ɐ', {50}}, + {U'ɑ', {51}}, + {U'ɒ', {52}}, + {U'ɓ', {53}}, + {U'ɔ', {54}}, + {U'ɕ', {55}}, + {U'ɖ', {56}}, + {U'ɗ', {57}}, + {U'ɘ', {58}}, + {U'ə', {59}}, + {U'ɚ', {60}}, + {U'ɛ', {61}}, + {U'ɜ', {62}}, + {U'ɞ', {63}}, + {U'ɟ', {64}}, + {U'ɠ', {65}}, + {U'ɡ', {66}}, + {U'ɢ', {67}}, + {U'ɣ', {68}}, + {U'ɤ', {69}}, + {U'ɥ', {70}}, + {U'ɦ', {71}}, + {U'ɧ', {72}}, + {U'ɨ', {73}}, + {U'ɪ', {74}}, + {U'ɫ', {75}}, + {U'ɬ', {76}}, + {U'ɭ', {77}}, + {U'ɮ', {78}}, + {U'ɯ', {79}}, + {U'ɰ', {80}}, + {U'ɱ', {81}}, + {U'ɲ', {82}}, + {U'ɳ', {83}}, + {U'ɴ', {84}}, + {U'ɵ', {85}}, + {U'ɶ', {86}}, + {U'ɸ', {87}}, + {U'ɹ', {88}}, + {U'ɺ', {89}}, + {U'ɻ', {90}}, + {U'ɽ', {91}}, + {U'ɾ', {92}}, + {U'ʀ', {93}}, + {U'ʁ', {94}}, + {U'ʂ', {95}}, + {U'ʃ', {96}}, + {U'ʄ', {97}}, + {U'ʈ', {98}}, + {U'ʉ', {99}}, + {U'ʊ', {100}}, + {U'ʋ', {101}}, + {U'ʌ', {102}}, + {U'ʍ', {103}}, + {U'ʎ', {104}}, + {U'ʏ', {105}}, + {U'ʐ', {106}}, + {U'ʑ', {107}}, + {U'ʒ', {108}}, 
+ {U'ʔ', {109}}, + {U'ʕ', {110}}, + {U'ʘ', {111}}, + {U'ʙ', {112}}, + {U'ʛ', {113}}, + {U'ʜ', {114}}, + {U'ʝ', {115}}, + {U'ʟ', {116}}, + {U'ʡ', {117}}, + {U'ʢ', {118}}, + {U'ʲ', {119}}, + {U'ˈ', {120}}, + {U'ˌ', {121}}, + {U'ː', {122}}, + {U'ˑ', {123}}, + {U'˞', {124}}, + {U'β', {125}}, + {U'θ', {126}}, + {U'χ', {127}}, + {U'ᵻ', {128}}, + {U'ⱱ', {129}}, -void phonemes_to_ids(const std::vector &phonemes, - PhonemeIdConfig &config, - std::vector &phonemeIds, - std::map &missingPhonemes); + // tones + {U'0', {130}}, + {U'1', {131}}, + {U'2', {132}}, + {U'3', {133}}, + {U'4', {134}}, + {U'5', {135}}, + {U'6', {136}}, + {U'7', {137}}, + {U'8', {138}}, + {U'9', {139}}, + {U'\u0327', {140}}, // combining cedilla + {U'\u0303', {141}}, // combining tilde + {U'\u032a', {142}}, // combining bridge below + {U'\u032f', {143}}, // combining inverted breve below + {U'\u0329', {144}}, // combining vertical line below + {U'ʰ', {145}}, + {U'ˤ', {146}}, + {U'ε', {147}}, + {U'↓', {148}}, + {U'#', {149}}, // Icelandic + {U'\"', {150}}, // Russian + + {U'↑', {151}}, + + // Basque + {U'\u033a', {152}}, + {U'\u033b', {153}}, + + // Luxembourgish + {U'g', {154}}, + {U'ʦ', {155}}, + {U'X', {156}}, + + // Czech + {U'\u031d', {157}}, + {U'\u030a', {158}}, +}; + +// language -> phoneme -> [id, ...] 
+inline std::map DEFAULT_ALPHABET = { + // Ukrainian + {"uk", + { + {U'_', {0}}, {U'^', {1}}, {U'$', {2}}, {U' ', {3}}, + {U'!', {4}}, {U'\'', {5}}, {U',', {6}}, {U'-', {7}}, + {U'.', {8}}, {U':', {9}}, {U';', {10}}, {U'?', {11}}, + {U'а', {12}}, {U'б', {13}}, {U'в', {14}}, {U'г', {15}}, + {U'ґ', {16}}, {U'д', {17}}, {U'е', {18}}, {U'є', {19}}, + {U'ж', {20}}, {U'з', {21}}, {U'и', {22}}, {U'і', {23}}, + {U'ї', {24}}, {U'й', {25}}, {U'к', {26}}, {U'л', {27}}, + {U'м', {28}}, {U'н', {29}}, {U'о', {30}}, {U'п', {31}}, + {U'р', {32}}, {U'с', {33}}, {U'т', {34}}, {U'у', {35}}, + {U'ф', {36}}, {U'х', {37}}, {U'ц', {38}}, {U'ч', {39}}, + {U'ш', {40}}, {U'щ', {41}}, {U'ь', {42}}, {U'ю', {43}}, + {U'я', {44}}, {U'\u0301', {45}}, {U'\u0306', {46}}, {U'\u0308', {47}}, + {U'—', {48}}, + }}}; + +PIPERPHONEMIZE_EXPORT void +phonemes_to_ids(const std::vector &phonemes, PhonemeIdConfig &config, + std::vector &phonemeIds, + std::map &missingPhonemes); } // namespace piper diff --git a/src/phonemize.cpp b/src/phonemize.cpp index 28b01e4..c920a95 100644 --- a/src/phonemize.cpp +++ b/src/phonemize.cpp @@ -14,8 +14,9 @@ namespace piper { std::map DEFAULT_PHONEME_MAP = { {"pt-br", {{U'c', {U'k'}}}}}; -void phonemize_eSpeak(std::string text, eSpeakPhonemeConfig &config, - std::vector> &phonemes) { +PIPERPHONEMIZE_EXPORT void +phonemize_eSpeak(std::string text, eSpeakPhonemeConfig &config, + std::vector> &phonemes) { auto voice = config.voice; int result = espeak_SetVoiceByName(voice.c_str()); @@ -134,8 +135,9 @@ void phonemize_eSpeak(std::string text, eSpeakPhonemeConfig &config, // ---------------------------------------------------------------------------- -void phonemize_codepoints(std::string text, CodepointsPhonemeConfig &config, - std::vector> &phonemes) { +PIPERPHONEMIZE_EXPORT void +phonemize_codepoints(std::string text, CodepointsPhonemeConfig &config, + std::vector> &phonemes) { if (config.casing == CASING_LOWER) { text = una::cases::to_lowercase_utf8(text); diff --git 
a/src/phonemize.hpp b/src/phonemize.hpp index bb8f4d9..5e4238e 100644 --- a/src/phonemize.hpp +++ b/src/phonemize.hpp @@ -6,6 +6,8 @@ #include #include +#include "shared.hpp" + #define CLAUSE_INTONATION_FULL_STOP 0x00000000 #define CLAUSE_INTONATION_COMMA 0x00001000 #define CLAUSE_INTONATION_QUESTION 0x00002000 @@ -48,8 +50,9 @@ struct eSpeakPhonemeConfig { // Returns phonemes for each sentence as a separate std::vector. // // Assumes espeak_Initialize has already been called. -void phonemize_eSpeak(std::string text, eSpeakPhonemeConfig &config, - std::vector> &phonemes); +PIPERPHONEMIZE_EXPORT void +phonemize_eSpeak(std::string text, eSpeakPhonemeConfig &config, + std::vector> &phonemes); enum TextCasing { CASING_IGNORE = 0, @@ -68,8 +71,9 @@ struct CodepointsPhonemeConfig { // Returns a single std::vector of "phonemes". // // Does not detect sentence boundaries. -void phonemize_codepoints(std::string text, CodepointsPhonemeConfig &config, - std::vector> &phonemes); +PIPERPHONEMIZE_EXPORT void +phonemize_codepoints(std::string text, CodepointsPhonemeConfig &config, + std::vector> &phonemes); } // namespace piper diff --git a/src/shared.cpp b/src/shared.cpp new file mode 100644 index 0000000..41d7c6c --- /dev/null +++ b/src/shared.cpp @@ -0,0 +1 @@ +#include "shared.hpp" diff --git a/src/shared.hpp b/src/shared.hpp new file mode 100644 index 0000000..c7264f0 --- /dev/null +++ b/src/shared.hpp @@ -0,0 +1,11 @@ +#ifndef SHARED_H_ +#define SHARED_H_ + +#ifdef _WIN32 +#define PIPERPHONEMIZE_EXPORT __declspec(dllexport) +#else +#define PIPERPHONEMIZE_EXPORT +#endif + + +#endif // SHARED_H_ diff --git a/src/tashkeel.cpp b/src/tashkeel.cpp index 2db274e..23683b7 100644 --- a/src/tashkeel.cpp +++ b/src/tashkeel.cpp @@ -78,15 +78,23 @@ std::set HARAKAT_CHARS{ std::set INVALID_HARAKA_IDS{UNK_ID, 8}; -void tashkeel_load(std::string modelPath, State &state) { +PIPERPHONEMIZE_EXPORT void tashkeel_load(std::string modelPath, State &state) { state.env = 
Ort::Env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str()); state.env.DisableTelemetryEvents(); state.options.SetExecutionMode(ExecutionMode::ORT_PARALLEL); - state.onnx = Ort::Session(state.env, modelPath.c_str(), state.options); + +#ifdef _WIN32 + auto modelPathW = std::wstring(modelPath.begin(), modelPath.end()); + auto modelPathStr = modelPathW.c_str(); +#else + auto modelPathStr = modelPath.c_str(); +#endif + + state.onnx = Ort::Session(state.env, modelPathStr, state.options); } -std::string tashkeel_run(std::string text, State &state) { +PIPERPHONEMIZE_EXPORT std::string tashkeel_run(std::string text, State &state) { auto memoryInfo = Ort::MemoryInfo::CreateCpu( OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault); diff --git a/src/tashkeel.hpp b/src/tashkeel.hpp index 4f00e57..1611779 100644 --- a/src/tashkeel.hpp +++ b/src/tashkeel.hpp @@ -7,6 +7,8 @@ #include +#include "shared.hpp" + // https://github.com/mush42/libtashkeel namespace tashkeel { @@ -29,8 +31,8 @@ struct State { State() : onnx(nullptr){}; }; -void tashkeel_load(std::string modelPath, State &state); -std::string tashkeel_run(std::string text, State &state); +PIPERPHONEMIZE_EXPORT void tashkeel_load(std::string modelPath, State &state); +PIPERPHONEMIZE_EXPORT std::string tashkeel_run(std::string text, State &state); } // namespace tashkeel diff --git a/src/test.cpp b/src/test.cpp index f580b60..b7619a1 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -51,6 +51,11 @@ int main(int argc, char *argv[]) { return 1; } + if (argc < 3) { + std::cerr << "Need tashkeel model path" << std::endl; + return 1; + } + int result = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, argv[1], 0); if (result < 0) { std::cerr << "Failed to initialize eSpeak" << std::endl; @@ -163,7 +168,7 @@ int main(int argc, char *argv[]) { // Test Arabic with libtashkeel (https://github.com/mush42/libtashkeel) tashkeel::State tashkeelState; - tashkeel::tashkeel_load("etc/libtashkeel_model.ort", 
tashkeelState); + tashkeel::tashkeel_load(argv[2], tashkeelState); std::string expectedText = "مَرْحَبًا"; std::string actualText = tashkeel::tashkeel_run("مرحبا", tashkeelState);