Skip to content

Commit

Permalink
Merge branch 'master' into HEAD
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Aug 27, 2023
2 parents 793498e + 7e54df4 commit 52222bd
Show file tree
Hide file tree
Showing 9 changed files with 201 additions and 134 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
- Various other examples are available in the [examples](examples) folder

The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD
instrisics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since
the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.

## Quick start
Expand Down
36 changes: 34 additions & 2 deletions examples/main/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ struct whisper_params {
float logprob_thold = -1.00f;

bool speed_up = false;
bool debug_mode = false;
bool translate = false;
bool detect_language = false;
bool diarize = false;
Expand All @@ -87,6 +88,7 @@ struct whisper_params {
bool print_colors = false;
bool print_progress = false;
bool no_timestamps = false;
bool log_score = false;

std::string language = "en";
std::string prompt;
Expand Down Expand Up @@ -134,7 +136,8 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
else if (arg == "-wt" || arg == "--word-thold") { params.word_thold = std::stof(argv[++i]); }
else if (arg == "-et" || arg == "--entropy-thold") { params.entropy_thold = std::stof(argv[++i]); }
else if (arg == "-lpt" || arg == "--logprob-thold") { params.logprob_thold = std::stof(argv[++i]); }
else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
// else if (arg == "-su" || arg == "--speed-up") { params.speed_up = true; }
else if (arg == "-debug"|| arg == "--debug-mode") { params.debug_mode = true; }
else if (arg == "-tr" || arg == "--translate") { params.translate = true; }
else if (arg == "-di" || arg == "--diarize") { params.diarize = true; }
else if (arg == "-tdrz" || arg == "--tinydiarize") { params.tinydiarize = true; }
Expand All @@ -159,6 +162,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-f" || arg == "--file") { params.fname_inp.emplace_back(argv[++i]); }
else if (arg == "-oved" || arg == "--ov-e-device") { params.openvino_encode_device = argv[++i]; }
else if (arg == "-ls" || arg == "--log-score") { params.log_score = true; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
Expand Down Expand Up @@ -188,7 +192,8 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -wt N, --word-thold N [%-7.2f] word timestamp probability threshold\n", params.word_thold);
fprintf(stderr, " -et N, --entropy-thold N [%-7.2f] entropy threshold for decoder fail\n", params.entropy_thold);
fprintf(stderr, " -lpt N, --logprob-thold N [%-7.2f] log probability threshold for decoder fail\n", params.logprob_thold);
fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
// fprintf(stderr, " -su, --speed-up [%-7s] speed up audio by x2 (reduced accuracy)\n", params.speed_up ? "true" : "false");
fprintf(stderr, " -debug, --debug-mode [%-7s] enable debug mode (eg. dump log_mel)\n", params.debug_mode ? "true" : "false");
fprintf(stderr, " -tr, --translate [%-7s] translate from source language to english\n", params.translate ? "true" : "false");
fprintf(stderr, " -di, --diarize [%-7s] stereo audio diarization\n", params.diarize ? "true" : "false");
fprintf(stderr, " -tdrz, --tinydiarize [%-7s] enable tinydiarize (requires a tdrz model)\n", params.tinydiarize ? "true" : "false");
Expand All @@ -212,6 +217,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -f FNAME, --file FNAME [%-7s] input WAV file path\n", "");
fprintf(stderr, " -oved D, --ov-e-device DNAME [%-7s] the OpenVINO device used for encode inference\n", params.openvino_encode_device.c_str());
fprintf(stderr, " -ls, --log-score [%-7s] log best decoder scores of tokens\n", params.log_score?"true":"false");
fprintf(stderr, "\n");
}

Expand Down Expand Up @@ -486,6 +492,25 @@ bool output_csv(struct whisper_context * ctx, const char * fname, const whisper_
return true;
}

bool output_score(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
std::ofstream fout(fname);
fprintf(stderr, "%s: saving output to '%s'\n", __func__, fname);

const int n_segments = whisper_full_n_segments(ctx);
// fprintf(stderr,"segments: %d\n",n_segments);
for (int i = 0; i < n_segments; ++i) {
const int n_tokens = whisper_full_n_tokens(ctx, i);
// fprintf(stderr,"tokens: %d\n",n_tokens);
for (int j = 0; j < n_tokens; j++) {
auto token = whisper_full_get_token_text(ctx, i, j);
auto probability = whisper_full_get_token_p(ctx, i, j);
fout << token << '\t' << probability << std::endl;
// fprintf(stderr,"token: %s %f\n",token,probability);
}
}
return true;
}

bool output_json(struct whisper_context * ctx, const char * fname, const whisper_params & params, std::vector<std::vector<float>> pcmf32s) {
std::ofstream fout(fname);
int indent = 0;
Expand Down Expand Up @@ -893,6 +918,7 @@ int main(int argc, char ** argv) {
wparams.split_on_word = params.split_on_word;

wparams.speed_up = params.speed_up;
wparams.debug_mode = params.debug_mode;

wparams.tdrz_enable = params.tinydiarize; // [TDRZ]

Expand Down Expand Up @@ -982,6 +1008,12 @@ int main(int argc, char ** argv) {
const auto fname_lrc = fname_out + ".lrc";
output_lrc(ctx, fname_lrc.c_str(), params, pcmf32s);
}

// output to score file
if (params.log_score) {
const auto fname_score = fname_out + ".score.txt";
output_score(ctx, fname_score.c_str(), params, pcmf32s);
}
}
}

Expand Down
7 changes: 5 additions & 2 deletions examples/whisper.android/app/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ android {
vectorDrawables {
useSupportLibrary true
}
ndk {
abiFilters 'arm64-v8a', 'armeabi-v7a'
}
}

buildTypes {
Expand All @@ -42,8 +45,8 @@ android {
}
ndkVersion "25.1.8937393"
externalNativeBuild {
ndkBuild {
path 'src/main/jni/whisper/Android.mk'
cmake {
path = file("src/main/jni/whisper/CMakeLists.txt")
}
}
packagingOptions {
Expand Down
26 changes: 0 additions & 26 deletions examples/whisper.android/app/src/main/jni/whisper/Android.mk

This file was deleted.

This file was deleted.

44 changes: 44 additions & 0 deletions examples/whisper.android/app/src/main/jni/whisper/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
cmake_minimum_required(VERSION 3.10)

project(whisper.cpp)

set(CMAKE_CXX_STANDARD 11)
set(WHISPER_LIB_DIR ${CMAKE_SOURCE_DIR}/../../../../../../../)

set(
SOURCE_FILES
${WHISPER_LIB_DIR}/ggml.c
${WHISPER_LIB_DIR}/whisper.cpp
${CMAKE_SOURCE_DIR}/jni.c
)

if (${ANDROID_ABI} STREQUAL "arm64-v8a")
set(WHISPER_LIBRARY_NAME whisper_v8fp16_va)
elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
set(WHISPER_LIBRARY_NAME whisper_vfpv4)
endif ()

add_library(
${WHISPER_LIBRARY_NAME}
SHARED
${SOURCE_FILES}
)

if (${ANDROID_ABI} STREQUAL "arm64-v8a")
target_compile_options(${WHISPER_LIBRARY_NAME} PRIVATE -march=armv8.2-a+fp16)
elseif (${ANDROID_ABI} STREQUAL "armeabi-v7a")
target_compile_options(${WHISPER_LIBRARY_NAME} PRIVATE -mfpu=neon-vfpv4)
endif ()


target_link_libraries(${WHISPER_LIBRARY_NAME} log android)
include_directories(${WHISPER_LIB_DIR})

if (NOT ${CMAKE_BUILD_TYPE} STREQUAL "Debug")
target_compile_options(${WHISPER_LIBRARY_NAME} PRIVATE -O3)
target_compile_options(${WHISPER_LIBRARY_NAME} PRIVATE -fvisibility=hidden -fvisibility-inlines-hidden)
target_compile_options(${WHISPER_LIBRARY_NAME} PRIVATE -ffunction-sections -fdata-sections)
target_link_options(${WHISPER_LIBRARY_NAME} PRIVATE -Wl,--gc-sections)
target_link_options(${WHISPER_LIBRARY_NAME} PRIVATE -Wl,--exclude-libs,ALL)
target_link_options(${WHISPER_LIBRARY_NAME} PRIVATE -flto)
endif ()
18 changes: 0 additions & 18 deletions examples/whisper.android/app/src/main/jni/whisper/Whisper.mk

This file was deleted.

Loading

0 comments on commit 52222bd

Please sign in to comment.