init
This commit is contained in:
6
whisper.cpp-1.5.2/examples/bench/CMakeLists.txt
Normal file
6
whisper.cpp-1.5.2/examples/bench/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
||||
set(TARGET bench)
|
||||
add_executable(${TARGET} bench.cpp)
|
||||
|
||||
include(DefaultTargetOptions)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})
|
||||
54
whisper.cpp-1.5.2/examples/bench/README.md
Normal file
54
whisper.cpp-1.5.2/examples/bench/README.md
Normal file
@@ -0,0 +1,54 @@
|
||||
# bench
|
||||
|
||||
A very basic tool for benchmarking the inference performance on your device. The tool simply runs the Encoder part of
|
||||
the transformer on some random audio data and records the execution time. This way we can have an objective comparison
|
||||
of the performance of the model for various setups.
|
||||
|
||||
Benchmark results are tracked in the following Github issue: https://github.com/ggerganov/whisper.cpp/issues/89
|
||||
|
||||
```bash
|
||||
# build the bench tool
|
||||
$ make bench
|
||||
|
||||
# run it on the small.en model using 4 threads
|
||||
$ ./bench -m ./models/ggml-small.en.bin -t 4
|
||||
|
||||
whisper_model_load: loading model from './models/ggml-small.en.bin'
|
||||
whisper_model_load: n_vocab = 51864
|
||||
whisper_model_load: n_audio_ctx = 1500
|
||||
whisper_model_load: n_audio_state = 768
|
||||
whisper_model_load: n_audio_head = 12
|
||||
whisper_model_load: n_audio_layer = 12
|
||||
whisper_model_load: n_text_ctx = 448
|
||||
whisper_model_load: n_text_state = 768
|
||||
whisper_model_load: n_text_head = 12
|
||||
whisper_model_load: n_text_layer = 12
|
||||
whisper_model_load: n_mels = 80
|
||||
whisper_model_load: f16 = 1
|
||||
whisper_model_load: type = 3
|
||||
whisper_model_load: mem_required = 1048.00 MB
|
||||
whisper_model_load: adding 1607 extra tokens
|
||||
whisper_model_load: ggml ctx size = 533.05 MB
|
||||
whisper_model_load: memory size = 68.48 MB
|
||||
whisper_model_load: model size = 464.44 MB
|
||||
|
||||
whisper_print_timings: load time = 240.82 ms
|
||||
whisper_print_timings: mel time = 0.00 ms
|
||||
whisper_print_timings: sample time = 0.00 ms
|
||||
whisper_print_timings: encode time = 1062.21 ms / 88.52 ms per layer
|
||||
whisper_print_timings: decode time = 0.00 ms / 0.00 ms per layer
|
||||
whisper_print_timings: total time = 1303.04 ms
|
||||
|
||||
system_info: n_threads = 4 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
|
||||
|
||||
If you wish, you can submit these results here:
|
||||
|
||||
https://github.com/ggerganov/whisper.cpp/issues/89
|
||||
|
||||
Please include the following information:
|
||||
|
||||
- CPU model
|
||||
- Operating system
|
||||
- Compiler
|
||||
|
||||
```
|
||||
170
whisper.cpp-1.5.2/examples/bench/bench.cpp
Normal file
170
whisper.cpp-1.5.2/examples/bench/bench.cpp
Normal file
@@ -0,0 +1,170 @@
|
||||
#include "whisper.h"
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
|
||||
// command-line parameters
|
||||
struct whisper_params {
|
||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||
int32_t what = 0; // what to benchmark: 0 - whisper ecoder, 1 - memcpy, 2 - ggml_mul_mat
|
||||
|
||||
std::string model = "models/ggml-base.en.bin";
|
||||
|
||||
bool use_gpu = true;
|
||||
};
|
||||
|
||||
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
|
||||
|
||||
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
|
||||
for (int i = 1; i < argc; i++) {
|
||||
std::string arg = argv[i];
|
||||
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
|
||||
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
|
||||
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
|
||||
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
|
||||
else {
|
||||
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
|
||||
whisper_print_usage(argc, argv, params);
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "usage: %s [options]\n", argv[0]);
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "options:\n");
|
||||
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
|
||||
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
|
||||
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
|
||||
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
|
||||
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
|
||||
fprintf(stderr, " %-7s 0 - whisper\n", "");
|
||||
fprintf(stderr, " %-7s 1 - memcpy\n", "");
|
||||
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
int whisper_bench_full(const whisper_params & params) {
|
||||
// whisper init
|
||||
|
||||
struct whisper_context_params cparams;
|
||||
cparams.use_gpu = params.use_gpu;
|
||||
|
||||
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
|
||||
|
||||
{
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
|
||||
}
|
||||
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "error: failed to initialize whisper context\n");
|
||||
return 2;
|
||||
}
|
||||
|
||||
const int n_mels = whisper_model_n_mels(ctx);
|
||||
|
||||
if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
|
||||
fprintf(stderr, "error: failed to set mel: %d\n", ret);
|
||||
return 3;
|
||||
}
|
||||
// heat encoder
|
||||
if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to encode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
|
||||
whisper_token tokens[512];
|
||||
memset(tokens, 0, sizeof(tokens));
|
||||
|
||||
// prompt heat
|
||||
if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to decode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
|
||||
// text-generation heat
|
||||
if (int ret = whisper_decode(ctx, tokens, 1, 256, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to decode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
|
||||
whisper_reset_timings(ctx);
|
||||
|
||||
// actual run
|
||||
if (int ret = whisper_encode(ctx, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to encode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
|
||||
// text-generation
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (int ret = whisper_decode(ctx, tokens, 1, i, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to decode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
// batched decoding
|
||||
for (int i = 0; i < 64; i++) {
|
||||
if (int ret = whisper_decode(ctx, tokens, 5, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to decode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
// prompt processing
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads) != 0) {
|
||||
fprintf(stderr, "error: failed to decode: %d\n", ret);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
|
||||
whisper_print_timings(ctx);
|
||||
whisper_free(ctx);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "If you wish, you can submit these results here:\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " https://github.com/ggerganov/whisper.cpp/issues/89\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Please include the following information:\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, " - CPU model\n");
|
||||
fprintf(stderr, " - Operating system\n");
|
||||
fprintf(stderr, " - Compiler\n");
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
whisper_params params;
|
||||
|
||||
if (whisper_params_parse(argc, argv, params) == false) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int ret = -1;
|
||||
|
||||
switch (params.what) {
|
||||
case 0: ret = whisper_bench_full(params); break;
|
||||
case 1: ret = whisper_bench_memcpy(params.n_threads); break;
|
||||
case 2: ret = whisper_bench_ggml_mul_mat(params.n_threads); break;
|
||||
default: fprintf(stderr, "error: unknown benchmark: %d\n", params.what); break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
Reference in New Issue
Block a user