Kar
2025-06-17 15:53:01 +05:30
commit 4d20931ecc
411 changed files with 180695 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
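# bench: small benchmark binary, linked against the core whisper library and the system thread library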
set(TARGET bench)
add_executable(${TARGET} bench.cpp)
include(DefaultTargetOptions)
target_link_libraries(${TARGET} PRIVATE whisper ${CMAKE_THREAD_LIBS_INIT})

View File

@@ -0,0 +1,54 @@
# bench

A very basic tool for benchmarking the inference performance on your device. The tool runs the Encoder part of the
transformer on random audio data and records the execution time, giving an objective comparison of the model's
performance across different setups. A minimal sketch of the underlying API calls is shown after the sample output below.

Benchmark results are tracked in the following GitHub issue: https://github.com/ggerganov/whisper.cpp/issues/89

```bash
# build the bench tool
$ make bench
# run it on the small.en model using 4 threads
$ ./bench -m ./models/ggml-small.en.bin -t 4
whisper_model_load: loading model from './models/ggml-small.en.bin'
whisper_model_load: n_vocab = 51864
whisper_model_load: n_audio_ctx = 1500
whisper_model_load: n_audio_state = 768
whisper_model_load: n_audio_head = 12
whisper_model_load: n_audio_layer = 12
whisper_model_load: n_text_ctx = 448
whisper_model_load: n_text_state = 768
whisper_model_load: n_text_head = 12
whisper_model_load: n_text_layer = 12
whisper_model_load: n_mels = 80
whisper_model_load: f16 = 1
whisper_model_load: type = 3
whisper_model_load: mem_required = 1048.00 MB
whisper_model_load: adding 1607 extra tokens
whisper_model_load: ggml ctx size = 533.05 MB
whisper_model_load: memory size = 68.48 MB
whisper_model_load: model size = 464.44 MB
whisper_print_timings: load time = 240.82 ms
whisper_print_timings: mel time = 0.00 ms
whisper_print_timings: sample time = 0.00 ms
whisper_print_timings: encode time = 1062.21 ms / 88.52 ms per layer
whisper_print_timings: decode time = 0.00 ms / 0.00 ms per layer
whisper_print_timings: total time = 1303.04 ms
system_info: n_threads = 4 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
If you wish, you can submit these results here:
https://github.com/ggerganov/whisper.cpp/issues/89
Please include the following information:
- CPU model
- Operating system
- Compiler
```
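
Under the hood, the benchmark boils down to a handful of whisper.cpp API calls. The following is a minimal sketch based on `bench.cpp` from this commit; the model path and thread count are illustrative, and error handling is trimmed for brevity:

```cpp
#include "whisper.h"

int main() {
    // load the model with default context parameters
    struct whisper_context_params cparams = whisper_context_default_params();
    struct whisper_context * ctx = whisper_init_from_file_with_params("models/ggml-small.en.bin", cparams);
    if (ctx == nullptr) {
        return 1;
    }

    // hand the context an empty mel buffer so the encoder can run without real audio
    whisper_set_mel(ctx, nullptr, 0, whisper_model_n_mels(ctx));

    // run the encoder once with 4 threads and report the timings
    whisper_encode(ctx, 0, 4);
    whisper_print_timings(ctx);

    whisper_free(ctx);
    return 0;
}
```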

View File

@@ -0,0 +1,170 @@
#include "whisper.h"
#include <cstdio>
#include <cstring>
#include <string>
#include <thread>
// command-line parameters
struct whisper_params {
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
int32_t what = 0; // what to benchmark: 0 - whisper encoder, 1 - memcpy, 2 - ggml_mul_mat
std::string model = "models/ggml-base.en.bin";
bool use_gpu = true;
};
void whisper_print_usage(int argc, char ** argv, const whisper_params & params);
bool whisper_params_parse(int argc, char ** argv, whisper_params & params) {
for (int i = 1; i < argc; i++) {
std::string arg = argv[i];
if (arg == "-h" || arg == "--help") {
whisper_print_usage(argc, argv, params);
exit(0);
}
else if (arg == "-t" || arg == "--threads") { params.n_threads = std::stoi(argv[++i]); }
else if (arg == "-m" || arg == "--model") { params.model = argv[++i]; }
else if (arg == "-w" || arg == "--what") { params.what = atoi(argv[++i]); }
else if (arg == "-ng" || arg == "--no-gpu") { params.use_gpu = false; }
else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
whisper_print_usage(argc, argv, params);
exit(1);
}
}
return true;
}
void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & params) {
fprintf(stderr, "\n");
fprintf(stderr, "usage: %s [options]\n", argv[0]);
fprintf(stderr, "\n");
fprintf(stderr, "options:\n");
fprintf(stderr, " -h, --help [default] show this help message and exit\n");
fprintf(stderr, " -t N, --threads N [%-7d] number of threads to use during computation\n", params.n_threads);
fprintf(stderr, " -m FNAME, --model FNAME [%-7s] model path\n", params.model.c_str());
fprintf(stderr, " -w N, --what N [%-7d] what to benchmark:\n", params.what);
fprintf(stderr, " -ng, --no-gpu [%-7s] disable GPU\n", params.use_gpu ? "false" : "true");
fprintf(stderr, " %-7s 0 - whisper\n", "");
fprintf(stderr, " %-7s 1 - memcpy\n", "");
fprintf(stderr, " %-7s 2 - ggml_mul_mat\n", "");
fprintf(stderr, "\n");
}
int whisper_bench_full(const whisper_params & params) {
// whisper init
struct whisper_context_params cparams = whisper_context_default_params();
cparams.use_gpu = params.use_gpu;
struct whisper_context * ctx = whisper_init_from_file_with_params(params.model.c_str(), cparams);
{
fprintf(stderr, "\n");
fprintf(stderr, "system_info: n_threads = %d / %d | %s\n", params.n_threads, std::thread::hardware_concurrency(), whisper_print_system_info());
}
if (ctx == nullptr) {
fprintf(stderr, "error: failed to initialize whisper context\n");
return 2;
}
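// query the model's mel-band count, then hand the context an empty mel buffer
// so the encoder has input to run on without any real audio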
const int n_mels = whisper_model_n_mels(ctx);
if (int ret = whisper_set_mel(ctx, nullptr, 0, n_mels)) {
fprintf(stderr, "error: failed to set mel: %d\n", ret);
return 3;
}
// warm-up: encoder
if (int ret = whisper_encode(ctx, 0, params.n_threads)) {
fprintf(stderr, "error: failed to encode: %d\n", ret);
return 4;
}
whisper_token tokens[512];
memset(tokens, 0, sizeof(tokens));
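// the token values do not matter for timing - the decoder performs the same work
// regardless of content, so a zeroed buffer is sufficient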
// warm-up: prompt processing (decode 256 tokens in one batch)
if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads)) {
fprintf(stderr, "error: failed to decode: %d\n", ret);
return 4;
}
// warm-up: text generation (single-token decode)
if (int ret = whisper_decode(ctx, tokens, 1, 256, params.n_threads)) {
fprintf(stderr, "error: failed to decode: %d\n", ret);
return 4;
}
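// discard the warm-up timings so the report below covers only the runs that follow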
whisper_reset_timings(ctx);
// measured run: encoder
if (int ret = whisper_encode(ctx, 0, params.n_threads)) {
fprintf(stderr, "error: failed to encode: %d\n", ret);
return 4;
}
// measured run: text generation (256 single-token decodes, as in autoregressive sampling)
for (int i = 0; i < 256; i++) {
if (int ret = whisper_decode(ctx, tokens, 1, i, params.n_threads)) {
fprintf(stderr, "error: failed to decode: %d\n", ret);
return 4;
}
}
// measured run: batched decoding (5 tokens per call, 64 times)
for (int i = 0; i < 64; i++) {
if (int ret = whisper_decode(ctx, tokens, 5, 0, params.n_threads)) {
fprintf(stderr, "error: failed to decode: %d\n", ret);
return 4;
}
}
// measured run: prompt processing (a 256-token prompt per call, 16 times)
for (int i = 0; i < 16; i++) {
if (int ret = whisper_decode(ctx, tokens, 256, 0, params.n_threads)) {
fprintf(stderr, "error: failed to decode: %d\n", ret);
return 4;
}
}
whisper_print_timings(ctx);
whisper_free(ctx);
fprintf(stderr, "\n");
fprintf(stderr, "If you wish, you can submit these results here:\n");
fprintf(stderr, "\n");
fprintf(stderr, " https://github.com/ggerganov/whisper.cpp/issues/89\n");
fprintf(stderr, "\n");
fprintf(stderr, "Please include the following information:\n");
fprintf(stderr, "\n");
fprintf(stderr, " - CPU model\n");
fprintf(stderr, " - Operating system\n");
fprintf(stderr, " - Compiler\n");
fprintf(stderr, "\n");
return 0;
}
int main(int argc, char ** argv) {
whisper_params params;
if (!whisper_params_parse(argc, argv, params)) {
return 1;
}
int ret = -1;
switch (params.what) {
case 0: ret = whisper_bench_full(params); break;
case 1: ret = whisper_bench_memcpy(params.n_threads); break;
case 2: ret = whisper_bench_ggml_mul_mat(params.n_threads); break;
default: fprintf(stderr, "error: unknown benchmark: %d\n", params.what); break;
}
return ret;
}