Skip to content

Commit 2e09069

Browse files
committed
Merge branch 'feature/static-xcframework' of github.com:richwaters/whisper.cpp into feature/static-xcframework
2 parents 34b96bd + 49b2c69 commit 2e09069

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+2143
-1760
lines changed

.devops/main-musa.Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
ARG UBUNTU_VERSION=22.04
22
# This needs to generally match the container host's environment.
3-
ARG MUSA_VERSION=rc4.0.1
3+
ARG MUSA_VERSION=rc4.2.0
44
# Target the MUSA build image
5-
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-devel-ubuntu${UBUNTU_VERSION}
5+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
66
# Target the MUSA runtime image
7-
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-mudnn-runtime-ubuntu${UBUNTU_VERSION}
7+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
88

99
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
1010
WORKDIR /app

.github/workflows/build.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,25 @@ on:
66
- master
77
tags:
88
- 'v*'
9+
paths: ['.github/workflows/build.yml',
10+
'**/CMakeLists.txt',
11+
'**/Makefile',
12+
'**/*.mk',
13+
'**/*.cmake',
14+
'**/*.in',
15+
'**/*.h',
16+
'**/*.hpp',
17+
'**/*.c',
18+
'**/*.cpp',
19+
'**/*.cu',
20+
'**/*.cuh',
21+
'**/*.cl',
22+
'**/*.swift',
23+
'**/*.m',
24+
'**/*.mm',
25+
'**/*.metal',
26+
'**/*.comp',
27+
'**/*.java']
928
pull_request:
1029
types: [opened, synchronize, reopened]
1130
workflow_dispatch:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ Run the inference examples as usual, for example:
386386
## Moore Threads GPU support
387387

388388
With Moore Threads cards the processing of the models is done efficiently on the GPU via muBLAS and custom MUSA kernels.
389-
First, make sure you have installed `MUSA SDK rc4.0.1`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.0.1
389+
First, make sure you have installed `MUSA SDK rc4.2.0`: https://developer.mthreads.com/sdk/download/musa?equipment=&os=&driverVersion=&version=4.2.0
390390

391391
Now build `whisper.cpp` with MUSA support:
392392

bindings/go/Makefile

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,16 @@ BUILD_DIR := build_go
1515
MODELS_DIR := models
1616
EXAMPLES_DIR := $(wildcard examples/*)
1717
INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18-
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src:$(abspath ../../${BUILD_DIR}/ggml/src))
18+
LIBRARY_PATH := $(abspath ../../${BUILD_DIR}/src):$(abspath ../../${BUILD_DIR}/ggml/src)
1919

2020
ifeq ($(GGML_CUDA),1)
2121
LIBRARY_PATH := $(LIBRARY_PATH):$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib/
2222
BUILD_FLAGS := -ldflags "-extldflags '-lcudart -lcuda -lcublas'"
2323
endif
2424

2525
ifeq ($(UNAME_S),Darwin)
26-
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
26+
LIBRARY_PATH := $(LIBRARY_PATH):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-blas):$(abspath ../../${BUILD_DIR}/ggml/src/ggml-metal)
27+
EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit -lggml-metal -lggml-blas
2728
endif
2829

2930
all: clean whisper examples

bindings/go/whisper.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import (
99
// CGO
1010

1111
/*
12-
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++ -fopenmp
12+
#cgo LDFLAGS: -lwhisper -lggml -lggml-base -lggml-cpu -lm -lstdc++
13+
#cgo linux LDFLAGS: -fopenmp
14+
#cgo darwin LDFLAGS: -lggml-metal -lggml-blas
1315
#cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
1416
#include <whisper.h>
1517
#include <stdlib.h>

build-xcframework.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ combine_static_libraries() {
328328
arch_flags+=" -arch $arch"
329329
done
330330

331-
331+
332332
if [[ "${BUILD_STATIC_XCFRAMEWORK}" == "ON" ]]; then
333333
echo "Packaging static framework for ${platform}."
334334
mkdir -p "$(dirname "${base_dir}/${output_lib}")"
@@ -551,7 +551,7 @@ if [[ "${BUILD_STATIC_XCFRAMEWORK}" == "ON" ]]; then
551551
-framework $(pwd)/build-tvos-sim/framework/whisper.framework \
552552
-output $(pwd)/build-apple/whisper.xcframework
553553
exit 0
554-
fi
554+
fi
555555

556556
xcodebuild -create-xcframework \
557557
-framework $(pwd)/build-ios-sim/framework/whisper.framework \

examples/bench.wasm/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ cp bin/libbench.js /path/to/html/
3232
cp bin/libbench.worker.js /path/to/html/
3333
```
3434

35+
> 📝 **Note:** By default this example is built with `WHISPER_WASM_SINGLE_FILE=ON`
36+
> which means that a separate .wasm file will not be generated. Instead, the
37+
> WASM module is embedded in the main JS file as a base64 encoded string. To
38+
> generate a separate .wasm file, you need to disable this option by passing
39+
> `-DWHISPER_WASM_SINGLE_FILE=OFF`:
40+
> ```console
41+
> emcmake cmake .. -DWHISPER_WASM_SINGLE_FILE=OFF
42+
> ```
43+
> This will generate a `libbench.wasm` file in the build/bin directory.
44+
3545
> 📝 **Note:** As of Emscripten 3.1.58 (April 2024), separate worker.js files are no
3646
> longer generated and the worker is embedded in the main JS file. So the worker
3747
> file will not be generated for versions later than `3.1.58`.

examples/command.wasm/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,16 @@ cp bin/libcommand.js /path/to/html/
3232
cp bin/libcommand.worker.js /path/to/html/
3333
```
3434

35+
> 📝 **Note:** By default this example is built with `WHISPER_WASM_SINGLE_FILE=ON`
36+
> which means that a separate .wasm file will not be generated. Instead, the
37+
> WASM module is embedded in the main JS file as a base64 encoded string. To
38+
> generate a separate .wasm file, you need to disable this option by passing
39+
> `-DWHISPER_WASM_SINGLE_FILE=OFF`:
40+
> ```console
41+
> emcmake cmake .. -DWHISPER_WASM_SINGLE_FILE=OFF
42+
> ```
43+
> This will generate a `libcommand.wasm` file in the build/bin directory.
44+
3545
> 📝 **Note:** As of Emscripten 3.1.58 (April 2024), separate worker.js files are no
3646
> longer generated and the worker is embedded in the main JS file. So the worker
3747
> file will not be generated for versions later than `3.1.58`.

examples/server/server.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ struct whisper_params {
104104
bool flash_attn = false;
105105
bool suppress_nst = false;
106106
bool no_context = false;
107+
bool no_language_probabilities = false;
107108

108109
std::string language = "en";
109110
std::string prompt = "";
@@ -178,6 +179,7 @@ void whisper_print_usage(int /*argc*/, char ** argv, const whisper_params & para
178179
fprintf(stderr, " -nc, --no-context [%-7s] do not use previous audio context\n", params.no_context ? "true" : "false");
179180
fprintf(stderr, " -ng, --no-gpu [%-7s] do not use gpu\n", params.use_gpu ? "false" : "true");
180181
fprintf(stderr, " -fa, --flash-attn [%-7s] flash attention\n", params.flash_attn ? "true" : "false");
182+
fprintf(stderr, " -nlp, --no-language-probabilities [%-7s] exclude language probabilities from verbose_json output\n", params.no_language_probabilities ? "true" : "false");
181183
// Voice Activity Detection (VAD) parameters
182184
fprintf(stderr, "\nVoice Activity Detection (VAD) options:\n");
183185
fprintf(stderr, " --vad [%-7s] enable Voice Activity Detection (VAD)\n", params.vad ? "true" : "false");
@@ -237,6 +239,7 @@ bool whisper_params_parse(int argc, char ** argv, whisper_params & params, serve
237239
else if (arg == "-sns" || arg == "--suppress-nst") { params.suppress_nst = true; }
238240
else if (arg == "-nth" || arg == "--no-speech-thold") { params.no_speech_thold = std::stof(argv[++i]); }
239241
else if (arg == "-nc" || arg == "--no-context") { params.no_context = true; }
242+
else if (arg == "-nlp" || arg == "--no-language-probabilities") { params.no_language_probabilities = true; }
240243

241244
// server params
242245
else if ( arg == "--port") { sparams.port = std::stoi(argv[++i]); }
@@ -599,6 +602,10 @@ void get_req_parameters(const Request & req, whisper_params & params)
599602
{
600603
params.vad_samples_overlap = std::stof(req.get_file_value("vad_samples_overlap").content);
601604
}
605+
if (req.has_file("no_language_probabilities"))
606+
{
607+
params.no_language_probabilities = parse_str_to_bool(req.get_file_value("no_language_probabilities").content);
608+
}
602609
}
603610

604611
} // namespace
@@ -1024,23 +1031,25 @@ int main(int argc, char ** argv) {
10241031
} else if (params.response_format == vjson_format) {
10251032
/* try to match openai/whisper's Python format */
10261033
std::string results = output_str(ctx, params, pcmf32s);
1027-
// Get language probabilities
1028-
std::vector<float> lang_probs(whisper_lang_max_id() + 1, 0.0f);
1029-
const auto detected_lang_id = whisper_lang_auto_detect(ctx, 0, params.n_threads, lang_probs.data());
10301034
json jres = json{
10311035
{"task", params.translate ? "translate" : "transcribe"},
10321036
{"language", whisper_lang_str_full(whisper_full_lang_id(ctx))},
10331037
{"duration", float(pcmf32.size())/WHISPER_SAMPLE_RATE},
10341038
{"text", results},
1035-
{"segments", json::array()},
1036-
{"detected_language", whisper_lang_str_full(detected_lang_id)},
1037-
{"detected_language_probability", lang_probs[detected_lang_id]},
1038-
{"language_probabilities", json::object()}
1039+
{"segments", json::array()}
10391040
};
1040-
// Add all language probabilities
1041-
for (int i = 0; i <= whisper_lang_max_id(); ++i) {
1042-
if (lang_probs[i] > 0.001f) { // Only include non-negligible probabilities
1043-
jres["language_probabilities"][whisper_lang_str(i)] = lang_probs[i];
1041+
// Only compute language probabilities if requested (expensive operation)
1042+
if (!params.no_language_probabilities) {
1043+
std::vector<float> lang_probs(whisper_lang_max_id() + 1, 0.0f);
1044+
const auto detected_lang_id = whisper_lang_auto_detect(ctx, 0, params.n_threads, lang_probs.data());
1045+
jres["detected_language"] = whisper_lang_str_full(detected_lang_id);
1046+
jres["detected_language_probability"] = lang_probs[detected_lang_id];
1047+
jres["language_probabilities"] = json::object();
1048+
// Add all language probabilities
1049+
for (int i = 0; i <= whisper_lang_max_id(); ++i) {
1050+
if (lang_probs[i] > 0.001f) { // Only include non-negligible probabilities
1051+
jres["language_probabilities"][whisper_lang_str(i)] = lang_probs[i];
1052+
}
10441053
}
10451054
}
10461055
const int n_segments = whisper_full_n_segments(ctx);

examples/stream.wasm/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,16 @@ cp bin/libstream.js /path/to/html/
3030
cp bin/libstream.worker.js /path/to/html/
3131
```
3232

33+
> 📝 **Note:** By default this example is built with `WHISPER_WASM_SINGLE_FILE=ON`
34+
> which means that a separate .wasm file will not be generated. Instead, the
35+
> WASM module is embedded in the main JS file as a base64 encoded string. To
36+
> generate a separate .wasm file, you need to disable this option by passing
37+
> `-DWHISPER_WASM_SINGLE_FILE=OFF`:
38+
> ```console
39+
> emcmake cmake .. -DWHISPER_WASM_SINGLE_FILE=OFF
40+
> ```
41+
> This will generate a `libstream.wasm` file in the build/bin directory.
42+
3343
> 📝 **Note:** As of Emscripten 3.1.58 (April 2024), separate worker.js files are no
3444
> longer generated and the worker is embedded in the main JS file. So the worker
3545
> file will not be generated for versions later than `3.1.58`.

0 commit comments

Comments
 (0)