Skip to content

Commit ff7330c

Browse files
committed
Build llama.cpp separately
Signed-off-by: Ettore Di Giacinto <[email protected]>
1 parent c7a1d9c commit ff7330c

File tree

12 files changed

+356
-64
lines changed

12 files changed

+356
-64
lines changed

.dockerignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
.vscode
44
.devcontainer
55
models
6+
backends
67
examples/chatbot-ui/models
8+
backend/go/image/stablediffusion-ggml/build/
79
examples/rwkv/models
810
examples/**/models
911
Dockerfile*
@@ -14,4 +16,4 @@ __pycache__
1416

1517
# backend virtual environments
1618
**/venv
17-
backend/python/**/source
19+
backend/python/**/source

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ prepare-sources
88
/backend/cpp/llama/grpc-server
99
/backend/cpp/llama/llama.cpp
1010
/backend/cpp/llama-*
11+
!backend/cpp/llama-cpp
1112

1213
*.log
1314

Makefile

Lines changed: 53 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ sources/whisper.cpp/build/src/libwhisper.a: sources/whisper.cpp
323323
cd sources/whisper.cpp && cmake $(WHISPER_CMAKE_ARGS) . -B ./build
324324
cd sources/whisper.cpp/build && cmake --build . --config Release
325325

326-
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama/llama.cpp
326+
get-sources: sources/go-piper sources/stablediffusion-ggml.cpp sources/bark.cpp sources/whisper.cpp backend/cpp/llama-cpp/llama.cpp
327327

328328
replace:
329329
$(GOCMD) mod edit -replace github.com/ggerganov/whisper.cpp=$(CURDIR)/sources/whisper.cpp
@@ -356,9 +356,9 @@ clean: ## Remove build related file
356356
rm -rf backend-assets/*
357357
$(MAKE) -C backend/cpp/grpc clean
358358
$(MAKE) -C backend/go/bark-cpp clean
359-
$(MAKE) -C backend/cpp/llama clean
359+
$(MAKE) -C backend/cpp/llama-cpp clean
360360
$(MAKE) -C backend/go/image/stablediffusion-ggml clean
361-
rm -rf backend/cpp/llama-* || true
361+
rm -rf backend/cpp/llama-cpp-* || true
362362
$(MAKE) dropreplace
363363
$(MAKE) protogen-clean
364364
rmdir pkg/grpc/proto || true
@@ -669,7 +669,7 @@ backend-assets/espeak-ng-data: sources/go-piper sources/go-piper/libpiper_bindin
669669
mkdir -p backend-assets/espeak-ng-data
670670
@cp -rf sources/go-piper/piper-phonemize/pi/share/espeak-ng-data/. backend-assets/espeak-ng-data
671671

672-
backend-assets/grpc: protogen-go replace
672+
backend-assets/grpc:
673673
mkdir -p backend-assets/grpc
674674

675675
backend-assets/grpc/huggingface: backend-assets/grpc
@@ -678,8 +678,8 @@ ifneq ($(UPX),)
678678
$(UPX) backend-assets/grpc/huggingface
679679
endif
680680

681-
backend/cpp/llama/llama.cpp:
682-
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama llama.cpp
681+
backend/cpp/llama-cpp/llama.cpp:
682+
LLAMA_VERSION=$(CPPLLAMA_VERSION) $(MAKE) -C backend/cpp/llama-cpp llama.cpp
683683

684684
INSTALLED_PACKAGES=$(CURDIR)/backend/cpp/grpc/installed_packages
685685
INSTALLED_LIB_CMAKE=$(INSTALLED_PACKAGES)/lib/cmake
@@ -704,109 +704,109 @@ else
704704
endif
705705

706706
# This target is for manually building a variant with-auto detected flags
707-
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama/llama.cpp
708-
cp -rf backend/cpp/llama backend/cpp/llama-cpp
709-
$(MAKE) -C backend/cpp/llama-cpp purge
707+
backend-assets/grpc/llama-cpp: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
708+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-copy
709+
$(MAKE) -C backend/cpp/llama-cpp-copy purge
710710
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
711-
$(MAKE) VARIANT="llama-cpp" build-llama-cpp-grpc-server
712-
cp -rfv backend/cpp/llama-cpp/grpc-server backend-assets/grpc/llama-cpp
711+
$(MAKE) VARIANT="llama-cpp-copy" build-llama-cpp-grpc-server
712+
cp -rfv backend/cpp/llama-cpp-copy/grpc-server backend-assets/grpc/llama-cpp
713713

714-
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama/llama.cpp
715-
cp -rf backend/cpp/llama backend/cpp/llama-avx2
716-
$(MAKE) -C backend/cpp/llama-avx2 purge
714+
backend-assets/grpc/llama-cpp-avx2: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
715+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx2
716+
$(MAKE) -C backend/cpp/llama-cpp-avx2 purge
717717
$(info ${GREEN}I llama-cpp build info:avx2${RESET})
718718
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx2" build-llama-cpp-grpc-server
719-
cp -rfv backend/cpp/llama-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
719+
cp -rfv backend/cpp/llama-cpp-avx2/grpc-server backend-assets/grpc/llama-cpp-avx2
720720

721-
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama/llama.cpp
722-
cp -rf backend/cpp/llama backend/cpp/llama-avx512
723-
$(MAKE) -C backend/cpp/llama-avx512 purge
721+
backend-assets/grpc/llama-cpp-avx512: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
722+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx512
723+
$(MAKE) -C backend/cpp/llama-cpp-avx512 purge
724724
$(info ${GREEN}I llama-cpp build info:avx512${RESET})
725725
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=on -DGGML_FMA=on -DGGML_F16C=on" $(MAKE) VARIANT="llama-avx512" build-llama-cpp-grpc-server
726-
cp -rfv backend/cpp/llama-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
726+
cp -rfv backend/cpp/llama-cpp-avx512/grpc-server backend-assets/grpc/llama-cpp-avx512
727727

728-
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama/llama.cpp
729-
cp -rf backend/cpp/llama backend/cpp/llama-avx
730-
$(MAKE) -C backend/cpp/llama-avx purge
728+
backend-assets/grpc/llama-cpp-avx: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
729+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-avx
730+
$(MAKE) -C backend/cpp/llama-cpp-avx purge
731731
$(info ${GREEN}I llama-cpp build info:avx${RESET})
732732
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-avx" build-llama-cpp-grpc-server
733-
cp -rfv backend/cpp/llama-avx/grpc-server backend-assets/grpc/llama-cpp-avx
733+
cp -rfv backend/cpp/llama-cpp-avx/grpc-server backend-assets/grpc/llama-cpp-avx
734734

735-
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama/llama.cpp
736-
cp -rf backend/cpp/llama backend/cpp/llama-fallback
737-
$(MAKE) -C backend/cpp/llama-fallback purge
735+
backend-assets/grpc/llama-cpp-fallback: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
736+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-fallback
737+
$(MAKE) -C backend/cpp/llama-cpp-fallback purge
738738
$(info ${GREEN}I llama-cpp build info:fallback${RESET})
739739
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" $(MAKE) VARIANT="llama-fallback" build-llama-cpp-grpc-server
740-
cp -rfv backend/cpp/llama-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
740+
cp -rfv backend/cpp/llama-cpp-fallback/grpc-server backend-assets/grpc/llama-cpp-fallback
741741

742-
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama/llama.cpp
743-
cp -rf backend/cpp/llama backend/cpp/llama-cuda
744-
$(MAKE) -C backend/cpp/llama-cuda purge
742+
backend-assets/grpc/llama-cpp-cuda: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
743+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-cuda
744+
$(MAKE) -C backend/cpp/llama-cpp-cuda purge
745745
$(info ${GREEN}I llama-cpp build info:cuda${RESET})
746746
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_CUDA=ON" $(MAKE) VARIANT="llama-cuda" build-llama-cpp-grpc-server
747-
cp -rfv backend/cpp/llama-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
747+
cp -rfv backend/cpp/llama-cpp-cuda/grpc-server backend-assets/grpc/llama-cpp-cuda
748748

749-
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama/llama.cpp
750-
cp -rf backend/cpp/llama backend/cpp/llama-hipblas
751-
$(MAKE) -C backend/cpp/llama-hipblas purge
749+
backend-assets/grpc/llama-cpp-hipblas: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
750+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-hipblas
751+
$(MAKE) -C backend/cpp/llama-cpp-hipblas purge
752752
$(info ${GREEN}I llama-cpp build info:hipblas${RESET})
753753
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" BUILD_TYPE="hipblas" $(MAKE) VARIANT="llama-hipblas" build-llama-cpp-grpc-server
754-
cp -rfv backend/cpp/llama-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
754+
cp -rfv backend/cpp/llama-cpp-hipblas/grpc-server backend-assets/grpc/llama-cpp-hipblas
755755

756-
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama/llama.cpp
757-
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f16
758-
$(MAKE) -C backend/cpp/llama-sycl_f16 purge
756+
backend-assets/grpc/llama-cpp-sycl_f16: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
757+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f16
758+
$(MAKE) -C backend/cpp/llama-cpp-sycl_f16 purge
759759
$(info ${GREEN}I llama-cpp build info:sycl_f16${RESET})
760760
BUILD_TYPE="sycl_f16" $(MAKE) VARIANT="llama-sycl_f16" build-llama-cpp-grpc-server
761-
cp -rfv backend/cpp/llama-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
761+
cp -rfv backend/cpp/llama-cpp-sycl_f16/grpc-server backend-assets/grpc/llama-cpp-sycl_f16
762762

763-
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama/llama.cpp
764-
cp -rf backend/cpp/llama backend/cpp/llama-sycl_f32
765-
$(MAKE) -C backend/cpp/llama-sycl_f32 purge
763+
backend-assets/grpc/llama-cpp-sycl_f32: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
764+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-sycl_f32
765+
$(MAKE) -C backend/cpp/llama-cpp-sycl_f32 purge
766766
$(info ${GREEN}I llama-cpp build info:sycl_f32${RESET})
767767
BUILD_TYPE="sycl_f32" $(MAKE) VARIANT="llama-sycl_f32" build-llama-cpp-grpc-server
768-
cp -rfv backend/cpp/llama-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
768+
cp -rfv backend/cpp/llama-cpp-sycl_f32/grpc-server backend-assets/grpc/llama-cpp-sycl_f32
769769

770-
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama/llama.cpp
771-
cp -rf backend/cpp/llama backend/cpp/llama-grpc
772-
$(MAKE) -C backend/cpp/llama-grpc purge
770+
backend-assets/grpc/llama-cpp-grpc: backend-assets/grpc backend/cpp/llama-cpp/llama.cpp
771+
cp -rf backend/cpp/llama-cpp backend/cpp/llama-cpp-grpc
772+
$(MAKE) -C backend/cpp/llama-cpp-grpc purge
773773
$(info ${GREEN}I llama-cpp build info:grpc${RESET})
774774
CMAKE_ARGS="$(CMAKE_ARGS) -DGGML_RPC=ON -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off" TARGET="--target grpc-server --target rpc-server" $(MAKE) VARIANT="llama-grpc" build-llama-cpp-grpc-server
775-
cp -rfv backend/cpp/llama-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
775+
cp -rfv backend/cpp/llama-cpp-grpc/grpc-server backend-assets/grpc/llama-cpp-grpc
776776

777777
backend-assets/util/llama-cpp-rpc-server: backend-assets/grpc/llama-cpp-grpc
778778
mkdir -p backend-assets/util/
779-
cp -rf backend/cpp/llama-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
779+
cp -rf backend/cpp/llama-cpp-grpc/llama.cpp/build/bin/rpc-server backend-assets/util/llama-cpp-rpc-server
780780

781-
backend-assets/grpc/bark-cpp: backend/go/bark-cpp/libbark.a backend-assets/grpc
781+
backend-assets/grpc/bark-cpp: protogen-go replace backend/go/bark-cpp/libbark.a backend-assets/grpc
782782
CGO_LDFLAGS="$(CGO_LDFLAGS)" C_INCLUDE_PATH=$(CURDIR)/backend/go/bark-cpp/ LIBRARY_PATH=$(CURDIR)/backend/go/bark-cpp/ \
783783
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/bark-cpp ./backend/go/bark-cpp/
784784
ifneq ($(UPX),)
785785
$(UPX) backend-assets/grpc/bark-cpp
786786
endif
787787

788-
backend-assets/grpc/piper: sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
788+
backend-assets/grpc/piper: protogen-go replace sources/go-piper sources/go-piper/libpiper_binding.a backend-assets/grpc backend-assets/espeak-ng-data
789789
CGO_CXXFLAGS="$(PIPER_CGO_CXXFLAGS)" CGO_LDFLAGS="$(PIPER_CGO_LDFLAGS)" LIBRARY_PATH=$(CURDIR)/sources/go-piper \
790790
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/piper ./backend/go/tts/
791791
ifneq ($(UPX),)
792792
$(UPX) backend-assets/grpc/piper
793793
endif
794794

795-
backend-assets/grpc/silero-vad: backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
795+
backend-assets/grpc/silero-vad: protogen-go replace backend-assets/grpc backend-assets/lib/libonnxruntime.so.1
796796
CGO_LDFLAGS="$(CGO_LDFLAGS)" CPATH="$(CPATH):$(CURDIR)/sources/onnxruntime/include/" LIBRARY_PATH=$(CURDIR)/backend-assets/lib \
797797
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/silero-vad ./backend/go/vad/silero
798798
ifneq ($(UPX),)
799799
$(UPX) backend-assets/grpc/silero-vad
800800
endif
801801

802-
backend-assets/grpc/whisper: sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
802+
backend-assets/grpc/whisper: protogen-go replace sources/whisper.cpp sources/whisper.cpp/build/src/libwhisper.a backend-assets/grpc
803803
CGO_LDFLAGS="$(CGO_LDFLAGS) $(CGO_LDFLAGS_WHISPER)" C_INCLUDE_PATH="${WHISPER_INCLUDE_PATH}" LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" LD_LIBRARY_PATH="${WHISPER_LIBRARY_PATH}" \
804804
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/whisper ./backend/go/transcribe/whisper
805805
ifneq ($(UPX),)
806806
$(UPX) backend-assets/grpc/whisper
807807
endif
808808

809-
backend-assets/grpc/local-store: backend-assets/grpc
809+
backend-assets/grpc/local-store: backend-assets/grpc protogen-go replace
810810
$(GOCMD) build -ldflags "$(LD_FLAGS)" -tags "$(GO_TAGS)" -o backend-assets/grpc/local-store ./backend/go/stores/
811811
ifneq ($(UPX),)
812812
$(UPX) backend-assets/grpc/local-store

0 commit comments

Comments
 (0)