Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
46346a7
WIP LLM pipeline and dataset implementation
farook-edev Aug 2, 2025
5aab20a
fixed issues preventing libraries from compiling, runtime errors not …
farook-edev Aug 4, 2025
f598e57
upgrade TensorFlow to 2.18.0
farook-edev Aug 18, 2025
fe32950
upgraded llm pipeline to use TFLite C++ api + small bug fixes
farook-edev Aug 18, 2025
24ad1d5
basic flutter app support for icon and dataset
farook-edev Aug 18, 2025
aa09439
added linux x86_64 config for internal testing
farook-edev Aug 19, 2025
84b164e
updated bazel config to use SSE/MMX instructions
farook-edev Aug 26, 2025
d57040c
fixed incorrect answer format and compression
farook-edev Aug 26, 2025
f9e40a5
got pipeline and dataset to produce proper results + fixed issues whe…
farook-edev Aug 26, 2025
057c9f8
added support for loadgen's token based performance measurement + imp…
farook-edev Sep 1, 2025
3c8b4f5
fixed bugs in inference process, first token function now handles onl…
farook-edev Sep 1, 2025
a03fbea
optimized tensor retrieval for inference + added check for input size…
farook-edev Sep 7, 2025
69a630a
clang-format
farook-edev Sep 7, 2025
816f282
mmlu dataset cleanup and formatting
farook-edev Sep 8, 2025
fca2905
slight code cleanup
farook-edev Sep 8, 2025
20e7805
fixed issue with genai ops import
farook-edev Sep 9, 2025
83aea46
code/config cleanup
farook-edev Sep 27, 2025
61a5c8a
add zero-shot option to MMLU constructor
farook-edev Sep 28, 2025
54adcd0
use function to detect which token is answer letter
farook-edev Sep 29, 2025
65f797f
quick initial implementation of first token callback
farook-edev Sep 29, 2025
719aefa
moved tokenizer to dataset side (possibly needs cleanup)
farook-edev Sep 29, 2025
765817e
added files needed for MMLU utils
farook-edev Oct 5, 2025
2e887cd
clang-format
farook-edev Oct 5, 2025
a3b0799
continued formatting
farook-edev Oct 5, 2025
5a96013
code cleanup / issue_query signature update to vendor backends
farook-edev Oct 6, 2025
3a54a66
signature update for QTI/Samsung backends
farook-edev Oct 6, 2025
26e562b
format
farook-edev Oct 6, 2025
d485523
formatted clang and bazel using docker based formatter
farook-edev Oct 6, 2025
24cf047
reverted issue_query change for samsung + bazel formatting
farook-edev Oct 6, 2025
30b6464
fix for MSVC C7555 error
farook-edev Oct 7, 2025
c294784
rough IFEval implementation using llm_instruction benchmark
farook-edev Oct 7, 2025
97ba25f
disabled XNNPACK AVX-VNNI for windows due to C2440 error
farook-edev Oct 11, 2025
5383e75
moved accuracy calculation away from ProcessOutput, ifeval accuracy i…
farook-edev Oct 13, 2025
8e21ed1
fixed issue with app not finding model/tokenizer
farook-edev Oct 19, 2025
94f3cd5
properly format 0-shot prompts + allow for file/directory for model path
farook-edev Oct 20, 2025
e56d622
formatting
farook-edev Oct 20, 2025
9120d63
potential fix for windows C2440
farook-edev Oct 27, 2025
002d2d0
fix for aligned free for windows
farook-edev Oct 28, 2025
9f81bdd
potential fix for IOS / windows CI issues
farook-edev Oct 28, 2025
93d5352
ifeval check cleanup and bugfixes
farook-edev Oct 28, 2025
fc0f241
formatting
farook-edev Oct 28, 2025
15880a9
all possible configs for removing eigen exceptions
farook-edev Oct 28, 2025
5ddbb87
removed objc opts
farook-edev Oct 28, 2025
7a4042a
use token latencies in app
farook-edev Oct 28, 2025
cfc719b
enable exceptions for IOS
farook-edev Oct 28, 2025
f87ef86
disable FP16 AVX for x86 simulator
farook-edev Oct 28, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ build --spawn_strategy=standalone
# This flag is required by tensorflow
common --experimental_repo_remote_exec

# Without these, tensorflow complains about lack of CUDA library.
common --repo_env=TF_NEED_CUDA=0
common --repo_env=TF_NEED_ROCM=0

# Default options should come above this line.

# Configure logs
Expand All @@ -18,13 +22,16 @@ build:verbose_logs --output_filter=

# Suppress C++ compiler warnings, otherwise build logs become 10s of MBs.
build:android --copt=-w
build:linux --copt=-w
build:ios --copt=-w
build:windows --copt=/W0

# Build in C++ 17 mode.
build --cxxopt=-std=c++17
build:android --cxxopt=-std=c++17
build:android --host_cxxopt=-std=c++17
build:linux --cxxopt=-std=c++17
build:linux --host_cxxopt=-std=c++17
build:ios --cxxopt=-std=c++17
build:ios --host_cxxopt=-std=c++17
build:ios --cxxopt=-xobjective-c++
Expand All @@ -41,10 +48,41 @@ build:android_x86_64 --config=android
build:android_x86_64 --cpu=x86_64
build:android_x86_64 --fat_apk_cpu=x86_64


build:android_x86_64 --define=xnn_enable_avx512fp16=false
build:android_x86_64 --define=xnn_enable_avxvnniint8=false

# Linux configs
build:linux_x86_64 --config=linux
build:linux_x86_64 --cpu=k8
# Not required, but enables the instruction set extensions (e.g. SSE/MMX) that the build machine's CPU supports
build:linux_x86_64 --copt=-march=native

# These may be necessary depending on CPU instruction support
#build:linux_x86_64 --define=xnn_enable_avx=false
#build:linux_x86_64 --define=xnn_enable_avx2=false
#build:linux_x86_64 --define=xnn_enable_avx512=false
build:linux_x86_64 --define=xnn_enable_avx512fp16=false
#build:linux_x86_64 --define=xnn_enable_avxvnni=false
build:linux_x86_64 --define=xnn_enable_avxvnniint8=false
#build:linux_x86_64 --define=xnn_enable_vnni=false


# Optional, enable for debugging or compilation errors
#build:linux_x86_64 --action_env=CC=gcc
#build:linux_x86_64 --action_env=CXX=g++
#build:linux_x86_64 --strip=never
#build:linux_x86_64 --copt=-fno-omit-frame-pointer
#build:linux_x86_64 --linkopt=-fno-omit-frame-pointer

# iOS configs
build:ios --apple_platform_type=ios
build:ios --copt=-Wno-c++11-narrowing
build:ios --cxxopt=-fobjc-arc
build:ios --copt=-fexceptions
build:ios --cxxopt=-fexceptions
# disable avx512-fp16 for x86 simulator
build:ios --define=xnn_enable_avx512fp16=false

# Windows configs

Expand Down Expand Up @@ -73,6 +111,10 @@ build:windows --host_linkopt=/OPT:REF
build:windows --linkopt=/OPT:ICF
build:windows --host_linkopt=/OPT:ICF

# MSVC does not support XNNPACK AVXVNNI instructions (causes C2440 error).
build:windows --define=xnn_enable_avxvnni=false
build:windows --define=xnn_enable_avxvnniint8=false

# Address sanitizer
build:asan --strip=never
build:asan --copt -fsanitize=address
Expand Down
42 changes: 36 additions & 6 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,44 @@ http_archive(
"//:flutter/third_party/use_unsigned_char.patch",
# Fix tensorflow not being able to read image files on Windows
"//:flutter/third_party/tensorflow-fix-file-opening-mode-for-Windows.patch",
"//:flutter/third_party/tf-eigen.patch",
# NDK 25 support
"//patches:ndk_25_r14.diff",
#"//:flutter/third_party/tf-eigen.patch",
] + PATCH_FILE,
sha256 = "ce357fd0728f0d1b0831d1653f475591662ec5bca736a94ff789e6b1944df19f",
strip_prefix = "tensorflow-2.14.0",
sha256 = "d7876f4bb0235cac60eb6316392a7c48676729860da1ab659fb440379ad5186d",
strip_prefix = "tensorflow-2.18.0",
urls = [
"https://github.com/tensorflow/tensorflow/archive/v2.14.0.tar.gz",
"https://github.com/tensorflow/tensorflow/archive/v2.18.0.tar.gz",
],
)

load("@org_tensorflow//third_party/gpus:cuda_configure.bzl", "cuda_configure")

cuda_configure(name = "local_config_cuda")

load("@org_tensorflow//third_party/gpus:rocm_configure.bzl", "rocm_configure")

rocm_configure(name = "local_config_rocm")

http_archive(
name = "com_google_sentencepiece",
build_file = "@//patches:sentencepiece.BUILD",
patch_args = ["-p1"],
patches = ["@//patches:com_google_sentencepiece.diff"],
sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754",
strip_prefix = "sentencepiece-0.1.96",
urls = [
"https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip",
],
)

http_archive(
name = "darts_clone",
build_file = "@//patches:darts_clone.BUILD",
patch_args = ["-p0"],
patches = ["//patches:darts_no_exceptions.diff"],
sha256 = "c97f55d05c98da6fcaf7f9ecc6a6dc6bc5b18b8564465f77abff8879d446491c",
strip_prefix = "darts-clone-e40ce4627526985a7767444b6ed6893ab6ff8983",
urls = [
"https://github.com/s-yata/darts-clone/archive/e40ce4627526985a7767444b6ed6893ab6ff8983.zip",
],
)

Expand Down
4 changes: 2 additions & 2 deletions flutter/android/android-docker.mk
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ user_id=$(shell id -u)
.PHONY: flutter/android/docker/image
flutter/android/docker/image: output/docker/mlperf_mobile_flutter_android_${user_id}.stamp
output/docker/mlperf_mobile_flutter_android_${user_id}.stamp: flutter/android/docker/Dockerfile
docker image build -t ${DOCKER_IMAGE_TAG} flutter/android/docker
DOCKER_BUILDKIT=1 docker buildx build --tag ${DOCKER_IMAGE_TAG} flutter/android/docker
mkdir -p output/docker
touch $@

Expand Down Expand Up @@ -68,4 +68,4 @@ docker/flutter/android/release: flutter/check-release-env flutter/android/docker
docker/flutter/clean: flutter/check-release-env
MSYS2_ARG_CONV_EXCL="*" docker run \
${flutter_common_docker_flags} \
make flutter/clean
make flutter/clean
98 changes: 98 additions & 0 deletions flutter/assets/icons/ic_task_llm.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
101 changes: 101 additions & 0 deletions flutter/assets/icons/ic_task_llm_white.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
57 changes: 57 additions & 0 deletions flutter/assets/tasks.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,63 @@ task {
}
}

# LLM benchmark task: runs in the SingleStream scenario against the TinyMMLU
# prompt set.
task {
id: "llm"
name: "LLM"
max_throughput: 2000
max_accuracy: 1.0
scenario: "SingleStream"
# Per-run-mode limits. NOTE(review): durations are presumably in seconds --
# confirm against the other tasks in this file.
runs {
normal {
min_query_count: 100
min_duration: 60
max_duration: 300
}
quick {
min_query_count: 10
min_duration: 10
max_duration: 40
}
rapid {
min_query_count: 6
min_duration: 6
max_duration: 60
}
}
# All three dataset sizes (full/lite/tiny) currently point at the same
# TinyMMLU tfrecord with the same checksum; no separate groundtruth file is
# configured (paths and checksums are empty).
datasets {
type: MMLU
full {
name: "TinyMMLU prompt set for LLM"
input_path: "local:///mlperf_datasets/tinymmlu/data.tfrecord"
input_checksum: "c20f9115582217af15e4d9955b41ace1"
groundtruth_path: ""
groundtruth_checksum: ""
}
lite {
name: "TinyMMLU prompt set for LLM"
input_path: "local:///mlperf_datasets/tinymmlu/data.tfrecord"
input_checksum: "c20f9115582217af15e4d9955b41ace1"
groundtruth_path: ""
groundtruth_checksum: ""
}
tiny {
name: "TinyMMLU prompt set for LLM"
input_path: "local:///mlperf_datasets/tinymmlu/data.tfrecord"
input_checksum: "c20f9115582217af15e4d9955b41ace1"
groundtruth_path: ""
groundtruth_checksum: ""
}
}
model {
id: "LLM"
name: "LLM"
}
# Tokenizer model file name passed to the task as a custom setting.
# NOTE(review): presumably resolved relative to the model location -- confirm.
custom_config {
id: "llm_tokenizer_path"
value: "llama3_1b.spm.model"
}
}

task {
id: "stable_diffusion"
name: "Stable Diffusion"
Expand Down
4 changes: 3 additions & 1 deletion flutter/cpp/backend.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@ class Backend {
virtual const std::string& AcceleratorName() const = 0;

// Run inference for a sample. Inputs are already set by SetInputs.
virtual void IssueQuery() = 0;
// TODO: it might be good to provide the callback and context along with the
// inputs, if possible.
virtual void IssueQuery(ft_callback callback, void* context) = 0;

// Flush the staged queries immediately.
virtual void FlushQueries() = 0;
Expand Down
9 changes: 5 additions & 4 deletions flutter/cpp/backends/external.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ struct BackendFunctions {
using AcceleratorNamePtr =
std::add_pointer<const char*(mlperf_backend_ptr_t)>::type;
using BackendDeletePtr = std::add_pointer<void(mlperf_backend_ptr_t)>::type;
using IssueQueryPtr =
std::add_pointer<mlperf_status_t(mlperf_backend_ptr_t)>::type;
using IssueQueryPtr = std::add_pointer<mlperf_status_t(
mlperf_backend_ptr_t, ft_callback, void*)>::type;
using FlushQueriesPtr =
std::add_pointer<mlperf_status_t(mlperf_backend_ptr_t)>::type;

Expand Down Expand Up @@ -157,8 +157,9 @@ class ExternalBackend : public Backend {
}

// Run inference for a sample.
void IssueQuery() override {
if (backend_functions_.issue_query(backend_ptr_) != MLPERF_SUCCESS) {
// Forwards the query to the external backend's issue_query entry point,
// passing the callback and its context through unchanged. Any status other
// than MLPERF_SUCCESS is treated as fatal.
void IssueQuery(ft_callback callback, void* context) override {
  const auto status =
      backend_functions_.issue_query(backend_ptr_, callback, context);
  if (status == MLPERF_SUCCESS) {
    return;
  }
  LOG(FATAL) << "Error while inferencing model";
}
Expand Down
2 changes: 2 additions & 0 deletions flutter/cpp/binary/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ cc_binary(
"//flutter/cpp/datasets:ade20k",
"//flutter/cpp/datasets:coco",
"//flutter/cpp/datasets:coco_gen",
"//flutter/cpp/datasets:ifeval",
"//flutter/cpp/datasets:imagenet",
"//flutter/cpp/datasets:mmlu_gen",
"//flutter/cpp/datasets:snu_sr",
"//flutter/cpp/datasets:squad",
"//flutter/cpp/proto:mlperf_task_cc_proto",
Expand Down
8 changes: 7 additions & 1 deletion flutter/cpp/binary/cmdline-docker.mk
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,10 @@
docker/cmdline/android/release: flutter/android/docker/image
MSYS2_ARG_CONV_EXCL="*" docker run \
${flutter_common_docker_flags} \
make cmdline/android/bins/release
make cmdline/android/bins/release

.PHONY: docker/cmdline/linux/release
docker/cmdline/linux/release: flutter/android/docker/image
MSYS2_ARG_CONV_EXCL="*" docker run \
${flutter_common_docker_flags} \
make cmdline/linux/bins/release
Loading
Loading