Skip to content

Commit f7c597e

Browse files
authored
[None][perf] Make finalize fusion part of the tactic selection logic (#6915)
Signed-off-by: djns99 <[email protected]>
1 parent: e18dacc · commit: f7c597e

File tree

20 files changed, with 263 additions and 371 deletions.

cpp/micro_benchmarks/mixtureOfExpertsBackendBenchmarkFixture.h

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -833,7 +833,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
833833
// Runs for 3 iterations or 1 second and picks the best option
834834
int pickBestTactic(MOEParallelismConfig parallelism_config, GemmToProfile gemm_to_profile)
835835
{
836-
auto tactics = mMoERunner.getTactics();
836+
auto tactics = mMoERunner.getTactics(static_cast<MoeGemmId>(gemm_to_profile));
837837
::nvtx3::scoped_range nvtx(tensorrt_llm::common::nvtx::nextColor(),
838838
"Tactic Profiling GEMM " + std::to_string(static_cast<int>(gemm_to_profile)));
839839
// We save space by reusing the same workspace buffer for all tactics when doing full layer profiling. So we
@@ -925,12 +925,14 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
925925
std::pair<int, int> setTactic(
926926
int tactic_idx1, int tactic_idx2, MOEParallelismConfig parallelism_config, GemmToProfile gemm_to_profile)
927927
{
928-
auto tactics = mMoERunner.getTactics();
928+
auto tactics1 = mMoERunner.getTactics(MoeGemmId::GEMM_1);
929+
auto tactics2 = mMoERunner.getTactics(MoeGemmId::GEMM_2);
929930
std::vector<std::pair<std::reference_wrapper<int>, GemmToProfile>> tactics_to_profile{
930931
{tactic_idx1, GemmToProfile::GEMM_1}, {tactic_idx2, GemmToProfile::GEMM_2}};
931932
for (auto& combo : tactics_to_profile)
932933
{
933934
auto& t = combo.first.get();
935+
auto& tactics = combo.second == GemmToProfile::GEMM_1 ? tactics1 : tactics2;
934936
if (combo.second != gemm_to_profile && gemm_to_profile != GemmToProfile::LAYER)
935937
{
936938
t = 0; // Unneeded tactic, set to 0
@@ -947,7 +949,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
947949
}
948950
}
949951

950-
mMoERunner.setTactic(tactics[tactic_idx1], tactics[tactic_idx2]);
952+
mMoERunner.setTactic(tactics1[tactic_idx1], tactics2[tactic_idx2]);
951953
mBestTacticGemm1 = tactic_idx1;
952954
mBestTacticGemm2 = tactic_idx2;
953955
return {tactic_idx1, tactic_idx2};
@@ -965,7 +967,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
965967
auto expert_weights_size
966968
= gemm_to_profile == GemmToProfile::GEMM_1 ? mExpertWeight1Size : mExpertWeight2Size;
967969

968-
auto tactics = mMoERunner.getTactics()[tactic_idx];
970+
auto tactics = mMoERunner.getTactics(static_cast<MoeGemmId>(gemm_to_profile))[tactic_idx];
969971
if (static_cast<int>(gemm_to_profile) != static_cast<int>(mGemmProfilerBackend.mGemmToProfile))
970972
{
971973
throw std::runtime_error("Configuration mismatch between mGemmProfilerBackend and runMoEPermute");
@@ -1074,11 +1076,12 @@ void MixtureOfExpertsBenchmark<TypeTuple_>::runBenchmark(benchmark::State& state
10741076
}
10751077
if (LOG_LEVEL >= INFO)
10761078
{
1077-
auto tactics = mMoERunner.getTactics();
1078-
std::cout << "Selected tactic #1: " << tactic_idx1 << "/" << tactics.size() << "\n"
1079-
<< tactics[tactic_idx1].toString() << std::endl;
1080-
std::cout << "Selected tactic #2: " << tactic_idx2 << "/" << tactics.size() << "\n"
1081-
<< tactics[tactic_idx2].toString() << std::endl;
1079+
auto tactics1 = mMoERunner.getTactics(MoeGemmId::GEMM_1);
1080+
auto tactics2 = mMoERunner.getTactics(MoeGemmId::GEMM_2);
1081+
std::cout << "Selected tactic #1: " << tactic_idx1 << "/" << tactics1.size() << "\n"
1082+
<< tactics1[tactic_idx1].toString() << std::endl;
1083+
std::cout << "Selected tactic #2: " << tactic_idx2 << "/" << tactics2.size() << "\n"
1084+
<< tactics2[tactic_idx2].toString() << std::endl;
10821085
}
10831086
state.counters["tactic_idx1"] = tactic_idx1;
10841087
state.counters["tactic_idx2"] = tactic_idx2;

0 commit comments

Comments
 (0)