NVIDIA
diff --git a/cpp/micro_benchmarks/mixtureOfExpertsBackendBenchmarkFixture.h b/cpp/micro_benchmarks/mixtureOfExpertsBackendBenchmarkFixture.h
Lines changed: 12 additions & 9 deletions
@@ -833,7 +833,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
     // Runs for 3 iterations or 1 second and picks the best option
     int pickBestTactic(MOEParallelismConfig parallelism_config, GemmToProfile gemm_to_profile)
     {
-        auto tactics = mMoERunner.getTactics();
+        auto tactics = mMoERunner.getTactics(static_cast<MoeGemmId>(gemm_to_profile));
         ::nvtx3::scoped_range nvtx(tensorrt_llm::common::nvtx::nextColor(),
             "Tactic Profiling GEMM " + std::to_string(static_cast<int>(gemm_to_profile)));
         // We save space by reusing the same workspace buffer for all tactics when doing full layer profiling. So we
@@ -925,12 +925,14 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
     std::pair<int, int> setTactic(
         int tactic_idx1, int tactic_idx2, MOEParallelismConfig parallelism_config, GemmToProfile gemm_to_profile)
     {
-        auto tactics = mMoERunner.getTactics();
+        auto tactics1 = mMoERunner.getTactics(MoeGemmId::GEMM_1);
+        auto tactics2 = mMoERunner.getTactics(MoeGemmId::GEMM_2);
         std::vector<std::pair<std::reference_wrapper<int>, GemmToProfile>> tactics_to_profile{
             {tactic_idx1, GemmToProfile::GEMM_1}, {tactic_idx2, GemmToProfile::GEMM_2}};
         for (auto& combo : tactics_to_profile)
         {
             auto& t = combo.first.get();
+            auto& tactics = combo.second == GemmToProfile::GEMM_1 ? tactics1 : tactics2;
             if (combo.second != gemm_to_profile && gemm_to_profile != GemmToProfile::LAYER)
             {
                 t = 0; // Unneeded tactic, set to 0
@@ -947,7 +949,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
             }
         }
 
-        mMoERunner.setTactic(tactics[tactic_idx1], tactics[tactic_idx2]);
+        mMoERunner.setTactic(tactics1[tactic_idx1], tactics2[tactic_idx2]);
         mBestTacticGemm1 = tactic_idx1;
         mBestTacticGemm2 = tactic_idx2;
         return {tactic_idx1, tactic_idx2};
@@ -965,7 +967,7 @@ class MixtureOfExpertsBenchmark : public ::benchmark::Fixture
             auto expert_weights_size
                 = gemm_to_profile == GemmToProfile::GEMM_1 ? mExpertWeight1Size : mExpertWeight2Size;
 
-            auto tactics = mMoERunner.getTactics()[tactic_idx];
+            auto tactics = mMoERunner.getTactics(static_cast<MoeGemmId>(gemm_to_profile))[tactic_idx];
             if (static_cast<int>(gemm_to_profile) != static_cast<int>(mGemmProfilerBackend.mGemmToProfile))
             {
                 throw std::runtime_error("Configuration mismatch between mGemmProfilerBackend and runMoEPermute");
@@ -1074,11 +1076,12 @@ void MixtureOfExpertsBenchmark<TypeTuple_>::runBenchmark(benchmark::State& state
     }
     if (LOG_LEVEL >= INFO)
     {
-        auto tactics = mMoERunner.getTactics();
-        std::cout << "Selected tactic #1: " << tactic_idx1 << "/" << tactics.size() << "\n"
-                  << tactics[tactic_idx1].toString() << std::endl;
-        std::cout << "Selected tactic #2: " << tactic_idx2 << "/" << tactics.size() << "\n"
-                  << tactics[tactic_idx2].toString() << std::endl;
+        auto tactics1 = mMoERunner.getTactics(MoeGemmId::GEMM_1);
+        auto tactics2 = mMoERunner.getTactics(MoeGemmId::GEMM_2);
+        std::cout << "Selected tactic #1: " << tactic_idx1 << "/" << tactics1.size() << "\n"
+                  << tactics1[tactic_idx1].toString() << std::endl;
+        std::cout << "Selected tactic #2: " << tactic_idx2 << "/" << tactics2.size() << "\n"
+                  << tactics2[tactic_idx2].toString() << std::endl;
     }
     state.counters["tactic_idx1"] = tactic_idx1;
     state.counters["tactic_idx2"] = tactic_idx2;