quic · smedhe · Aug 5, 2025 · Aug 5, 2025 · Aug 11, 2025 · Aug 11, 2025
@@ -92,11 +92,9 @@
     "# Compile the model for provided compilation arguments\n",
     "# Please use platform SDK to Check num_cores for your card.\n",
     "\n",
-    "qeff_model.compile(\n",
-    "    num_cores=14,\n",
-    "    mxfp6=True,\n",
-    "    device_group=[0],\n",
-    ")"
+    "qeff_model.compile(num_cores=14, mxfp6=True)\n",
+    "\n",
+    "# The device_group argument is deprecated in compile(); pass it to generate() instead."
    ]
   },
   {
@@ -117,7 +115,7 @@
     "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
     "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
     "\n",
-    "qeff_model.generate(prompts=[\"My name is\"])"
+    "qeff_model.generate(prompts=[\"My name is\"], device_group=[0])"
    ]
   }
  ],

@@ -91,11 +91,8 @@
     "# Compile the model for provided compilation arguments\n",
     "# Please use platform SDK to Check num_cores for your card.\n",
     "\n",
-    "qeff_model.compile(\n",
-    "    num_cores=14,\n",
-    "    mxfp6=True,\n",
-    "    device_group=[0],\n",
-    ")"
+    "qeff_model.compile(num_cores=14, mxfp6=True)\n",
+    "# The device_group argument is deprecated in compile(); pass it to generate() instead."
    ]
   },
   {
@@ -116,7 +113,7 @@
     "# post compilation, we can print the latency stats for the kv models, We provide API to print token and Latency stats on Cloud AI 100\n",
     "# We need the compiled prefill and decode qpc to compute the token generated, This is based on Greedy Sampling Approach\n",
     "\n",
-    "qeff_model.generate(prompts=[\"My name is\"])"
+    "qeff_model.generate(prompts=[\"My name is\"], device_group=[0])"
    ]
   }
  ],