From 17a88b575eaa5dd861070b48a9610a6d5ea6865b Mon Sep 17 00:00:00 2001
From: slokesha
Date: Fri, 8 Aug 2025 23:15:59 +0000
Subject: [PATCH] Fixed_test_plugin.py

Signed-off-by: slokesha
---
 examples/test_plugin.py | 50 ++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/examples/test_plugin.py b/examples/test_plugin.py
index 871f2114..47306189 100644
--- a/examples/test_plugin.py
+++ b/examples/test_plugin.py
@@ -2,28 +2,32 @@
 
 from vllm import LLM, SamplingParams
 
-os.environ["VLLM_SKIP_WARMUP"] = "true"
-prompts = [
-    "Hello, my name is",
-    "0.999 compares to 0.9 is ",
-    "The capital of France is",
-    "The future of AI is",
-]
-sampling_params = SamplingParams(temperature=0, max_tokens=50)
-model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
-# model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
-# model = "meta-llama/Llama-3.2-1B-Instruct"
-# model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
-# model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
-# model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
-kwargs = {"tensor_parallel_size": 1}
-if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
-    kwargs["enable_expert_parallel"] = True
-llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
+def main():
+    os.environ["VLLM_SKIP_WARMUP"] = "true"
+    prompts = [
+        "Hello, my name is",
+        "0.999 compares to 0.9 is ",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0, max_tokens=50)
+    model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
+    # model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
+    # model = "meta-llama/Llama-3.2-1B-Instruct"
+    # model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
+    # model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
+    # model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
+    kwargs = {"tensor_parallel_size": 2}
+    if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
+        kwargs["enable_expert_parallel"] = True
+    llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
 
-outputs = llm.generate(prompts, sampling_params)
+    outputs = llm.generate(prompts, sampling_params)
 
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file