From 17a88b575eaa5dd861070b48a9610a6d5ea6865b Mon Sep 17 00:00:00 2001
From: slokesha
Date: Fri, 8 Aug 2025 23:15:59 +0000
Subject: [PATCH] Fixed_test_plugin.py

Signed-off-by: slokesha
---
 examples/test_plugin.py | 50 ++++++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/examples/test_plugin.py b/examples/test_plugin.py
index 871f2114..47306189 100644
--- a/examples/test_plugin.py
+++ b/examples/test_plugin.py
@@ -2,28 +2,32 @@
 
 from vllm import LLM, SamplingParams
 
-os.environ["VLLM_SKIP_WARMUP"] = "true"
-prompts = [
-    "Hello, my name is",
-    "0.999 compares to 0.9 is ",
-    "The capital of France is",
-    "The future of AI is",
-]
-sampling_params = SamplingParams(temperature=0, max_tokens=50)
-model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
-# model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
-# model = "meta-llama/Llama-3.2-1B-Instruct"
-# model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
-# model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
-# model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
-kwargs = {"tensor_parallel_size": 1}
-if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
-    kwargs["enable_expert_parallel"] = True
-llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
+def main():
+    os.environ["VLLM_SKIP_WARMUP"] = "true"
+    prompts = [
+        "Hello, my name is",
+        "0.999 compares to 0.9 is ",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    sampling_params = SamplingParams(temperature=0, max_tokens=50)
+    model = "/mnt/weka/llm/Qwen3/Qwen3-30B-A3B/"
+    # model = "/mnt/weka/llm/Qwen3/Qwen3-32B/"
+    # model = "meta-llama/Llama-3.2-1B-Instruct"
+    # model = "/mnt/weka/llm/DeepSeek-V2-Lite-Chat/"
+    # model = "/mnt/weka/data/mlperf_models/Mixtral-8x7B-Instruct-v0.1"
+    # model = "/mnt/weka/data/pytorch/llama3.1/Meta-Llama-3.1-8B/"
+    kwargs = {"tensor_parallel_size": 2}
+    if os.path.basename(model) in ["Qwen3-30B-A3B", "DeepSeek-V2-Lite-Chat"]:
+        kwargs["enable_expert_parallel"] = True
+    llm = LLM(model=model, max_model_len=4096, trust_remote_code=True, **kwargs)
 
-outputs = llm.generate(prompts, sampling_params)
+    outputs = llm.generate(prompts, sampling_params)
 
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file