From f6aa8fe0a1ab0f95ee94d018f60d0f861a37844e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:45:52 +0000 Subject: [PATCH 01/91] simple change --- examples/guidellm_example.py | 15 +++++++++++++-- src/automation/configs.py | 6 ++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f09b1e6..f4cef02 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,3 +1,12 @@ +import os +import sys + +from clearml import Task +executable_path = os.path.dirname(sys.executable) +vllm_path = os.path.join(executable_path, "vllm") +print(f"The vllm path is: {vllm_path}") + +""" from automation.tasks import GuideLLMTask task = GuideLLMTask( @@ -14,6 +23,8 @@ data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} ) +""" -task.execute_remotely("oneshot-a100x1") -#task.execute_locally() \ No newline at end of file +task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") +task.execute_remotely("remote-upgrade-default") +#task.execute_locally() diff --git a/src/automation/configs.py b/src/automation/configs.py index 76dbe58..22b5abd 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,2 +1,4 @@ -DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" -DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" \ No newline at end of file +#DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" +DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" +#DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" +DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" From 18f41b1cc79ca752b13612494a4beb05f58e7840 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:50:18 +0000 Subject: [PATCH 02/91] test lmeval change --- examples/lmeval_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 8910aa2..617ecca 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -9,5 +9,6 @@ batch_size="auto", ) -task.execute_remotely("oneshot-a100x1") -#task.execute_locally() \ No newline at end of file +#task.execute_remotely("oneshot-a100x1") +task.execute_remotely("remote-upgrade-default") +#task.execute_locally() From a425d4305e18009e933238f3c72104d0cd31a812 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:54:31 +0000 Subject: [PATCH 03/91] update branch --- examples/lmeval_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 617ecca..7c8ee1e 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -3,6 +3,7 @@ task = LMEvalTask( project_name="alexandre_debug", task_name="test_lmeval_task", + branch = "update_guidellm", model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", From 6fc29f4b8295aa3d5b934950babbd6d1ab44b48e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:00:15 +0000 Subject: [PATCH 04/91] use main --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 390012b..a8560a8 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,7 @@ import os 
DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@http_backend" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" class GuideLLMTask(BaseTask): From 956a12b41ab2dbdc0bfda97cbd9faecd255d220c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:06:57 +0000 Subject: [PATCH 05/91] remove gcs --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a0a63c..f0df931 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - "google-cloud-storage>=1.13.2", + #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From 5e09fb72a66b11b14e4ff51f80b44ab4d7030d5b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:23:37 +0000 Subject: [PATCH 06/91] readd gc --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f0df931..9a0a63c 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - #"google-cloud-storage>=1.13.2", + "google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From 655f00e72bf1c4ceff2038bf6c997417d10d3a14 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:26:23 +0000 Subject: [PATCH 07/91] remove gc --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a0a63c..f0df931 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - "google-cloud-storage>=1.13.2", + #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From ba703b0c8efb6617e56691952fa88f37e8e21e81 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:30:55 +0000 Subject: [PATCH 08/91] back to guidellm --- examples/guidellm_example.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f4cef02..8cf2061 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,12 +1,6 @@ import os import sys -from clearml import Task -executable_path = os.path.dirname(sys.executable) -vllm_path = os.path.join(executable_path, "vllm") -print(f"The vllm path is: {vllm_path}") - -""" from automation.tasks import GuideLLMTask task = GuideLLMTask( @@ -23,8 +17,11 @@ data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} ) -""" -task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") +#from clearml import Task +#task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") #task.execute_locally() +executable_path = os.path.dirname(sys.executable) +vllm_path = os.path.join(executable_path, "vllm") +print(f"The vllm path is: {vllm_path}") From b4deac89b1e5a079cb5c105f8e542d5e683fca59 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:44:41 +0000 Subject: [PATCH 09/91] simplified --- src/automation/tasks/scripts/guidellm_script.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 617b502..3932b84 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -9,6 +9,8 @@ def main(configurations=None): task = Task.current_task() + """ + args = task.get_parameters_as_dict(cast=True) if configurations 
is None: @@ -42,6 +44,7 @@ def main(configurations=None): # Resolve model_id model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download) + """ # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( @@ -51,6 +54,7 @@ def main(configurations=None): args["Args"]["server_wait_time"], ) + """ if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) @@ -70,5 +74,7 @@ def main(configurations=None): task.upload_artifact(name="guidellm guidance report", artifact_object=report.to_json()) task.upload_artifact(name="vLLM server log", artifact_object=server_log) + """ + if __name__ == '__main__': - main() \ No newline at end of file + main() From 6ed6862ee10dd36025dc03e5738622966d2191d4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:50:30 +0000 Subject: [PATCH 10/91] simple vllm --- src/automation/vllm/server.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 6036d65..6d59674 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -25,6 +25,7 @@ def start_vllm_server( parsed_target = urlparse(target) + """ server_command = [ f"{vllm_path}", "serve", model_id, @@ -32,6 +33,13 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(num_gpus) ] + """ + + server_command = [ + f"{vllm_path}", "serve", + "Qwen/Qwen2.5-1.5B-Instruct", + ] + subprocess_env = os.environ.copy() @@ -64,4 +72,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name \ No newline at end of file + return server_process, False, server_log_file_name From b3f55bc94e6aa055b6b5b958e91ff3fa92b1effa Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 12:19:25 +0000 Subject: [PATCH 11/91] skip vllm --- examples/guidellm_example.py | 3 ++- src/automation/tasks/scripts/guidellm_script.py | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 8cf2061..f632c33 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,7 +11,8 @@ backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, - target="http://localhost:8000/v1", + #target="http://localhost:8000/v1", + target="http://192.130.2.20:8000/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 3932b84..d1d7330 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -9,8 +9,6 @@ def main(configurations=None): task = Task.current_task() - """ - args = task.get_parameters_as_dict(cast=True) if configurations is None: @@ -54,12 +52,12 @@ def main(configurations=None): args["Args"]["server_wait_time"], ) - """ if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to intialize") + """ # Parse through environment variables for k, v in environment_args.items(): os.environ[k] = str(v) @@ -74,7 +72,6 @@ def main(configurations=None): task.upload_artifact(name="guidellm guidance report", 
artifact_object=report.to_json()) task.upload_artifact(name="vLLM server log", artifact_object=server_log) - """ if __name__ == '__main__': main() From 3a709da6f7e8b606f3a71db46a97c4b1eb47f689 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:07:01 +0000 Subject: [PATCH 12/91] pause vllm --- .../tasks/scripts/guidellm_script.py | 110 +++++++++++++----- 1 file changed, 79 insertions(+), 31 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d1d7330..aec6598 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -1,35 +1,44 @@ - import os from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server from pyhocon import ConfigFactory - -def main(configurations=None): +def main(): task = Task.current_task() args = task.get_parameters_as_dict(cast=True) - if configurations is None: - guidellm_args = ConfigFactory.parse_string(task.get_configuration_object("GuideLLM")) - - environment_args = task.get_configuration_object("environment") - if environment_args is None: - environment_args = {} - else: - environment_args = ConfigFactory.parse_string(environment_args) + raw_config = task.get_configuration_object("GuideLLM") + if raw_config is None: + print("[DEBUG] `GuideLLM` config not found in configuration — checking parameters as fallback") + raw_config = task.get_parameters_as_dict().get("GuideLLM") + if raw_config is None: + raise RuntimeError("GuideLLM config is None. This likely means `get_configurations()` is not returning it or it's not passed via parameters.") + guidellm_args = ConfigFactory.from_dict(raw_config) + else: + guidellm_args = ConfigFactory.parse_string(raw_config) + + def clean_hocon_value(v): + if isinstance(v, str) and v.startswith('"') and v.endswith('"'): + return v[1:-1] + return v + + guidellm_args = {k: clean_hocon_value(v) for k, v in guidellm_args.items()} + + print("[DEBUG] Guidellm_Args:", guidellm_args) + + environment_args = task.get_configuration_object("environment") + if environment_args is None: + environment_args = {} + else: + environment_args = ConfigFactory.parse_string(environment_args) - vllm_args = task.get_configuration_object("vLLM") - if vllm_args is None: - vllm_args = {} - else: - vllm_args = ConfigFactory.parse_string(vllm_args) + vllm_args = task.get_configuration_object("vLLM") + if vllm_args is None: + vllm_args = {} else: - guidellm_args = configurations.get("GuideLLM", {}) - environment_args = configurations.get("environment", {}) - vllm_args = configurations.get("vLLM", {}) - + vllm_args = ConfigFactory.parse_string(vllm_args) clearml_model = args["Args"]["clearml_model"] if isinstance(clearml_model, str): @@ -39,9 +48,11 @@ def main(configurations=None): if isinstance(force_download, str): force_download = force_download.lower() == "true" - # Resolve model_id model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download) + + gpu_count = int(guidellm_args.get("gpu_count", 1)) + """ # Start vLLM server @@ -50,28 +61,65 @@ def main(configurations=None): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], + gpu_count, ) if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) - raise AssertionError("Server failed to intialize") - + raise AssertionError("Server failed to initialize") """ + # 
Parse through environment variables for k, v in environment_args.items(): os.environ[k] = str(v) guidellm_args["model"] = model_id - from guidellm import generate_benchmark_report - guidellm_args = cast_args(guidellm_args, generate_benchmark_report) - report = generate_benchmark_report(**guidellm_args) - kill_process_tree(server_process.pid) - - task.upload_artifact(name="guidellm guidance report", artifact_object=report.to_json()) - task.upload_artifact(name="vLLM server log", artifact_object=server_log) - + import json + import asyncio + from pathlib import Path + from guidellm.benchmark import benchmark_generative_text + + # Ensure output_path is set and consistent + output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) + guidellm_args["output_path"] = str(output_path) + + print("[DEBUG] Calling benchmark_generative_text with:") + print(json.dumps(guidellm_args, indent=2)) + + try: + asyncio.run( + benchmark_generative_text( + target=guidellm_args["target"], + backend_type=guidellm_args.get("backend_type", "openai_http"), + backend_args=guidellm_args.get("backend_args", None), + model=guidellm_args.get("model"), + processor=guidellm_args.get("processor", None), + processor_args=guidellm_args.get("processor_args", None), + data=guidellm_args["data"], + data_args=guidellm_args.get("data_args", None), + data_sampler=guidellm_args.get("data_sampler", None), + rate_type=guidellm_args["rate_type"], + rate=guidellm_args.get("rate", None), + max_seconds=guidellm_args.get("max_seconds", None), + max_requests=guidellm_args.get("max_requests", None), + warmup_percent=guidellm_args.get("warmup_percent", None), + cooldown_percent=guidellm_args.get("cooldown_percent", None), + show_progress=not guidellm_args.get("disable_progress", False), + show_progress_scheduler_stats=guidellm_args.get("display_scheduler_stats", False), + output_console=not guidellm_args.get("disable_console_outputs", False), + output_path=output_path, + output_extras=guidellm_args.get("output_extras", None), + output_sampling=guidellm_args.get("output_sampling", None), + random_seed=guidellm_args.get("random_seed", 42), + ) + ) + + finally: + task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) + task.upload_artifact(name="vLLM server log", artifact_object=server_log) + kill_process_tree(server_process.pid) if __name__ == '__main__': main() From 02cac57e2410372147ab297d9bedc40376782134 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:14:57 +0000 Subject: [PATCH 13/91] update benchmark report --- src/automation/tasks/scripts/guidellm_script.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index aec6598..242e360 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -79,7 +79,8 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - from guidellm.benchmark import benchmark_generative_text + #from guidellm.benchmark import benchmark_generative_text + from guidellm.benchmark.output import GenerativeBenchmarksReport # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) @@ -88,6 +89,7 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) + """ try: asyncio.run( benchmark_generative_text( @@ -120,6 +122,7 @@ def 
clean_hocon_value(v): task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) + """ if __name__ == '__main__': main() From a85bb4fd8dd662754f8aab3e3fe21e69fb947148 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:18:51 +0000 Subject: [PATCH 14/91] update ip --- examples/guidellm_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f632c33..b2b5a91 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -12,7 +12,7 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, #target="http://localhost:8000/v1", - target="http://192.130.2.20:8000/v1", + target="http://192.130.2.29:8000/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", From c3af0cf01c6bf61a87b0e59c4418267eb5d01ab1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:25:37 +0000 Subject: [PATCH 15/91] update branch --- examples/guidellm_example.py | 1 + src/automation/tasks/base_task.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index b2b5a91..99f8775 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -17,6 +17,7 @@ max_seconds=30, data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} + branch = "update_guidellm" ) #from clearml import Task diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index d886599..4c74e85 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,6 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, + branch: str = "main" packages: Optional[Sequence[str]]=None, task_type: str="training", ): @@ -29,6 +30,7 @@ def __init__( self.packages = packages self.task_type = task_type self.task = None + self.branch= branch self.script_path = None self.callable_artifacts = None @@ -91,7 +93,7 @@ def create_task(self): add_task_init_call=True, script=self.script_path, repo="https://github.com/neuralmagic/research.git", - branch="main", + branch=self.branch, ) self.task.output_uri = DEFAULT_OUTPUT_URI self.set_arguments() From ede7482fad3497af2e0a2b4d2fb35abd0ac8ef10 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:27:06 +0000 Subject: [PATCH 16/91] added base task param --- src/automation/tasks/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 4c74e85..010e4a2 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str = "main" + branch: str = "main", packages: Optional[Sequence[str]]=None, task_type: str="training", ): From 87496ea0c6a20bab32cfc310d0326564c0841524 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:52:29 +0000 Subject: [PATCH 17/91] retry branch name --- examples/guidellm_example.py | 2 +- src/automation/tasks/base_task.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 99f8775..e67def3 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,8 +16,8 @@ 
data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", + branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} - branch = "update_guidellm" ) #from clearml import Task diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 010e4a2..95e07ba 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str = "main", + branch: str, packages: Optional[Sequence[str]]=None, task_type: str="training", ): From b64ffd8eee841922a3916c6ff28d695286b3b843 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:54:09 +0000 Subject: [PATCH 18/91] repo branch --- src/automation/tasks/guidellm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index a8560a8..10f3f80 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,6 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, + branch: str, task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, @@ -52,6 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, + branch = branch, ) # Check for conflicts in configs and constructor arguments From 7dc5e48c84cdc83a465c8efd667a0aef2c92a1a8 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:56:07 +0000 Subject: [PATCH 19/91] readd branch --- examples/guidellm_example.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index e67def3..3e078e9 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,8 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", - branch = "update_guidellm", + #branch = "update_guidellm", + #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) From 2d05c640c4bd8e65dad63eda0ca862a6551834bb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:00:11 +0000 Subject: [PATCH 20/91] branch in base task --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 10f3f80..f1a8904 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,7 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - branch: str, + #branch: Optional[str], task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, @@ -53,7 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, - branch = branch, + #branch = branch, ) # Check for conflicts in configs and constructor arguments From 60e6e9ed3ca8bf8a0f93dedbc536fdb00cbff0cb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:01:49 +0000 Subject: [PATCH 21/91] optional branch --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f1a8904..461c935 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,7 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - #branch: Optional[str], + branch: str="main" , task_name: str, model: str, server_wait_time: 
int=DEFAULT_SERVER_WAIT_TIME, @@ -53,7 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, - #branch = branch, + branch = branch, ) # Check for conflicts in configs and constructor arguments From ee4d7c94ccfa0745870f5fa4dda0da70ea465d3f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:02:44 +0000 Subject: [PATCH 22/91] add branch choice --- src/automation/tasks/base_task.py | 6 +++--- src/automation/tasks/guidellm.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 95e07ba..cf70ce2 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str, + branch: Optional[str] = "main", packages: Optional[Sequence[str]]=None, task_type: str="training", ): @@ -52,8 +52,8 @@ def process_config(self, config): return yaml.safe_load(open(STANDARD_CONFIGS[config], "r")) elif os.path.exists(config): return yaml.safe_load(open(config, "r")) - elif os.path.exists(os.path.join("..", "standatrds", config)): - return yaml.safe_load(open(os.path.join("..", "standatrds", config)), "r") + elif os.path.exists(os.path.join("..", "standards", config)): + return yaml.safe_load(open(os.path.join("..", "standards", config)), "r") else: return yaml.safe_load(config) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 461c935..17af41f 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,13 +17,13 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - branch: str="main" , task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, + branch: str="main", task_type: str="training", vllm_kwargs: dict={}, target: str="http://localhost:8000/v1", From 998a8bcadff0a177b4dcef46fcb1b3754851e537 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:04:25 +0000 Subject: [PATCH 23/91] include benchmark --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 242e360..822e5c3 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -89,6 +89,8 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) + GenerativeBenchmarksReport() + """ try: asyncio.run( From 6944cb496f51ba3f408874205d1c838db084353c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:12:40 +0000 Subject: [PATCH 24/91] refactor default --- examples/guidellm_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 3e078e9..85a09c8 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,7 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", - #branch = "update_guidellm", + branch = "update_guidellm", #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) From 6e4a5d59ed33bd030f7faa0f08a3a138d21f6def Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:42:49 +0000 Subject: [PATCH 25/91] moved generate 
text --- src/automation/tasks/scripts/guidellm_script.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 822e5c3..6a35c40 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -89,9 +89,9 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) - GenerativeBenchmarksReport() + #GenerativeBenchmarksReport() + from guidellm.benchmark.entrypoints import benchmark_generative_text - """ try: asyncio.run( benchmark_generative_text( @@ -124,7 +124,6 @@ def clean_hocon_value(v): task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) - """ if __name__ == '__main__': main() From 41f3f217a8e322cb713a890d69533deb912603fe Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:21:21 +0000 Subject: [PATCH 26/91] test --- examples/guidellm_example.py | 6 +++--- src/automation/configs.py | 1 + src/automation/tasks/base_task.py | 4 ++-- src/automation/tasks/guidellm.py | 4 ++-- src/automation/tasks/scripts/guidellm_script.py | 3 +++ 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 85a09c8..ff6de48 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -6,18 +6,18 @@ task = GuideLLMTask( project_name="alexandre_debug", task_name="test_guidellm_task", - model="meta-llama/Llama-3.2-1B-Instruct", + #model="meta-llama/Llama-3.2-1B-Instruct", + model="Qwen/Qwen2.5-1.5B-Instruct", rate_type="throughput", backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, #target="http://localhost:8000/v1", - target="http://192.130.2.29:8000/v1", + target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", - #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 22b5abd..5c4bf22 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,3 +2,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" +DEFAULT_RESEARCH_BRANCH = "main" diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index cf70ce2..9e0fa7a 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -1,6 +1,6 @@ from clearml import Task from typing import Sequence, Optional -from automation.configs import DEFAULT_OUTPUT_URI +from automation.configs import DEFAULT_OUTPUT_URI, DEFAULT_RESEARCH_BRANCH from automation.standards import STANDARD_CONFIGS import yaml import os @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: Optional[str] = "main", + branch: Optional[str] = DEFAULT_RESEARCH_BRANCH, packages: Optional[Sequence[str]]=None, task_type: str="training", ): diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 17af41f..a85eb83 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ 
-1,5 +1,5 @@ from automation.tasks import BaseTask -from automation.configs import DEFAULT_DOCKER_IMAGE +from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -23,7 +23,7 @@ def __init__( docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, - branch: str="main", + branch: str= DEFAULT_RESEARCH_BRANCH, task_type: str="training", vllm_kwargs: dict={}, target: str="http://localhost:8000/v1", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6a35c40..0c34137 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -92,6 +92,9 @@ def clean_hocon_value(v): #GenerativeBenchmarksReport() from guidellm.benchmark.entrypoints import benchmark_generative_text + import time + time.sleep(300) + try: asyncio.run( benchmark_generative_text( From 850fd213be8fcd217cbfb995064b95dceb723168 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:31:02 +0000 Subject: [PATCH 27/91] add debug --- src/automation/tasks/scripts/guidellm_script.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 0c34137..bdeb6a2 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -90,10 +90,17 @@ def clean_hocon_value(v): print(json.dumps(guidellm_args, indent=2)) #GenerativeBenchmarksReport() + import os + import sys + executable_path = os.path.dirname(sys.executable) + vllm_path = os.path.join(executable_path, "vllm") + print(f"The vllm path is: {vllm_path}") + from guidellm.benchmark.entrypoints import benchmark_generative_text + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - import time - time.sleep(300) + #import time + #time.sleep(300) try: asyncio.run( @@ -124,6 +131,8 @@ def clean_hocon_value(v): ) finally: + import time + time.sleep(300) task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) From 5e876746b3e82f7946217f4680dc87ae1a06b622 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:38:58 +0000 Subject: [PATCH 28/91] add os lib --- src/automation/tasks/scripts/guidellm_script.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index bdeb6a2..d62e97b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -1,4 +1,5 @@ import os +import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server @@ -90,8 +91,6 @@ def clean_hocon_value(v): print(json.dumps(guidellm_args, indent=2)) #GenerativeBenchmarksReport() - import os - import sys executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") From c9b63a822f91b6d9313e76713ae2f87a0bfbc451 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 13:13:42 +0000 Subject: [PATCH 29/91] use default scenario --- .../tasks/scripts/guidellm_script.py | 23 +------------------ 1 file 
changed, 1 insertion(+), 22 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d62e97b..9825e28 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -104,28 +104,7 @@ def clean_hocon_value(v): try: asyncio.run( benchmark_generative_text( - target=guidellm_args["target"], - backend_type=guidellm_args.get("backend_type", "openai_http"), - backend_args=guidellm_args.get("backend_args", None), - model=guidellm_args.get("model"), - processor=guidellm_args.get("processor", None), - processor_args=guidellm_args.get("processor_args", None), - data=guidellm_args["data"], - data_args=guidellm_args.get("data_args", None), - data_sampler=guidellm_args.get("data_sampler", None), - rate_type=guidellm_args["rate_type"], - rate=guidellm_args.get("rate", None), - max_seconds=guidellm_args.get("max_seconds", None), - max_requests=guidellm_args.get("max_requests", None), - warmup_percent=guidellm_args.get("warmup_percent", None), - cooldown_percent=guidellm_args.get("cooldown_percent", None), - show_progress=not guidellm_args.get("disable_progress", False), - show_progress_scheduler_stats=guidellm_args.get("display_scheduler_stats", False), - output_console=not guidellm_args.get("disable_console_outputs", False), - output_path=output_path, - output_extras=guidellm_args.get("output_extras", None), - output_sampling=guidellm_args.get("output_sampling", None), - random_seed=guidellm_args.get("random_seed", 42), + GenerativeTextScenario, ) ) From 4d68ea81e9cdb5b6e1fedc8ec04664d7a5438981 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 13:28:50 +0000 Subject: [PATCH 30/91] benchmark with scenario --- src/automation/tasks/scripts/guidellm_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 9825e28..05c7749 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -95,7 +95,7 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - from guidellm.benchmark.entrypoints import benchmark_generative_text + from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios #import time @@ -103,7 +103,7 @@ def clean_hocon_value(v): try: asyncio.run( - benchmark_generative_text( + benchmark_with_scenario( GenerativeTextScenario, ) ) From 0f07b28009aeca981d5155d7bfa22ae80e388d27 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 14:40:07 +0000 Subject: [PATCH 31/91] overlap with guidellm vars --- src/automation/tasks/scripts/guidellm_script.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 05c7749..967c5b5 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -100,11 +100,17 @@ def clean_hocon_value(v): #import time #time.sleep(300) + current_scenario = GenerativeTextScenario + overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) + for element in overlap_keys: + element_field_info = current_scenario.model_fields[element] + element_field_info.default = guidellm_args[element] + 
current_scenario.model_fields[element] = element_field_info try: asyncio.run( benchmark_with_scenario( - GenerativeTextScenario, + current_scenario, ) ) From 6a6705014d77e034bbace447ae3e35f9cdc4ddf0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 14:52:03 +0000 Subject: [PATCH 32/91] check model and target --- src/automation/tasks/scripts/guidellm_script.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 967c5b5..ef956a9 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -101,12 +101,17 @@ def clean_hocon_value(v): #import time #time.sleep(300) current_scenario = GenerativeTextScenario + print(current_scenario.model_fields["target"]) + print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info + print(current_scenario.model_fields["target"]) + print(current_scenario.model_fields["model"]) + try: asyncio.run( benchmark_with_scenario( From 72094b4720e92a8965875bce8e9f83365d143e38 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 15:07:35 +0000 Subject: [PATCH 33/91] add debugs --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index ef956a9..e6b8a5d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -105,9 +105,11 @@ def clean_hocon_value(v): print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: + print(element) element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info + print(element_field_info.annotation) print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) From 10180a38d9baed7c87b7d325ca6f73a991b3a870 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 15:25:24 +0000 Subject: [PATCH 34/91] list keys that overlap --- src/automation/tasks/scripts/guidellm_script.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e6b8a5d..69f0065 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -105,11 +105,12 @@ def clean_hocon_value(v): print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: - print(element) + #print(element) element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info - print(element_field_info.annotation) + #print(element_field_info.annotation) + print(overlap_keys) print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) From 9191f13210fe732ae9f5e0133bab72a74621d619 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 
18:17:30 +0000 Subject: [PATCH 35/91] only replace model --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 69f0065..06f5e9d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -104,6 +104,7 @@ def clean_hocon_value(v): print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) + overlap_keys = ["model"] for element in overlap_keys: #print(element) element_field_info = current_scenario.model_fields[element] From 1b0e4a4ec765cf86bd80c290f35a7a9c168648fe Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 22:56:33 +0000 Subject: [PATCH 36/91] update with scenario --- examples/guidellm_example.py | 4 ++-- src/automation/tasks/scripts/guidellm_script.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index ff6de48..6fc65c1 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,5 +1,3 @@ -import os -import sys from automation.tasks import GuideLLMTask @@ -25,6 +23,8 @@ #task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") #task.execute_locally() +import os +import sys executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 06f5e9d..6833b88 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -82,6 +82,8 @@ def clean_hocon_value(v): from pathlib import Path #from guidellm.benchmark import benchmark_generative_text from guidellm.benchmark.output import GenerativeBenchmarksReport + from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) @@ -95,16 +97,16 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + current_scenario = GenerativeTextScenario.from_builtin("chat", dict(guidellm_args)) #import time #time.sleep(300) + """ current_scenario = GenerativeTextScenario print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) - overlap_keys = ["model"] + #overlap_keys = ["model"] for element in overlap_keys: #print(element) element_field_info = current_scenario.model_fields[element] @@ -116,16 +118,19 @@ def clean_hocon_value(v): print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) + current_scenario = GenerativeTextScenario + """ + try: asyncio.run( benchmark_with_scenario( current_scenario, + output_path= output_path, + output_extras= None ) ) finally: - import time - 
time.sleep(300) task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) From 7515a617f4845f64d00c60d7e9eff53e343f4946 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:23:39 +0000 Subject: [PATCH 37/91] readd default scenario --- src/automation/tasks/scripts/guidellm_script.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6833b88..7e7f58d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -97,7 +97,9 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - current_scenario = GenerativeTextScenario.from_builtin("chat", dict(guidellm_args)) + default_scenario = get_builtin_scenarios()[0] + + current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) #import time #time.sleep(300) From e6318f503197e385b99d960327b93f2d13684fc9 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:28:20 +0000 Subject: [PATCH 38/91] readd default scenario --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 7e7f58d..4453c1b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -97,6 +97,8 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") + + print(get_builtin_scenarios()) default_scenario = get_builtin_scenarios()[0] current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) From 9f61d6e594b42babd8f92dc70321075f8a6f0672 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:29:46 +0000 Subject: [PATCH 39/91] pin to main --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index a85eb83..2ca454c 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,7 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" class GuideLLMTask(BaseTask): From 8c8c23e11000bbfe976761a7cb2da2f4994eb9d4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 10:31:49 +0000 Subject: [PATCH 40/91] readd vllm server --- src/automation/tasks/scripts/guidellm_script.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 4453c1b..7b6e3bb 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -54,8 +54,6 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) - """ - # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -69,7 +67,6 @@ def clean_hocon_value(v): kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to 
initialize") - """ # Parse through environment variables for k, v in environment_args.items(): From ec725d1377c589f8b5be50a86c5b5860cbd830b7 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 11:18:31 +0000 Subject: [PATCH 41/91] updated vllm server --- src/automation/vllm/server.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 6d59674..75c331d 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -14,32 +14,29 @@ def start_vllm_server( vllm_args, model_id, target, - server_wait_time, + server_wait_time, + gpu_count, ): task = Task.current_task() executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") - num_gpus = torch.cuda.device_count() + available_gpus = list(range(torch.cuda.device_count())) + selected_gpus = available_gpus[:gpu_count] + + subprocess_env = os.environ.copy() + subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus) parsed_target = urlparse(target) - """ server_command = [ f"{vllm_path}", "serve", model_id, "--host", parsed_target.hostname, "--port", str(parsed_target.port), - "--tensor-parallel-size", str(num_gpus) + "--tensor-parallel-size", str(gpu_count), ] - """ - - server_command = [ - f"{vllm_path}", "serve", - "Qwen/Qwen2.5-1.5B-Instruct", - ] - subprocess_env = os.environ.copy() @@ -53,6 +50,7 @@ def start_vllm_server( server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") + print("Server command:", " ".join(server_command)) server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) delay = 5 @@ -60,6 +58,7 @@ def start_vllm_server( for _ in range(server_wait_time // delay): try: response = requests.get(target + "/models") + print(f"response: {response}") if response.status_code == 200: print("Server initialized") server_initialized = True @@ -72,4 +71,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name + return server_process, False, server_log_file_name \ No newline at end of file From 5b223098614a2411c934c517a85dd055f18e52cc Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 12:22:55 +0000 Subject: [PATCH 42/91] print the input vars --- src/automation/tasks/scripts/guidellm_script.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 7b6e3bb..e092803 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -54,6 +54,12 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) + print(vllm_args) + print(model_id) + print(guidellm_args["target"]) + print(args["Args"]["server_wait_time"]) + print(gpu_count) + # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 5e8053a4867a57eea1a194a286c04ab7775e7501 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 12:34:56 +0000 Subject: [PATCH 43/91] remove gpu count --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e092803..e9612d2 100644 --- 
a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -66,7 +66,7 @@ def clean_hocon_value(v): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], - gpu_count, + #gpu_count, ) if not server_initialized: From af3ebaa2addcf263997a944374e70349edb4f545 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 14:12:59 +0000 Subject: [PATCH 44/91] simple path --- src/automation/vllm/server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 75c331d..0ebf48a 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -29,7 +29,13 @@ def start_vllm_server( subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus) parsed_target = urlparse(target) + print(f"vllm path is: {vllm_path}") + server_command = [ + f"{vllm_path}", "serve", + "Qwen/Qwen2.5-1.5B-Instruct", + ] + """ server_command = [ f"{vllm_path}", "serve", model_id, @@ -37,6 +43,7 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(gpu_count), ] + """ subprocess_env = os.environ.copy() @@ -71,4 +78,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name \ No newline at end of file + return server_process, False, server_log_file_name From 5c4f5b865f3e21390933d5563801ef94f2252e8f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 14:37:53 +0000 Subject: [PATCH 45/91] vllm print --- src/automation/vllm/server.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 0ebf48a..7eeadad 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -19,6 +19,8 @@ def start_vllm_server( ): task = Task.current_task() + print("Inside start vllm server") + executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") @@ -35,6 +37,8 @@ def start_vllm_server( "Qwen/Qwen2.5-1.5B-Instruct", ] + print(server_command) + """ server_command = [ f"{vllm_path}", "serve", From b8a1e9f2520cef77d5add2d948225f39efabf274 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:22:17 +0000 Subject: [PATCH 46/91] added cwd --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e9612d2..5b9c1cd 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -59,6 +59,7 @@ def clean_hocon_value(v): print(guidellm_args["target"]) print(args["Args"]["server_wait_time"]) print(gpu_count) + print(os.getcwd()) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( From 0365496f82d55d3145c6baf925023674e95796b1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:48:10 +0000 Subject: [PATCH 47/91] ensure setup uses branch --- examples/guidellm_example.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 6fc65c1..6d0dbf4 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,7 @@ max_seconds=30, data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", - vllm_kwargs={"enable-chunked-prefill": 
True} + #vllm_kwargs={"enable-chunked-prefill": True} ) #from clearml import Task diff --git a/setup.py b/setup.py index f0df931..c2112da 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ version="0.1.0", author="NM MLR", description="Automation tools", - url="https://github.com/neuralmagic/research", + #url="https://github.com/neuralmagic/research", package_dir={"": "src"}, packages=find_packages( "src", include=["automation", "automation.*"], exclude=["*.__pycache__.*"] From 348fd82617b69251d8d961f2a9b22abbf9db4b53 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:52:40 +0000 Subject: [PATCH 48/91] add guide again --- src/automation/tasks/base_task.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 9e0fa7a..12ce3bc 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -7,7 +7,8 @@ class BaseTask(): - base_packages = ["git+https://github.com/neuralmagic/research.git"] + #base_packages = ["git+https://github.com/neuralmagic/research.git"] + base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] def __init__( self, From cb882af83ce0b97593d71306833f843fdc03fb7b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:58:42 +0000 Subject: [PATCH 49/91] readd gpu count --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 5b9c1cd..641c44b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -67,7 +67,7 @@ def clean_hocon_value(v): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], - #gpu_count, + gpu_count, ) if not server_initialized: From 464591e62bea7c0ffd1be1cdd93a6e61aaf01598 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 17:14:11 +0000 Subject: [PATCH 50/91] update vllm server --- src/automation/vllm/server.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 7eeadad..948c361 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -32,14 +32,13 @@ def start_vllm_server( parsed_target = urlparse(target) print(f"vllm path is: {vllm_path}") + """ server_command = [ f"{vllm_path}", "serve", "Qwen/Qwen2.5-1.5B-Instruct", ] - - print(server_command) - """ + server_command = [ f"{vllm_path}", "serve", model_id, @@ -47,8 +46,8 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(gpu_count), ] - """ + print(server_command) subprocess_env = os.environ.copy() for k, v in vllm_args.items(): From c0d0dba9f7c9555e2af636c9abb2a7b0a831580c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 17:24:22 +0000 Subject: [PATCH 51/91] revert target --- examples/guidellm_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 6d0dbf4..08e042b 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -10,8 +10,8 @@ backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, - #target="http://localhost:8000/v1", - target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", + target="http://localhost:8000/v1", + #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", 
data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", From 81c62f7ee6c59d184dcf48c42473ea59ac8059eb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:07:25 +0000 Subject: [PATCH 52/91] install editable guidellm --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 2ca454c..e37b4e6 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" +GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" class GuideLLMTask(BaseTask): From 97e36cba71b28654a719e8e096c6c89d4cd948c0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:30:38 +0000 Subject: [PATCH 53/91] print package list --- src/automation/tasks/guidellm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index e37b4e6..f153003 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -39,14 +39,18 @@ def __init__( # Set packages, taking into account default packages # for the LMEvalTask and packages set in the config + print(self.guidellm_packages) + print(packages) if packages is not None: packages = list(set(packages + self.guidellm_packages)) else: packages = self.guidellm_packages + print(packages) if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) + print(packages) # Initialize base parameters super().__init__( project_name=project_name, From 063c8b971335f62c1d58ef489f7de6f3655000b1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:40:39 +0000 Subject: [PATCH 54/91] added package print --- src/automation/tasks/base_task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 12ce3bc..1a605e0 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -25,6 +25,8 @@ def __init__( else: packages = self.base_packages + print(packages) + self.project_name = project_name self.task_name = task_name self.docker_image = docker_image From d6ef26605458668af477c1e0de7ed850df40c244 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 19:09:18 +0000 Subject: [PATCH 55/91] older guidellm --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f153003..fea7298 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,7 +5,8 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" +#GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" class GuideLLMTask(BaseTask): From 8c649103d1bd9c782bd23e32c5f0db3caab1dd9e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 09:58:04 +0000 Subject: [PATCH 56/91] updated to use dev branch --- src/automation/tasks/guidellm.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index fea7298..bf6d620 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,8 +5,8 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" -#GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 7dee38bb400493cf046bd20866d1d078bdac5922 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 10:53:33 +0000 Subject: [PATCH 57/91] redo with custom branch --- src/automation/tasks/guidellm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index bf6d620..4031794 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,9 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 263c2ff9a95c1e11d149433b01472df381f2f114 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:09:19 +0000 Subject: [PATCH 58/91] repo override --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 4031794..f01a1dc 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 90e461b61a16204e45509e4c47d46d7e49239c4b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:13:06 +0000 Subject: [PATCH 59/91] add packages to guidellm --- src/automation/tasks/guidellm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f01a1dc..9bca651 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -11,6 +11,9 @@ class GuideLLMTask(BaseTask): guidellm_packages = [ + "build>=1.0.0", + "setuptools>=61.0", + "setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", From 4f00a5a03df2b76900e6c31b4c33854178624ef7 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:28:38 +0000 Subject: [PATCH 60/91] update setup.py --- setup.py | 3 +++ src/automation/tasks/guidellm.py | 6 +++--- 2 files 
changed, 6 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index c2112da..6387290 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,9 @@ #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", + "build>=1.0.0", + "setuptools>=61.0", + "setuptools-git-versioning>=2.0,<3", ], python_requires=">=3.7", ) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 9bca651..e695a49 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -11,9 +11,9 @@ class GuideLLMTask(BaseTask): guidellm_packages = [ - "build>=1.0.0", - "setuptools>=61.0", - "setuptools-git-versioning>=2.0,<3", + #"build>=1.0.0", + #"setuptools>=61.0", + #"setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", From 14f84ce53bc761c0ae75ef894a474d601d340405 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:51:47 +0000 Subject: [PATCH 61/91] readd --- src/automation/tasks/guidellm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index e695a49..c86fb14 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,4 +1,5 @@ from automation.tasks import BaseTask +from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -51,6 +52,8 @@ def __init__( packages = self.guidellm_packages print(packages) + print(get_builtin_scenarios()) + default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) From ad2b4237504240727981ce9ca5c05a70ed5a929d Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:58:14 +0000 Subject: [PATCH 62/91] before vllm --- src/automation/tasks/guidellm.py | 6 +++--- src/automation/tasks/scripts/guidellm_script.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index c86fb14..9a7eb06 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,5 +1,5 @@ from automation.tasks import BaseTask -from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios +#from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -52,8 +52,8 @@ def __init__( packages = self.guidellm_packages print(packages) - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + #print(get_builtin_scenarios()) + #default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 641c44b..6c03f47 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -61,6 +61,9 @@ def clean_hocon_value(v): print(gpu_count) print(os.getcwd()) + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + print(get_builtin_scenarios()) + default_scenario = get_builtin_scenarios()[0] # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 98eb6f8ec241b7c476ff1f5cdb99acef1da7c245 
Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:19:28 +0000 Subject: [PATCH 63/91] removed vllm --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 9a7eb06..877f282 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -15,7 +15,7 @@ class GuideLLMTask(BaseTask): #"build>=1.0.0", #"setuptools>=61.0", #"setuptools-git-versioning>=2.0,<3", - "vllm", + #"vllm", GUIDELLM_PACKAGE, "hf_xet", ] From 10874d3d7f4a4aeeadb77855a1746da5236fc3b0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:22:50 +0000 Subject: [PATCH 64/91] remove vllm --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6c03f47..f522815 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -2,7 +2,7 @@ import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree -from automation.vllm import start_vllm_server +#from automation.vllm import start_vllm_server from pyhocon import ConfigFactory def main(): From 629d195d972510019892e8ab91174c50b535347c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:32:34 +0000 Subject: [PATCH 65/91] cleanup --- src/automation/tasks/guidellm.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 877f282..2dc6336 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,5 +1,4 @@ from automation.tasks import BaseTask -#from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -52,8 +51,6 @@ def __init__( packages = self.guidellm_packages print(packages) - #print(get_builtin_scenarios()) - #default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) From 768d13589592297b379f5f22f82cb9b3a8a15b77 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 14:48:45 +0000 Subject: [PATCH 66/91] back to base --- setup.py | 3 --- src/automation/standards/benchmarking/chat.json | 13 +++++++++++++ src/automation/tasks/guidellm.py | 5 +++-- src/automation/tasks/scripts/guidellm_script.py | 14 ++++++++------ 4 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 src/automation/standards/benchmarking/chat.json diff --git a/setup.py b/setup.py index 6387290..c2112da 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,6 @@ #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", - "build>=1.0.0", - "setuptools>=61.0", - "setuptools-git-versioning>=2.0,<3", ], python_requires=">=3.7", ) diff --git a/src/automation/standards/benchmarking/chat.json b/src/automation/standards/benchmarking/chat.json new file mode 100644 index 0000000..024438c --- /dev/null +++ b/src/automation/standards/benchmarking/chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff 
--git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 2dc6336..6b6c1cb 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" @@ -14,7 +15,7 @@ class GuideLLMTask(BaseTask): #"build>=1.0.0", #"setuptools>=61.0", #"setuptools-git-versioning>=2.0,<3", - #"vllm", + "vllm", GUIDELLM_PACKAGE, "hf_xet", ] diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index f522815..d50c441 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -61,9 +61,9 @@ def clean_hocon_value(v): print(gpu_count) print(os.getcwd()) - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + from pathlib import Path + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -105,10 +105,12 @@ def clean_hocon_value(v): print(f"The vllm path is: {vllm_path}") - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + #default_scenario = get_builtin_scenarios()[0] + #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) + from pathlib import Path + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) #import time #time.sleep(300) From 09c3978c73f3b9522149f03144b47316cb65b8ee Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 14:56:05 +0000 Subject: [PATCH 67/91] readd --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d50c441..d1b0c95 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -62,6 +62,7 @@ def clean_hocon_value(v): print(os.getcwd()) from pathlib import Path + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server From e64fb1226cb90b776c854e87e93910236b3317d1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 15:03:06 +0000 Subject: [PATCH 68/91] readd start vllm server --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d1b0c95..bd70a5f 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -2,7 +2,7 @@ import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree -#from automation.vllm import start_vllm_server +from automation.vllm import start_vllm_server from pyhocon import ConfigFactory def main(): From 873c222cbca8f62527ab932a4e81af91d9d3a37b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 15:12:42 +0000 Subject: [PATCH 69/91] use guidellm branch --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 6b6c1cb..90827f3 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,9 +4,9 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 16b83bc8542b10417da077b5867b63d83f9f9582 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 16:30:51 +0000 Subject: [PATCH 70/91] base complete --- examples/guidellm_example.py | 4 +++- src/automation/tasks/guidellm.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 08e042b..cd9af23 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -14,7 +14,9 @@ #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, - data="prompt_tokens=512,generated_tokens=256", + #data="{'prompt_tokens': 512, 'generated_tokens': 256, 'output_tokens' : 256}", + data="prompt_tokens=512,generated_tokens=256,output_tokens=256", + #data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 90827f3..c0ff8f7 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,8 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" From 432031ed3c6827aeaa88332b2daa4538f907203c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 16:57:35 +0000 Subject: [PATCH 71/91] test rag --- examples/guidellm_example.py | 11 ----------- src/automation/standards/benchmarking/rag.json | 13 +++++++++++++ 
src/automation/tasks/scripts/guidellm_script.py | 8 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) create mode 100644 src/automation/standards/benchmarking/rag.json diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index cd9af23..f539192 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,22 +11,11 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, - #data="{'prompt_tokens': 512, 'generated_tokens': 256, 'output_tokens' : 256}", data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - #data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) -#from clearml import Task -#task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") -#task.execute_locally() -import os -import sys -executable_path = os.path.dirname(sys.executable) -vllm_path = os.path.join(executable_path, "vllm") -print(f"The vllm path is: {vllm_path}") diff --git a/src/automation/standards/benchmarking/rag.json b/src/automation/standards/benchmarking/rag.json new file mode 100644 index 0000000..c7ee2f2 --- /dev/null +++ b/src/automation/standards/benchmarking/rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 4096, + "prompt_tokens_stdev": 512, + "prompt_tokens_min": 2048, + "prompt_tokens_max": 6144, + "output_tokens": 512, + "output_tokens_stdev": 128, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index bd70a5f..64dbae7 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -63,7 +63,7 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( @@ -109,9 +109,9 @@ def clean_hocon_value(v): #default_scenario = get_builtin_scenarios()[0] #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - from pathlib import Path - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + #from pathlib import Path + #filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + #current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) #import time #time.sleep(300) From e9117eacfa4b07678a7e4ea229d54d408d07a246 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 17:51:29 +0000 Subject: [PATCH 72/91] clean up --- src/automation/tasks/base_task.py | 3 +- src/automation/tasks/guidellm.py | 6 ---- .../tasks/scripts/guidellm_script.py | 32 ------------------- 3 files changed, 2 insertions(+), 39 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 
1a605e0..2e29030 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -8,7 +8,7 @@ class BaseTask(): #base_packages = ["git+https://github.com/neuralmagic/research.git"] - base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] + #base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] def __init__( self, @@ -19,6 +19,7 @@ def __init__( packages: Optional[Sequence[str]]=None, task_type: str="training", ): + base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] if packages is not None: packages = list(set(packages + self.base_packages)) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index c0ff8f7..f0f7758 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,16 +5,10 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): guidellm_packages = [ - #"build>=1.0.0", - #"setuptools>=61.0", - #"setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 64dbae7..525aab0 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -88,7 +88,6 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - #from guidellm.benchmark import benchmark_generative_text from guidellm.benchmark.output import GenerativeBenchmarksReport from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios @@ -100,41 +99,10 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) - #GenerativeBenchmarksReport() executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - - #default_scenario = get_builtin_scenarios()[0] - #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - - #from pathlib import Path - #filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) - #current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - - #import time - #time.sleep(300) - """ - current_scenario = GenerativeTextScenario - print(current_scenario.model_fields["target"]) - print(current_scenario.model_fields["model"]) - overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) - #overlap_keys = ["model"] - for element in overlap_keys: - #print(element) - element_field_info = current_scenario.model_fields[element] - element_field_info.default = guidellm_args[element] - current_scenario.model_fields[element] = element_field_info - #print(element_field_info.annotation) - print(overlap_keys) - - print(current_scenario.model_fields["target"]) - print(current_scenario.model_fields["model"]) - - current_scenario = GenerativeTextScenario - """ - try: asyncio.run( benchmark_with_scenario( 
From 9984a8ccc2e7d0660aee888fb676fff62f147658 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 17:54:11 +0000 Subject: [PATCH 73/91] base package as variable --- src/automation/tasks/base_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 2e29030..4f20e67 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -22,9 +22,9 @@ def __init__( base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] if packages is not None: - packages = list(set(packages + self.base_packages)) + packages = list(set(packages + base_packages)) else: - packages = self.base_packages + packages = base_packages print(packages) From b8b51e9b8c4e00606346b5301fe38bab8d5845c4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 18:04:07 +0000 Subject: [PATCH 74/91] test default branch change --- src/automation/configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index 5c4bf22..a3cca2a 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,4 +2,5 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" -DEFAULT_RESEARCH_BRANCH = "main" +#DEFAULT_RESEARCH_BRANCH = "main" +DEFAULT_RESEARCH_BRANCH = "update_guidellm" From b99afec4b02bd991d5a3ec18dda16f1edad18aee Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 18:53:20 +0000 Subject: [PATCH 75/91] update branch names --- examples/guidellm_example.py | 2 +- src/automation/tasks/base_task.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f539192..dab5604 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -14,7 +14,7 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - branch = "update_guidellm", + #branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 4f20e67..74fa1ba 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -19,7 +19,8 @@ def __init__( packages: Optional[Sequence[str]]=None, task_type: str="training", ): - base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] + branch_name = branch or DEFAULT_RESEARCH_BRANCH + base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch_name}"] if packages is not None: packages = list(set(packages + base_packages)) From b2c29184aedb5a9459ac7ef94b58ef611556ed66 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:03:56 +0000 Subject: [PATCH 76/91] use main branch in config --- src/automation/configs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index a3cca2a..5c4bf22 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,5 +2,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" -#DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_RESEARCH_BRANCH = "update_guidellm" +DEFAULT_RESEARCH_BRANCH = 
"main" From d1e686b74ed5ab6573d8e9a43f65bc7235c25e32 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:33:45 +0000 Subject: [PATCH 77/91] print the scenario --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 525aab0..d49bb38 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -65,6 +65,7 @@ def clean_hocon_value(v): from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 5d3e3ff29eadf4594826d5218bb8b0b19713a22b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:37:40 +0000 Subject: [PATCH 78/91] modify tokens --- examples/guidellm_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index dab5604..2a4d536 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -13,8 +13,9 @@ target="http://localhost:8000/v1", data_type="emulated", max_seconds=30, - data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - #branch = "update_guidellm", + #data="prompt_tokens=512,generated_tokens=256,output_tokens=256", + data="prompt_tokens=128,generated_tokens=128,output_tokens=128", + branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) From 3b0d86c0655adc3f3a838592b5e57064d1e5f7ca Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 09:29:24 +0000 Subject: [PATCH 79/91] revert lmeval and setup.py, update vllm server log --- examples/lmeval_example.py | 4 +--- setup.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 7c8ee1e..fd07a7b 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -3,13 +3,11 @@ task = LMEvalTask( project_name="alexandre_debug", task_name="test_lmeval_task", - branch = "update_guidellm", model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", batch_size="auto", ) -#task.execute_remotely("oneshot-a100x1") -task.execute_remotely("remote-upgrade-default") +task.execute_remotely("oneshot-a100x1") #task.execute_locally() diff --git a/setup.py b/setup.py index c2112da..755f2ea 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - #"google-cloud-storage>=1.13.2", + "google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From a2d6eb5749019fb9eb865af021814168c600fc39 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 09:54:02 +0000 Subject: [PATCH 80/91] readd default scenarios --- examples/guidellm_example.py | 6 +++--- setup.py | 2 +- src/automation/configs.py | 1 + src/automation/tasks/scripts/guidellm_script.py | 8 ++++++-- src/automation/vllm/server.py | 7 +++++-- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 2a4d536..c5d5df4 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -13,10 +13,10 @@ target="http://localhost:8000/v1", 
data_type="emulated", max_seconds=30, - #data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - data="prompt_tokens=128,generated_tokens=128,output_tokens=128", + #config = "benchmarking_32k", + data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", - #vllm_kwargs={"enable-chunked-prefill": True} + vllm_kwargs={"enable-chunked-prefill": True} ) task.execute_remotely("remote-upgrade-default") diff --git a/setup.py b/setup.py index 755f2ea..9a0a63c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ version="0.1.0", author="NM MLR", description="Automation tools", - #url="https://github.com/neuralmagic/research", + url="https://github.com/neuralmagic/research", package_dir={"": "src"}, packages=find_packages( "src", include=["automation", "automation.*"], exclude=["*.__pycache__.*"] diff --git a/src/automation/configs.py b/src/automation/configs.py index 5c4bf22..dc67b98 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,3 +3,4 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" +DEFAULT_GUIDELLM_SCENARIO = "rag" diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d49bb38..e45fd33 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -4,6 +4,7 @@ from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server from pyhocon import ConfigFactory +from automation.configs import DEFAULT_GUIDELLM_SCENARIO def main(): task = Task.current_task() @@ -63,8 +64,11 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + if len(get_builtin_scenarios()) > 0: + current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) + else: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 948c361..011ab1a 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -55,12 +55,15 @@ def start_vllm_server( subprocess_env[k] = str(v) else: if v == True or v == "True": - v = "true" - server_command.extend([f"--{k}", str(v)]) + server_command.append(f"--{k}") + else: + server_command.extend([f"--{k}", str(v)]) + server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") print("Server command:", " ".join(server_command)) + print(f"VLLM logs are located at: {server_log_file} in {os.getcwd()}") server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) delay = 5 From 81f519990163caabf8daa6bad13ba4cb7b7e6175 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 10:33:10 +0000 Subject: [PATCH 81/91] change default guidellm json --- src/automation/configs.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index dc67b98..0ba89f3 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,4 +3,4 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_GUIDELLM_SCENARIO = "rag" +DEFAULT_GUIDELLM_SCENARIO = "chat" From 155033397f77ec0795e19c0a4d0d29182b591893 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 10:56:06 +0000 Subject: [PATCH 82/91] add config examples json --- src/automation/configs.py | 3 ++- .../standards/benchmarking/benchmarking_128k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_16k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_32k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_64k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_chat.json | 13 +++++++++++++ .../benchmarking/benchmarking_code_completion.json | 13 +++++++++++++ .../benchmarking/benchmarking_code_fixing.json | 13 +++++++++++++ .../benchmarking_docstring_generation.json | 13 +++++++++++++ .../benchmarking/benchmarking_instruction.json | 13 +++++++++++++ .../benchmarking/benchmarking_long_rag.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_rag.json | 13 +++++++++++++ .../benchmarking/benchmarking_summarization.json | 13 +++++++++++++ 13 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 src/automation/standards/benchmarking/benchmarking_128k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_16k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_32k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_64k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_chat.json create mode 100644 src/automation/standards/benchmarking/benchmarking_code_completion.json create mode 100644 src/automation/standards/benchmarking/benchmarking_code_fixing.json create mode 100644 src/automation/standards/benchmarking/benchmarking_docstring_generation.json create mode 100644 src/automation/standards/benchmarking/benchmarking_instruction.json create mode 100644 src/automation/standards/benchmarking/benchmarking_long_rag.json create mode 100644 src/automation/standards/benchmarking/benchmarking_rag.json create mode 100644 src/automation/standards/benchmarking/benchmarking_summarization.json diff --git a/src/automation/configs.py b/src/automation/configs.py index 0ba89f3..75e0483 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,4 +3,5 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_GUIDELLM_SCENARIO = "chat" +#DEFAULT_GUIDELLM_SCENARIO = "chat" +DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" diff --git a/src/automation/standards/benchmarking/benchmarking_128k.json b/src/automation/standards/benchmarking/benchmarking_128k.json new file mode 100644 index 0000000..13b8105 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_128k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 128000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 128000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_16k.json 
b/src/automation/standards/benchmarking/benchmarking_16k.json new file mode 100644 index 0000000..f927a4a --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_16k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 16000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 16000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_32k.json b/src/automation/standards/benchmarking/benchmarking_32k.json new file mode 100644 index 0000000..6543fd7 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_32k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 32000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 32000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_64k.json b/src/automation/standards/benchmarking/benchmarking_64k.json new file mode 100644 index 0000000..871b210 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_64k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 64000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 64000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_chat.json b/src/automation/standards/benchmarking/benchmarking_chat.json new file mode 100644 index 0000000..f4d0548 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 512, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 256 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_completion.json b/src/automation/standards/benchmarking/benchmarking_code_completion.json new file mode 100644 index 0000000..6be35df --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_completion.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_fixing.json b/src/automation/standards/benchmarking/benchmarking_code_fixing.json new file mode 100644 index 0000000..bceff14 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_fixing.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_docstring_generation.json b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json new file mode 100644 index 0000000..0eda212 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json @@ -0,0 +1,13 
@@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 768, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 768, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_instruction.json b/src/automation/standards/benchmarking/benchmarking_instruction.json new file mode 100644 index 0000000..0fac491 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_instruction.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_long_rag.json b/src/automation/standards/benchmarking/benchmarking_long_rag.json new file mode 100644 index 0000000..4fe719a --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_long_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 10240, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 10240, + "output_tokens": 1536, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1536 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_rag.json b/src/automation/standards/benchmarking/benchmarking_rag.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_summarization.json b/src/automation/standards/benchmarking/benchmarking_summarization.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_summarization.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} From 420137d174bd72aeddc2aa556735f0df47641636 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 11:16:19 +0000 Subject: [PATCH 83/91] use original default --- examples/guidellm_example.py | 4 ++-- src/automation/configs.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index c5d5df4..0ebc151 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,10 +11,10 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - data_type="emulated", + #data_type="emulated", max_seconds=30, #config = "benchmarking_32k", - data="prompt_tokens=128,output_tokens=128", + #data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 75e0483..094f478 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,5 +3,5 @@ #DEFAULT_OUTPUT_URI = 
"gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -#DEFAULT_GUIDELLM_SCENARIO = "chat" -DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" +DEFAULT_GUIDELLM_SCENARIO = "chat" +#DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" From 9d284c978ec16ac1f16cce4c8cb1eb18563dbfe2 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 11:22:01 +0000 Subject: [PATCH 84/91] add log --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e45fd33..07905e3 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -64,6 +64,7 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + print(get_builtin_scenarios()) if len(get_builtin_scenarios()) > 0: current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) else: From e863516b9d86a4572893bc2a580a1583011ad8ea Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:02:24 +0000 Subject: [PATCH 85/91] include user scenario --- src/automation/tasks/scripts/guidellm_script.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 07905e3..efa91fb 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -64,9 +64,12 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - print(get_builtin_scenarios()) - if len(get_builtin_scenarios()) > 0: - current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) + user_scenario = guidellm_args.get("scenario", "") + if user_scenario: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + #elif len(get_builtin_scenarios()) > 0: + # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) else: filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) From 3703e62d0b2ab0d25e5f5fdf1902dd49013768be Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:14:37 +0000 Subject: [PATCH 86/91] revert lmeval example --- examples/lmeval_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index fd07a7b..688c355 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -6,7 +6,7 @@ model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", - batch_size="auto", + batch_size="auto", ) task.execute_remotely("oneshot-a100x1") From d1b985ac627f5a2f1979679b1486536ceca27b71 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:25:15 +0000 Subject: [PATCH 87/91] add file error handling --- .../tasks/scripts/guidellm_script.py | 39 ++++++++----------- src/automation/vllm/server.py | 6 --- 2 files changed, 17 insertions(+), 28 
deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index efa91fb..35269a9 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -55,25 +55,6 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) - print(vllm_args) - print(model_id) - print(guidellm_args["target"]) - print(args["Args"]["server_wait_time"]) - print(gpu_count) - print(os.getcwd()) - - from pathlib import Path - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - user_scenario = guidellm_args.get("scenario", "") - if user_scenario: - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - #elif len(get_builtin_scenarios()) > 0: - # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) - else: - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -97,15 +78,29 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - from guidellm.benchmark.output import GenerativeBenchmarksReport - from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario + from guidellm.benchmark.entrypoints import benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + user_scenario = guidellm_args.get("scenario", "") + if user_scenario: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + if os.path.exists(filepath): + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + else: + raise ValueError(f"Scenario path {filepath} does not exist") + #elif len(get_builtin_scenarios()) > 0: + # to be used when get_builtin_scenarios() bug is fiexed + # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) + else: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + print(current_scenario.model_fields) + # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) guidellm_args["output_path"] = str(output_path) - print("[DEBUG] Calling benchmark_generative_text with:") + print("[DEBUG] Calling benchmark_with_scenario with:") print(json.dumps(guidellm_args, indent=2)) executable_path = os.path.dirname(sys.executable) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 011ab1a..2e7d321 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -32,12 +32,6 @@ def start_vllm_server( parsed_target = urlparse(target) print(f"vllm path is: {vllm_path}") - """ - server_command = [ - f"{vllm_path}", "serve", - "Qwen/Qwen2.5-1.5B-Instruct", - ] - """ server_command = [ f"{vllm_path}", "serve", From e60aab1acb640b5e9bccc2908c2d3be190bb43e0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: 
Thu, 3 Jul 2025 12:28:24 +0000 Subject: [PATCH 88/91] removed package prints --- src/automation/tasks/guidellm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f0f7758..a85eb83 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -38,18 +38,14 @@ def __init__( # Set packages, taking into account default packages # for the LMEvalTask and packages set in the config - print(self.guidellm_packages) - print(packages) if packages is not None: packages = list(set(packages + self.guidellm_packages)) else: packages = self.guidellm_packages - print(packages) if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) - print(packages) # Initialize base parameters super().__init__( project_name=project_name, From 515a1dbe60a15902bc3ae3406dbdcb76f6f3eeb3 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 13:16:19 +0000 Subject: [PATCH 89/91] default config --- examples/guidellm_example.py | 5 ++--- src/automation/configs.py | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 0ebc151..ea0c932 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,10 +11,9 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - #data_type="emulated", max_seconds=30, - #config = "benchmarking_32k", - #data="prompt_tokens=128,output_tokens=128", + scenario = "benchmarking_32kz", + data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 094f478..96087ab 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,7 +1,5 @@ -#DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat" -#DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" From ac9ef63ffb42eea42ccf76ba7cebddad7bebd63f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 13:33:35 +0000 Subject: [PATCH 90/91] readd output path --- src/automation/configs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index 96087ab..0bb90f6 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,5 +1,5 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" -#DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" -DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" +DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" +#DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat" From 69638eae4c4ff224d06cf26a513c0fb5951e394f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 14:51:21 +0000 Subject: [PATCH 91/91] onpremise settings --- examples/guidellm_example.py | 9 ++++----- src/automation/configs.py | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index ea0c932..fe2e297 100644 --- a/examples/guidellm_example.py 
+++ b/examples/guidellm_example.py @@ -1,21 +1,20 @@ - from automation.tasks import GuideLLMTask task = GuideLLMTask( project_name="alexandre_debug", task_name="test_guidellm_task", - #model="meta-llama/Llama-3.2-1B-Instruct", - model="Qwen/Qwen2.5-1.5B-Instruct", + model="meta-llama/Llama-3.2-1B-Instruct", rate_type="throughput", backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", max_seconds=30, - scenario = "benchmarking_32kz", + #scenario = "benchmarking_32k", data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) -task.execute_remotely("remote-upgrade-default") +task.execute_remotely("oneshot-a100x1") +#task.execute_locally() diff --git a/src/automation/configs.py b/src/automation/configs.py index 0bb90f6..10aa396 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,5 +1,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" -#DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat"
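
Note on the scenario handling introduced in the patches above: guidellm_script.py resolves a user-supplied scenario name to a JSON file under src/automation/standards/benchmarking and falls back to DEFAULT_GUIDELLM_SCENARIO ("chat") when none is given, then loads it with GenerativeTextScenario.from_file(filepath, dict(guidellm_args)). A minimal standalone sketch of that resolution step is below; the helper name resolve_scenario_file and the STANDARDS_DIR constant are illustrative assumptions, not part of the repository.

from pathlib import Path

DEFAULT_GUIDELLM_SCENARIO = "chat"
# Assumed location of the bundled scenario JSON files (mirrors the patched script).
STANDARDS_DIR = Path(".") / "src" / "automation" / "standards" / "benchmarking"

def resolve_scenario_file(guidellm_args: dict) -> Path:
    # Prefer a user-supplied scenario name, e.g. "benchmarking_32k".
    user_scenario = guidellm_args.get("scenario", "")
    if user_scenario:
        filepath = STANDARDS_DIR / f"{user_scenario}.json"
        if not filepath.exists():
            raise ValueError(f"Scenario path {filepath} does not exist")
        return filepath
    # Fall back to the default scenario shipped with the repo.
    return STANDARDS_DIR / f"{DEFAULT_GUIDELLM_SCENARIO}.json"

The resolved path would then be passed to GenerativeTextScenario.from_file together with the remaining guidellm_args, matching the call sequence shown in the guidellm_script.py diff.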