From f6aa8fe0a1ab0f95ee94d018f60d0f861a37844e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:45:52 +0000 Subject: [PATCH 01/91] simple change --- examples/guidellm_example.py | 15 +++++++++++++-- src/automation/configs.py | 6 ++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f09b1e6..f4cef02 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,3 +1,12 @@ +import os +import sys + +from clearml import Task +executable_path = os.path.dirname(sys.executable) +vllm_path = os.path.join(executable_path, "vllm") +print(f"The vllm path is: {vllm_path}") + +""" from automation.tasks import GuideLLMTask task = GuideLLMTask( @@ -14,6 +23,8 @@ data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} ) +""" -task.execute_remotely("oneshot-a100x1") -#task.execute_locally() \ No newline at end of file +task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") +task.execute_remotely("remote-upgrade-default") +#task.execute_locally() diff --git a/src/automation/configs.py b/src/automation/configs.py index 76dbe58..22b5abd 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,2 +1,4 @@ -DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" -DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" \ No newline at end of file +#DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" +DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" +#DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" +DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" From 18f41b1cc79ca752b13612494a4beb05f58e7840 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:50:18 +0000 Subject: [PATCH 02/91] test lmeval change --- examples/lmeval_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 8910aa2..617ecca 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -9,5 +9,6 @@ batch_size="auto", ) -task.execute_remotely("oneshot-a100x1") -#task.execute_locally() \ No newline at end of file +#task.execute_remotely("oneshot-a100x1") +task.execute_remotely("remote-upgrade-default") +#task.execute_locally() From a425d4305e18009e933238f3c72104d0cd31a812 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 10:54:31 +0000 Subject: [PATCH 03/91] update branch --- examples/lmeval_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 617ecca..7c8ee1e 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -3,6 +3,7 @@ task = LMEvalTask( project_name="alexandre_debug", task_name="test_lmeval_task", + branch = "update_guidellm", model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", From 6fc29f4b8295aa3d5b934950babbd6d1ab44b48e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:00:15 +0000 Subject: [PATCH 04/91] use main --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 390012b..a8560a8 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,7 @@ import os 
DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@http_backend" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" class GuideLLMTask(BaseTask): From 956a12b41ab2dbdc0bfda97cbd9faecd255d220c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:06:57 +0000 Subject: [PATCH 05/91] remove gcs --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a0a63c..f0df931 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - "google-cloud-storage>=1.13.2", + #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From 5e09fb72a66b11b14e4ff51f80b44ab4d7030d5b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:23:37 +0000 Subject: [PATCH 06/91] readd gc --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f0df931..9a0a63c 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - #"google-cloud-storage>=1.13.2", + "google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From 655f00e72bf1c4ceff2038bf6c997417d10d3a14 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:26:23 +0000 Subject: [PATCH 07/91] remove gc --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9a0a63c..f0df931 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - "google-cloud-storage>=1.13.2", + #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From ba703b0c8efb6617e56691952fa88f37e8e21e81 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:30:55 +0000 Subject: [PATCH 08/91] back to guidellm --- examples/guidellm_example.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f4cef02..8cf2061 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,12 +1,6 @@ import os import sys -from clearml import Task -executable_path = os.path.dirname(sys.executable) -vllm_path = os.path.join(executable_path, "vllm") -print(f"The vllm path is: {vllm_path}") - -""" from automation.tasks import GuideLLMTask task = GuideLLMTask( @@ -23,8 +17,11 @@ data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} ) -""" -task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") +#from clearml import Task +#task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") #task.execute_locally() +executable_path = os.path.dirname(sys.executable) +vllm_path = os.path.join(executable_path, "vllm") +print(f"The vllm path is: {vllm_path}") From b4deac89b1e5a079cb5c105f8e542d5e683fca59 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:44:41 +0000 Subject: [PATCH 09/91] simplified --- src/automation/tasks/scripts/guidellm_script.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 617b502..3932b84 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -9,6 +9,8 @@ def main(configurations=None): task = Task.current_task() + """ + args = task.get_parameters_as_dict(cast=True) if configurations 
is None: @@ -42,6 +44,7 @@ def main(configurations=None): # Resolve model_id model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download) + """ # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( @@ -51,6 +54,7 @@ def main(configurations=None): args["Args"]["server_wait_time"], ) + """ if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) @@ -70,5 +74,7 @@ def main(configurations=None): task.upload_artifact(name="guidellm guidance report", artifact_object=report.to_json()) task.upload_artifact(name="vLLM server log", artifact_object=server_log) + """ + if __name__ == '__main__': - main() \ No newline at end of file + main() From 6ed6862ee10dd36025dc03e5738622966d2191d4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 11:50:30 +0000 Subject: [PATCH 10/91] simple vllm --- src/automation/vllm/server.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 6036d65..6d59674 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -25,6 +25,7 @@ def start_vllm_server( parsed_target = urlparse(target) + """ server_command = [ f"{vllm_path}", "serve", model_id, @@ -32,6 +33,13 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(num_gpus) ] + """ + + server_command = [ + f"{vllm_path}", "serve", + "Qwen/Qwen2.5-1.5B-Instruct", + ] + subprocess_env = os.environ.copy() @@ -64,4 +72,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name \ No newline at end of file + return server_process, False, server_log_file_name From b3f55bc94e6aa055b6b5b958e91ff3fa92b1effa Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 12:19:25 +0000 Subject: [PATCH 11/91] skip vllm --- examples/guidellm_example.py | 3 ++- src/automation/tasks/scripts/guidellm_script.py | 5 +---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 8cf2061..f632c33 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,7 +11,8 @@ backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, - target="http://localhost:8000/v1", + #target="http://localhost:8000/v1", + target="http://192.130.2.20:8000/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 3932b84..d1d7330 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -9,8 +9,6 @@ def main(configurations=None): task = Task.current_task() - """ - args = task.get_parameters_as_dict(cast=True) if configurations is None: @@ -54,12 +52,12 @@ def main(configurations=None): args["Args"]["server_wait_time"], ) - """ if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to intialize") + """ # Parse through environment variables for k, v in environment_args.items(): os.environ[k] = str(v) @@ -74,7 +72,6 @@ def main(configurations=None): task.upload_artifact(name="guidellm guidance report", 
artifact_object=report.to_json()) task.upload_artifact(name="vLLM server log", artifact_object=server_log) - """ if __name__ == '__main__': main() From 3a709da6f7e8b606f3a71db46a97c4b1eb47f689 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:07:01 +0000 Subject: [PATCH 12/91] pause vllm --- .../tasks/scripts/guidellm_script.py | 110 +++++++++++++----- 1 file changed, 79 insertions(+), 31 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d1d7330..aec6598 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -1,35 +1,44 @@ - import os from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server from pyhocon import ConfigFactory - -def main(configurations=None): +def main(): task = Task.current_task() args = task.get_parameters_as_dict(cast=True) - if configurations is None: - guidellm_args = ConfigFactory.parse_string(task.get_configuration_object("GuideLLM")) - - environment_args = task.get_configuration_object("environment") - if environment_args is None: - environment_args = {} - else: - environment_args = ConfigFactory.parse_string(environment_args) + raw_config = task.get_configuration_object("GuideLLM") + if raw_config is None: + print("[DEBUG] `GuideLLM` config not found in configuration — checking parameters as fallback") + raw_config = task.get_parameters_as_dict().get("GuideLLM") + if raw_config is None: + raise RuntimeError("GuideLLM config is None. This likely means `get_configurations()` is not returning it or it's not passed via parameters.") + guidellm_args = ConfigFactory.from_dict(raw_config) + else: + guidellm_args = ConfigFactory.parse_string(raw_config) + + def clean_hocon_value(v): + if isinstance(v, str) and v.startswith('"') and v.endswith('"'): + return v[1:-1] + return v + + guidellm_args = {k: clean_hocon_value(v) for k, v in guidellm_args.items()} + + print("[DEBUG] Guidellm_Args:", guidellm_args) + + environment_args = task.get_configuration_object("environment") + if environment_args is None: + environment_args = {} + else: + environment_args = ConfigFactory.parse_string(environment_args) - vllm_args = task.get_configuration_object("vLLM") - if vllm_args is None: - vllm_args = {} - else: - vllm_args = ConfigFactory.parse_string(vllm_args) + vllm_args = task.get_configuration_object("vLLM") + if vllm_args is None: + vllm_args = {} else: - guidellm_args = configurations.get("GuideLLM", {}) - environment_args = configurations.get("environment", {}) - vllm_args = configurations.get("vLLM", {}) - + vllm_args = ConfigFactory.parse_string(vllm_args) clearml_model = args["Args"]["clearml_model"] if isinstance(clearml_model, str): @@ -39,9 +48,11 @@ def main(configurations=None): if isinstance(force_download, str): force_download = force_download.lower() == "true" - # Resolve model_id model_id = resolve_model_id(args["Args"]["model"], clearml_model, force_download) + + gpu_count = int(guidellm_args.get("gpu_count", 1)) + """ # Start vLLM server @@ -50,28 +61,65 @@ def main(configurations=None): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], + gpu_count, ) if not server_initialized: kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) - raise AssertionError("Server failed to intialize") - + raise AssertionError("Server failed to initialize") """ + # 
Parse through environment variables for k, v in environment_args.items(): os.environ[k] = str(v) guidellm_args["model"] = model_id - from guidellm import generate_benchmark_report - guidellm_args = cast_args(guidellm_args, generate_benchmark_report) - report = generate_benchmark_report(**guidellm_args) - kill_process_tree(server_process.pid) - - task.upload_artifact(name="guidellm guidance report", artifact_object=report.to_json()) - task.upload_artifact(name="vLLM server log", artifact_object=server_log) - + import json + import asyncio + from pathlib import Path + from guidellm.benchmark import benchmark_generative_text + + # Ensure output_path is set and consistent + output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) + guidellm_args["output_path"] = str(output_path) + + print("[DEBUG] Calling benchmark_generative_text with:") + print(json.dumps(guidellm_args, indent=2)) + + try: + asyncio.run( + benchmark_generative_text( + target=guidellm_args["target"], + backend_type=guidellm_args.get("backend_type", "openai_http"), + backend_args=guidellm_args.get("backend_args", None), + model=guidellm_args.get("model"), + processor=guidellm_args.get("processor", None), + processor_args=guidellm_args.get("processor_args", None), + data=guidellm_args["data"], + data_args=guidellm_args.get("data_args", None), + data_sampler=guidellm_args.get("data_sampler", None), + rate_type=guidellm_args["rate_type"], + rate=guidellm_args.get("rate", None), + max_seconds=guidellm_args.get("max_seconds", None), + max_requests=guidellm_args.get("max_requests", None), + warmup_percent=guidellm_args.get("warmup_percent", None), + cooldown_percent=guidellm_args.get("cooldown_percent", None), + show_progress=not guidellm_args.get("disable_progress", False), + show_progress_scheduler_stats=guidellm_args.get("display_scheduler_stats", False), + output_console=not guidellm_args.get("disable_console_outputs", False), + output_path=output_path, + output_extras=guidellm_args.get("output_extras", None), + output_sampling=guidellm_args.get("output_sampling", None), + random_seed=guidellm_args.get("random_seed", 42), + ) + ) + + finally: + task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) + task.upload_artifact(name="vLLM server log", artifact_object=server_log) + kill_process_tree(server_process.pid) if __name__ == '__main__': main() From 02cac57e2410372147ab297d9bedc40376782134 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:14:57 +0000 Subject: [PATCH 13/91] update benchmark report --- src/automation/tasks/scripts/guidellm_script.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index aec6598..242e360 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -79,7 +79,8 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - from guidellm.benchmark import benchmark_generative_text + #from guidellm.benchmark import benchmark_generative_text + from guidellm.benchmark.output import GenerativeBenchmarksReport # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) @@ -88,6 +89,7 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) + """ try: asyncio.run( benchmark_generative_text( @@ -120,6 +122,7 @@ def 
clean_hocon_value(v): task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) + """ if __name__ == '__main__': main() From a85bb4fd8dd662754f8aab3e3fe21e69fb947148 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:18:51 +0000 Subject: [PATCH 14/91] update ip --- examples/guidellm_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f632c33..b2b5a91 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -12,7 +12,7 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, #target="http://localhost:8000/v1", - target="http://192.130.2.20:8000/v1", + target="http://192.130.2.29:8000/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", From c3af0cf01c6bf61a87b0e59c4418267eb5d01ab1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:25:37 +0000 Subject: [PATCH 15/91] update branch --- examples/guidellm_example.py | 1 + src/automation/tasks/base_task.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index b2b5a91..99f8775 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -17,6 +17,7 @@ max_seconds=30, data="prompt_tokens=512,generated_tokens=256", vllm_kwargs={"enable-chunked-prefill": True} + branch = "update_guidellm" ) #from clearml import Task diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index d886599..4c74e85 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,6 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, + branch: str = "main" packages: Optional[Sequence[str]]=None, task_type: str="training", ): @@ -29,6 +30,7 @@ def __init__( self.packages = packages self.task_type = task_type self.task = None + self.branch= branch self.script_path = None self.callable_artifacts = None @@ -91,7 +93,7 @@ def create_task(self): add_task_init_call=True, script=self.script_path, repo="https://github.com/neuralmagic/research.git", - branch="main", + branch=self.branch, ) self.task.output_uri = DEFAULT_OUTPUT_URI self.set_arguments() From ede7482fad3497af2e0a2b4d2fb35abd0ac8ef10 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:27:06 +0000 Subject: [PATCH 16/91] added base task param --- src/automation/tasks/base_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 4c74e85..010e4a2 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str = "main" + branch: str = "main", packages: Optional[Sequence[str]]=None, task_type: str="training", ): From 87496ea0c6a20bab32cfc310d0326564c0841524 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:52:29 +0000 Subject: [PATCH 17/91] retry branch name --- examples/guidellm_example.py | 2 +- src/automation/tasks/base_task.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 99f8775..e67def3 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,8 +16,8 @@ 
data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", + branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} - branch = "update_guidellm" ) #from clearml import Task diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 010e4a2..95e07ba 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str = "main", + branch: str, packages: Optional[Sequence[str]]=None, task_type: str="training", ): From b64ffd8eee841922a3916c6ff28d695286b3b843 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:54:09 +0000 Subject: [PATCH 18/91] repo branch --- src/automation/tasks/guidellm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index a8560a8..10f3f80 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,6 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, + branch: str, task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, @@ -52,6 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, + branch = branch, ) # Check for conflicts in configs and constructor arguments From 7dc5e48c84cdc83a465c8efd667a0aef2c92a1a8 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 13:56:07 +0000 Subject: [PATCH 19/91] readd branch --- examples/guidellm_example.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index e67def3..3e078e9 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,8 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", - branch = "update_guidellm", + #branch = "update_guidellm", + #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) From 2d05c640c4bd8e65dad63eda0ca862a6551834bb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:00:11 +0000 Subject: [PATCH 20/91] branch in base task --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 10f3f80..f1a8904 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,7 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - branch: str, + #branch: Optional[str], task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, @@ -53,7 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, - branch = branch, + #branch = branch, ) # Check for conflicts in configs and constructor arguments From 60e6e9ed3ca8bf8a0f93dedbc536fdb00cbff0cb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:01:49 +0000 Subject: [PATCH 21/91] optional branch --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f1a8904..461c935 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,7 +17,7 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - #branch: Optional[str], + branch: str="main" , task_name: str, model: str, server_wait_time: 
int=DEFAULT_SERVER_WAIT_TIME, @@ -53,7 +53,7 @@ def __init__( docker_image=docker_image, packages=packages, task_type=task_type, - #branch = branch, + branch = branch, ) # Check for conflicts in configs and constructor arguments From ee4d7c94ccfa0745870f5fa4dda0da70ea465d3f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:02:44 +0000 Subject: [PATCH 22/91] add branch choice --- src/automation/tasks/base_task.py | 6 +++--- src/automation/tasks/guidellm.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 95e07ba..cf70ce2 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: str, + branch: Optional[str] = "main", packages: Optional[Sequence[str]]=None, task_type: str="training", ): @@ -52,8 +52,8 @@ def process_config(self, config): return yaml.safe_load(open(STANDARD_CONFIGS[config], "r")) elif os.path.exists(config): return yaml.safe_load(open(config, "r")) - elif os.path.exists(os.path.join("..", "standatrds", config)): - return yaml.safe_load(open(os.path.join("..", "standatrds", config)), "r") + elif os.path.exists(os.path.join("..", "standards", config)): + return yaml.safe_load(open(os.path.join("..", "standards", config)), "r") else: return yaml.safe_load(config) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 461c935..17af41f 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -17,13 +17,13 @@ class GuideLLMTask(BaseTask): def __init__( self, project_name: str, - branch: str="main" , task_name: str, model: str, server_wait_time: int=DEFAULT_SERVER_WAIT_TIME, docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, + branch: str="main", task_type: str="training", vllm_kwargs: dict={}, target: str="http://localhost:8000/v1", From 998a8bcadff0a177b4dcef46fcb1b3754851e537 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:04:25 +0000 Subject: [PATCH 23/91] include benchmark --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 242e360..822e5c3 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -89,6 +89,8 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) + GenerativeBenchmarksReport() + """ try: asyncio.run( From 6944cb496f51ba3f408874205d1c838db084353c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:12:40 +0000 Subject: [PATCH 24/91] refactor default --- examples/guidellm_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 3e078e9..85a09c8 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,7 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", - #branch = "update_guidellm", + branch = "update_guidellm", #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) From 6e4a5d59ed33bd030f7faa0f08a3a138d21f6def Mon Sep 17 00:00:00 2001 From: chibu <> Date: Fri, 27 Jun 2025 14:42:49 +0000 Subject: [PATCH 25/91] moved generate 
text --- src/automation/tasks/scripts/guidellm_script.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 822e5c3..6a35c40 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -89,9 +89,9 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) - GenerativeBenchmarksReport() + #GenerativeBenchmarksReport() + from guidellm.benchmark.entrypoints import benchmark_generative_text - """ try: asyncio.run( benchmark_generative_text( @@ -124,7 +124,6 @@ def clean_hocon_value(v): task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) - """ if __name__ == '__main__': main() From 41f3f217a8e322cb713a890d69533deb912603fe Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:21:21 +0000 Subject: [PATCH 26/91] test --- examples/guidellm_example.py | 6 +++--- src/automation/configs.py | 1 + src/automation/tasks/base_task.py | 4 ++-- src/automation/tasks/guidellm.py | 4 ++-- src/automation/tasks/scripts/guidellm_script.py | 3 +++ 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 85a09c8..ff6de48 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -6,18 +6,18 @@ task = GuideLLMTask( project_name="alexandre_debug", task_name="test_guidellm_task", - model="meta-llama/Llama-3.2-1B-Instruct", + #model="meta-llama/Llama-3.2-1B-Instruct", + model="Qwen/Qwen2.5-1.5B-Instruct", rate_type="throughput", backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, #target="http://localhost:8000/v1", - target="http://192.130.2.29:8000/v1", + target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", - #branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 22b5abd..5c4bf22 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,3 +2,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" +DEFAULT_RESEARCH_BRANCH = "main" diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index cf70ce2..9e0fa7a 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -1,6 +1,6 @@ from clearml import Task from typing import Sequence, Optional -from automation.configs import DEFAULT_OUTPUT_URI +from automation.configs import DEFAULT_OUTPUT_URI, DEFAULT_RESEARCH_BRANCH from automation.standards import STANDARD_CONFIGS import yaml import os @@ -14,7 +14,7 @@ def __init__( project_name: str, task_name: str, docker_image: str, - branch: Optional[str] = "main", + branch: Optional[str] = DEFAULT_RESEARCH_BRANCH, packages: Optional[Sequence[str]]=None, task_type: str="training", ): diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 17af41f..a85eb83 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ 
-1,5 +1,5 @@ from automation.tasks import BaseTask -from automation.configs import DEFAULT_DOCKER_IMAGE +from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -23,7 +23,7 @@ def __init__( docker_image: str=DEFAULT_DOCKER_IMAGE, packages: Optional[Sequence[str]]=None, clearml_model: bool=False, - branch: str="main", + branch: str= DEFAULT_RESEARCH_BRANCH, task_type: str="training", vllm_kwargs: dict={}, target: str="http://localhost:8000/v1", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6a35c40..0c34137 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -92,6 +92,9 @@ def clean_hocon_value(v): #GenerativeBenchmarksReport() from guidellm.benchmark.entrypoints import benchmark_generative_text + import time + time.sleep(300) + try: asyncio.run( benchmark_generative_text( From 850fd213be8fcd217cbfb995064b95dceb723168 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:31:02 +0000 Subject: [PATCH 27/91] add debug --- src/automation/tasks/scripts/guidellm_script.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 0c34137..bdeb6a2 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -90,10 +90,17 @@ def clean_hocon_value(v): print(json.dumps(guidellm_args, indent=2)) #GenerativeBenchmarksReport() + import os + import sys + executable_path = os.path.dirname(sys.executable) + vllm_path = os.path.join(executable_path, "vllm") + print(f"The vllm path is: {vllm_path}") + from guidellm.benchmark.entrypoints import benchmark_generative_text + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - import time - time.sleep(300) + #import time + #time.sleep(300) try: asyncio.run( @@ -124,6 +131,8 @@ def clean_hocon_value(v): ) finally: + import time + time.sleep(300) task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) From 5e876746b3e82f7946217f4680dc87ae1a06b622 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 12:38:58 +0000 Subject: [PATCH 28/91] add os lib --- src/automation/tasks/scripts/guidellm_script.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index bdeb6a2..d62e97b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -1,4 +1,5 @@ import os +import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server @@ -90,8 +91,6 @@ def clean_hocon_value(v): print(json.dumps(guidellm_args, indent=2)) #GenerativeBenchmarksReport() - import os - import sys executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") From c9b63a822f91b6d9313e76713ae2f87a0bfbc451 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 13:13:42 +0000 Subject: [PATCH 29/91] use default scenario --- .../tasks/scripts/guidellm_script.py | 23 +------------------ 1 file 
changed, 1 insertion(+), 22 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d62e97b..9825e28 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -104,28 +104,7 @@ def clean_hocon_value(v): try: asyncio.run( benchmark_generative_text( - target=guidellm_args["target"], - backend_type=guidellm_args.get("backend_type", "openai_http"), - backend_args=guidellm_args.get("backend_args", None), - model=guidellm_args.get("model"), - processor=guidellm_args.get("processor", None), - processor_args=guidellm_args.get("processor_args", None), - data=guidellm_args["data"], - data_args=guidellm_args.get("data_args", None), - data_sampler=guidellm_args.get("data_sampler", None), - rate_type=guidellm_args["rate_type"], - rate=guidellm_args.get("rate", None), - max_seconds=guidellm_args.get("max_seconds", None), - max_requests=guidellm_args.get("max_requests", None), - warmup_percent=guidellm_args.get("warmup_percent", None), - cooldown_percent=guidellm_args.get("cooldown_percent", None), - show_progress=not guidellm_args.get("disable_progress", False), - show_progress_scheduler_stats=guidellm_args.get("display_scheduler_stats", False), - output_console=not guidellm_args.get("disable_console_outputs", False), - output_path=output_path, - output_extras=guidellm_args.get("output_extras", None), - output_sampling=guidellm_args.get("output_sampling", None), - random_seed=guidellm_args.get("random_seed", 42), + GenerativeTextScenario, ) ) From 4d68ea81e9cdb5b6e1fedc8ec04664d7a5438981 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 13:28:50 +0000 Subject: [PATCH 30/91] benchmark with scenario --- src/automation/tasks/scripts/guidellm_script.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 9825e28..05c7749 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -95,7 +95,7 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - from guidellm.benchmark.entrypoints import benchmark_generative_text + from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios #import time @@ -103,7 +103,7 @@ def clean_hocon_value(v): try: asyncio.run( - benchmark_generative_text( + benchmark_with_scenario( GenerativeTextScenario, ) ) From 0f07b28009aeca981d5155d7bfa22ae80e388d27 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 14:40:07 +0000 Subject: [PATCH 31/91] overlap with guidellm vars --- src/automation/tasks/scripts/guidellm_script.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 05c7749..967c5b5 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -100,11 +100,17 @@ def clean_hocon_value(v): #import time #time.sleep(300) + current_scenario = GenerativeTextScenario + overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) + for element in overlap_keys: + element_field_info = current_scenario.model_fields[element] + element_field_info.default = guidellm_args[element] + 
current_scenario.model_fields[element] = element_field_info try: asyncio.run( benchmark_with_scenario( - GenerativeTextScenario, + current_scenario, ) ) From 6a6705014d77e034bbace447ae3e35f9cdc4ddf0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 14:52:03 +0000 Subject: [PATCH 32/91] check model and target --- src/automation/tasks/scripts/guidellm_script.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 967c5b5..ef956a9 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -101,12 +101,17 @@ def clean_hocon_value(v): #import time #time.sleep(300) current_scenario = GenerativeTextScenario + print(current_scenario.model_fields["target"]) + print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info + print(current_scenario.model_fields["target"]) + print(current_scenario.model_fields["model"]) + try: asyncio.run( benchmark_with_scenario( From 72094b4720e92a8965875bce8e9f83365d143e38 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 15:07:35 +0000 Subject: [PATCH 33/91] add debugs --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index ef956a9..e6b8a5d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -105,9 +105,11 @@ def clean_hocon_value(v): print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: + print(element) element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info + print(element_field_info.annotation) print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) From 10180a38d9baed7c87b7d325ca6f73a991b3a870 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 15:25:24 +0000 Subject: [PATCH 34/91] list keys that overlap --- src/automation/tasks/scripts/guidellm_script.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e6b8a5d..69f0065 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -105,11 +105,12 @@ def clean_hocon_value(v): print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) for element in overlap_keys: - print(element) + #print(element) element_field_info = current_scenario.model_fields[element] element_field_info.default = guidellm_args[element] current_scenario.model_fields[element] = element_field_info - print(element_field_info.annotation) + #print(element_field_info.annotation) + print(overlap_keys) print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) From 9191f13210fe732ae9f5e0133bab72a74621d619 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 
18:17:30 +0000 Subject: [PATCH 35/91] only replace model --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 69f0065..06f5e9d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -104,6 +104,7 @@ def clean_hocon_value(v): print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) + overlap_keys = ["model"] for element in overlap_keys: #print(element) element_field_info = current_scenario.model_fields[element] From 1b0e4a4ec765cf86bd80c290f35a7a9c168648fe Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 22:56:33 +0000 Subject: [PATCH 36/91] update with scenario --- examples/guidellm_example.py | 4 ++-- src/automation/tasks/scripts/guidellm_script.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index ff6de48..6fc65c1 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -1,5 +1,3 @@ -import os -import sys from automation.tasks import GuideLLMTask @@ -25,6 +23,8 @@ #task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") #task.execute_locally() +import os +import sys executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 06f5e9d..6833b88 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -82,6 +82,8 @@ def clean_hocon_value(v): from pathlib import Path #from guidellm.benchmark import benchmark_generative_text from guidellm.benchmark.output import GenerativeBenchmarksReport + from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) @@ -95,16 +97,16 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + current_scenario = GenerativeTextScenario.from_builtin("chat", dict(guidellm_args)) #import time #time.sleep(300) + """ current_scenario = GenerativeTextScenario print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) - overlap_keys = ["model"] + #overlap_keys = ["model"] for element in overlap_keys: #print(element) element_field_info = current_scenario.model_fields[element] @@ -116,16 +118,19 @@ def clean_hocon_value(v): print(current_scenario.model_fields["target"]) print(current_scenario.model_fields["model"]) + current_scenario = GenerativeTextScenario + """ + try: asyncio.run( benchmark_with_scenario( current_scenario, + output_path= output_path, + output_extras= None ) ) finally: - import time - 
time.sleep(300) task.upload_artifact(name="guidellm guidance report", artifact_object=output_path) task.upload_artifact(name="vLLM server log", artifact_object=server_log) kill_process_tree(server_process.pid) From 7515a617f4845f64d00c60d7e9eff53e343f4946 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:23:39 +0000 Subject: [PATCH 37/91] readd default scenario --- src/automation/tasks/scripts/guidellm_script.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6833b88..7e7f58d 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -97,7 +97,9 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - current_scenario = GenerativeTextScenario.from_builtin("chat", dict(guidellm_args)) + default_scenario = get_builtin_scenarios()[0] + + current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) #import time #time.sleep(300) From e6318f503197e385b99d960327b93f2d13684fc9 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:28:20 +0000 Subject: [PATCH 38/91] readd default scenario --- src/automation/tasks/scripts/guidellm_script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 7e7f58d..4453c1b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -97,6 +97,8 @@ def clean_hocon_value(v): vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") + + print(get_builtin_scenarios()) default_scenario = get_builtin_scenarios()[0] current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) From 9f61d6e594b42babd8f92dc70321075f8a6f0672 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Mon, 30 Jun 2025 23:29:46 +0000 Subject: [PATCH 39/91] pin to main --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index a85eb83..2ca454c 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,7 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" class GuideLLMTask(BaseTask): From 8c8c23e11000bbfe976761a7cb2da2f4994eb9d4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 10:31:49 +0000 Subject: [PATCH 40/91] readd vllm server --- src/automation/tasks/scripts/guidellm_script.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 4453c1b..7b6e3bb 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -54,8 +54,6 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) - """ - # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -69,7 +67,6 @@ def clean_hocon_value(v): kill_process_tree(server_process.pid) task.upload_artifact(name="vLLM server log", artifact_object=server_log) raise AssertionError("Server failed to 
initialize") - """ # Parse through environment variables for k, v in environment_args.items(): From ec725d1377c589f8b5be50a86c5b5860cbd830b7 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 11:18:31 +0000 Subject: [PATCH 41/91] updated vllm server --- src/automation/vllm/server.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 6d59674..75c331d 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -14,32 +14,29 @@ def start_vllm_server( vllm_args, model_id, target, - server_wait_time, + server_wait_time, + gpu_count, ): task = Task.current_task() executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") - num_gpus = torch.cuda.device_count() + available_gpus = list(range(torch.cuda.device_count())) + selected_gpus = available_gpus[:gpu_count] + + subprocess_env = os.environ.copy() + subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus) parsed_target = urlparse(target) - """ server_command = [ f"{vllm_path}", "serve", model_id, "--host", parsed_target.hostname, "--port", str(parsed_target.port), - "--tensor-parallel-size", str(num_gpus) + "--tensor-parallel-size", str(gpu_count), ] - """ - - server_command = [ - f"{vllm_path}", "serve", - "Qwen/Qwen2.5-1.5B-Instruct", - ] - subprocess_env = os.environ.copy() @@ -53,6 +50,7 @@ def start_vllm_server( server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") + print("Server command:", " ".join(server_command)) server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) delay = 5 @@ -60,6 +58,7 @@ def start_vllm_server( for _ in range(server_wait_time // delay): try: response = requests.get(target + "/models") + print(f"response: {response}") if response.status_code == 200: print("Server initialized") server_initialized = True @@ -72,4 +71,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name + return server_process, False, server_log_file_name \ No newline at end of file From 5b223098614a2411c934c517a85dd055f18e52cc Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 12:22:55 +0000 Subject: [PATCH 42/91] print the input vars --- src/automation/tasks/scripts/guidellm_script.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 7b6e3bb..e092803 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -54,6 +54,12 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) + print(vllm_args) + print(model_id) + print(guidellm_args["target"]) + print(args["Args"]["server_wait_time"]) + print(gpu_count) + # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 5e8053a4867a57eea1a194a286c04ab7775e7501 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 12:34:56 +0000 Subject: [PATCH 43/91] remove gpu count --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e092803..e9612d2 100644 --- 
a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -66,7 +66,7 @@ def clean_hocon_value(v): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], - gpu_count, + #gpu_count, ) if not server_initialized: From af3ebaa2addcf263997a944374e70349edb4f545 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 14:12:59 +0000 Subject: [PATCH 44/91] simple path --- src/automation/vllm/server.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 75c331d..0ebf48a 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -29,7 +29,13 @@ def start_vllm_server( subprocess_env["CUDA_VISIBLE_DEVICES"] = ",".join(str(i) for i in selected_gpus) parsed_target = urlparse(target) + print(f"vllm path is: {vllm_path}") + server_command = [ + f"{vllm_path}", "serve", + "Qwen/Qwen2.5-1.5B-Instruct", + ] + """ server_command = [ f"{vllm_path}", "serve", model_id, @@ -37,6 +43,7 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(gpu_count), ] + """ subprocess_env = os.environ.copy() @@ -71,4 +78,4 @@ def start_vllm_server( if server_initialized: return server_process, True, server_log_file_name else: - return server_process, False, server_log_file_name \ No newline at end of file + return server_process, False, server_log_file_name From 5c4f5b865f3e21390933d5563801ef94f2252e8f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 14:37:53 +0000 Subject: [PATCH 45/91] vllm print --- src/automation/vllm/server.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 0ebf48a..7eeadad 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -19,6 +19,8 @@ def start_vllm_server( ): task = Task.current_task() + print("Inside start vllm server") + executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") @@ -35,6 +37,8 @@ def start_vllm_server( "Qwen/Qwen2.5-1.5B-Instruct", ] + print(server_command) + """ server_command = [ f"{vllm_path}", "serve", From b8a1e9f2520cef77d5add2d948225f39efabf274 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:22:17 +0000 Subject: [PATCH 46/91] added cwd --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e9612d2..5b9c1cd 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -59,6 +59,7 @@ def clean_hocon_value(v): print(guidellm_args["target"]) print(args["Args"]["server_wait_time"]) print(gpu_count) + print(os.getcwd()) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( From 0365496f82d55d3145c6baf925023674e95796b1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:48:10 +0000 Subject: [PATCH 47/91] ensure setup uses branch --- examples/guidellm_example.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 6fc65c1..6d0dbf4 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -16,7 +16,7 @@ max_seconds=30, data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", - vllm_kwargs={"enable-chunked-prefill": 
True} + #vllm_kwargs={"enable-chunked-prefill": True} ) #from clearml import Task diff --git a/setup.py b/setup.py index f0df931..c2112da 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ version="0.1.0", author="NM MLR", description="Automation tools", - url="https://github.com/neuralmagic/research", + #url="https://github.com/neuralmagic/research", package_dir={"": "src"}, packages=find_packages( "src", include=["automation", "automation.*"], exclude=["*.__pycache__.*"] From 348fd82617b69251d8d961f2a9b22abbf9db4b53 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:52:40 +0000 Subject: [PATCH 48/91] add guide again --- src/automation/tasks/base_task.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 9e0fa7a..12ce3bc 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -7,7 +7,8 @@ class BaseTask(): - base_packages = ["git+https://github.com/neuralmagic/research.git"] + #base_packages = ["git+https://github.com/neuralmagic/research.git"] + base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] def __init__( self, From cb882af83ce0b97593d71306833f843fdc03fb7b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 16:58:42 +0000 Subject: [PATCH 49/91] readd gpu count --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 5b9c1cd..641c44b 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -67,7 +67,7 @@ def clean_hocon_value(v): model_id, guidellm_args["target"], args["Args"]["server_wait_time"], - #gpu_count, + gpu_count, ) if not server_initialized: From 464591e62bea7c0ffd1be1cdd93a6e61aaf01598 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 17:14:11 +0000 Subject: [PATCH 50/91] update vllm server --- src/automation/vllm/server.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 7eeadad..948c361 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -32,14 +32,13 @@ def start_vllm_server( parsed_target = urlparse(target) print(f"vllm path is: {vllm_path}") + """ server_command = [ f"{vllm_path}", "serve", "Qwen/Qwen2.5-1.5B-Instruct", ] - - print(server_command) - """ + server_command = [ f"{vllm_path}", "serve", model_id, @@ -47,8 +46,8 @@ def start_vllm_server( "--port", str(parsed_target.port), "--tensor-parallel-size", str(gpu_count), ] - """ + print(server_command) subprocess_env = os.environ.copy() for k, v in vllm_args.items(): From c0d0dba9f7c9555e2af636c9abb2a7b0a831580c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 17:24:22 +0000 Subject: [PATCH 51/91] revert target --- examples/guidellm_example.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 6d0dbf4..08e042b 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -10,8 +10,8 @@ backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, - #target="http://localhost:8000/v1", - target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", + target="http://localhost:8000/v1", + #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", 
data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256", From 81c62f7ee6c59d184dcf48c42473ea59ac8059eb Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:07:25 +0000 Subject: [PATCH 52/91] install editable guidellm --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 2ca454c..e37b4e6 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" +GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" class GuideLLMTask(BaseTask): From 97e36cba71b28654a719e8e096c6c89d4cd948c0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:30:38 +0000 Subject: [PATCH 53/91] print package list --- src/automation/tasks/guidellm.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index e37b4e6..f153003 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -39,14 +39,18 @@ def __init__( # Set packages, taking into account default packages # for the LMEvalTask and packages set in the config + print(self.guidellm_packages) + print(packages) if packages is not None: packages = list(set(packages + self.guidellm_packages)) else: packages = self.guidellm_packages + print(packages) if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) + print(packages) # Initialize base parameters super().__init__( project_name=project_name, From 063c8b971335f62c1d58ef489f7de6f3655000b1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 18:40:39 +0000 Subject: [PATCH 54/91] added package print --- src/automation/tasks/base_task.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 12ce3bc..1a605e0 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -25,6 +25,8 @@ def __init__( else: packages = self.base_packages + print(packages) + self.project_name = project_name self.task_name = task_name self.docker_image = docker_image From d6ef26605458668af477c1e0de7ed850df40c244 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Tue, 1 Jul 2025 19:09:18 +0000 Subject: [PATCH 55/91] older guidellm --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f153003..fea7298 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,7 +5,8 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" +#GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" class GuideLLMTask(BaseTask): From 8c649103d1bd9c782bd23e32c5f0db3caab1dd9e Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 09:58:04 +0000 Subject: [PATCH 56/91] updated to use dev branch --- src/automation/tasks/guidellm.py | 
4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index fea7298..bf6d620 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,8 +5,8 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" -#GUIDELLM_PACKAGE = "-e git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 7dee38bb400493cf046bd20866d1d078bdac5922 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 10:53:33 +0000 Subject: [PATCH 57/91] redo with custom branch --- src/automation/tasks/guidellm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index bf6d620..4031794 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,9 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@use-old-run" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 263c2ff9a95c1e11d149433b01472df381f2f114 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:09:19 +0000 Subject: [PATCH 58/91] repo override --- src/automation/tasks/guidellm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 4031794..f01a1dc 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 90e461b61a16204e45509e4c47d46d7e49239c4b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:13:06 +0000 Subject: [PATCH 59/91] add packages to guidellm --- src/automation/tasks/guidellm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f01a1dc..9bca651 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -11,6 +11,9 @@ class GuideLLMTask(BaseTask): guidellm_packages = [ + "build>=1.0.0", + "setuptools>=61.0", + "setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", From 4f00a5a03df2b76900e6c31b4c33854178624ef7 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:28:38 +0000 Subject: [PATCH 60/91] update setup.py --- setup.py | 3 +++ src/automation/tasks/guidellm.py | 6 +++--- 2 files 
changed, 6 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index c2112da..6387290 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,9 @@ #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", + "build>=1.0.0", + "setuptools>=61.0", + "setuptools-git-versioning>=2.0,<3", ], python_requires=">=3.7", ) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 9bca651..e695a49 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -11,9 +11,9 @@ class GuideLLMTask(BaseTask): guidellm_packages = [ - "build>=1.0.0", - "setuptools>=61.0", - "setuptools-git-versioning>=2.0,<3", + #"build>=1.0.0", + #"setuptools>=61.0", + #"setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", From 14f84ce53bc761c0ae75ef894a474d601d340405 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:51:47 +0000 Subject: [PATCH 61/91] readd --- src/automation/tasks/guidellm.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index e695a49..c86fb14 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,4 +1,5 @@ from automation.tasks import BaseTask +from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -51,6 +52,8 @@ def __init__( packages = self.guidellm_packages print(packages) + print(get_builtin_scenarios()) + default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) From ad2b4237504240727981ce9ca5c05a70ed5a929d Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 11:58:14 +0000 Subject: [PATCH 62/91] before vllm --- src/automation/tasks/guidellm.py | 6 +++--- src/automation/tasks/scripts/guidellm_script.py | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index c86fb14..9a7eb06 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,5 +1,5 @@ from automation.tasks import BaseTask -from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios +#from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -52,8 +52,8 @@ def __init__( packages = self.guidellm_packages print(packages) - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + #print(get_builtin_scenarios()) + #default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 641c44b..6c03f47 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -61,6 +61,9 @@ def clean_hocon_value(v): print(gpu_count) print(os.getcwd()) + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + print(get_builtin_scenarios()) + default_scenario = get_builtin_scenarios()[0] # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 98eb6f8ec241b7c476ff1f5cdb99acef1da7c245 
Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:19:28 +0000 Subject: [PATCH 63/91] removed vllm --- src/automation/tasks/guidellm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 9a7eb06..877f282 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -15,7 +15,7 @@ class GuideLLMTask(BaseTask): #"build>=1.0.0", #"setuptools>=61.0", #"setuptools-git-versioning>=2.0,<3", - "vllm", + #"vllm", GUIDELLM_PACKAGE, "hf_xet", ] From 10874d3d7f4a4aeeadb77855a1746da5236fc3b0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:22:50 +0000 Subject: [PATCH 64/91] remove vllm --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 6c03f47..f522815 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -2,7 +2,7 @@ import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree -from automation.vllm import start_vllm_server +#from automation.vllm import start_vllm_server from pyhocon import ConfigFactory def main(): From 629d195d972510019892e8ab91174c50b535347c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 12:32:34 +0000 Subject: [PATCH 65/91] cleanup --- src/automation/tasks/guidellm.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 877f282..2dc6336 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -1,5 +1,4 @@ from automation.tasks import BaseTask -#from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios from automation.configs import DEFAULT_DOCKER_IMAGE, DEFAULT_RESEARCH_BRANCH from typing import Optional, Sequence import os @@ -52,8 +51,6 @@ def __init__( packages = self.guidellm_packages print(packages) - #print(get_builtin_scenarios()) - #default_scenario = get_builtin_scenarios()[0] if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) From 768d13589592297b379f5f22f82cb9b3a8a15b77 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 14:48:45 +0000 Subject: [PATCH 66/91] back to base --- setup.py | 3 --- src/automation/standards/benchmarking/chat.json | 13 +++++++++++++ src/automation/tasks/guidellm.py | 5 +++-- src/automation/tasks/scripts/guidellm_script.py | 14 ++++++++------ 4 files changed, 24 insertions(+), 11 deletions(-) create mode 100644 src/automation/standards/benchmarking/chat.json diff --git a/setup.py b/setup.py index 6387290..c2112da 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,6 @@ #"google-cloud-storage>=1.13.2", "datasets", "pyhocon", - "build>=1.0.0", - "setuptools>=61.0", - "setuptools-git-versioning>=2.0,<3", ], python_requires=">=3.7", ) diff --git a/src/automation/standards/benchmarking/chat.json b/src/automation/standards/benchmarking/chat.json new file mode 100644 index 0000000..024438c --- /dev/null +++ b/src/automation/standards/benchmarking/chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff 
--git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 2dc6336..6b6c1cb 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,7 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" @@ -14,7 +15,7 @@ class GuideLLMTask(BaseTask): #"build>=1.0.0", #"setuptools>=61.0", #"setuptools-git-versioning>=2.0,<3", - #"vllm", + "vllm", GUIDELLM_PACKAGE, "hf_xet", ] diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index f522815..d50c441 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -61,9 +61,9 @@ def clean_hocon_value(v): print(gpu_count) print(os.getcwd()) - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + from pathlib import Path + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -105,10 +105,12 @@ def clean_hocon_value(v): print(f"The vllm path is: {vllm_path}") - print(get_builtin_scenarios()) - default_scenario = get_builtin_scenarios()[0] + #default_scenario = get_builtin_scenarios()[0] + #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) + from pathlib import Path + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) #import time #time.sleep(300) From 09c3978c73f3b9522149f03144b47316cb65b8ee Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 14:56:05 +0000 Subject: [PATCH 67/91] readd --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d50c441..d1b0c95 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -62,6 +62,7 @@ def clean_hocon_value(v): print(os.getcwd()) from pathlib import Path + from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server From e64fb1226cb90b776c854e87e93910236b3317d1 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 15:03:06 +0000 Subject: [PATCH 68/91] readd start vllm server --- src/automation/tasks/scripts/guidellm_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d1b0c95..bd70a5f 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -2,7 +2,7 @@ import sys from clearml import Task from automation.utils import resolve_model_id, cast_args, kill_process_tree -#from automation.vllm import start_vllm_server +from automation.vllm import start_vllm_server from pyhocon import ConfigFactory def main(): From 873c222cbca8f62527ab932a4e81af91d9d3a37b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 15:12:42 +0000 Subject: [PATCH 69/91] use guidellm branch --- src/automation/tasks/guidellm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 6b6c1cb..90827f3 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,9 +4,9 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): From 16b83bc8542b10417da077b5867b63d83f9f9582 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 16:30:51 +0000 Subject: [PATCH 70/91] base complete --- examples/guidellm_example.py | 4 +++- src/automation/tasks/guidellm.py | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 08e042b..cd9af23 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -14,7 +14,9 @@ #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, - data="prompt_tokens=512,generated_tokens=256", + #data="{'prompt_tokens': 512, 'generated_tokens': 256, 'output_tokens' : 256}", + data="prompt_tokens=512,generated_tokens=256,output_tokens=256", + #data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index 90827f3..c0ff8f7 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -4,8 +4,8 @@ import os DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" -GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" +GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" +#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" #GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" From 432031ed3c6827aeaa88332b2daa4538f907203c Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 16:57:35 +0000 Subject: [PATCH 71/91] test rag --- examples/guidellm_example.py | 11 ----------- src/automation/standards/benchmarking/rag.json | 13 +++++++++++++ 
src/automation/tasks/scripts/guidellm_script.py | 8 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) create mode 100644 src/automation/standards/benchmarking/rag.json diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index cd9af23..f539192 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,22 +11,11 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - #target="http://fed73cc1-us-east.lb.appdomain.cloud/v1", data_type="emulated", max_seconds=30, - #data="{'prompt_tokens': 512, 'generated_tokens': 256, 'output_tokens' : 256}", data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - #data="prompt_tokens=512,generated_tokens=256", branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) -#from clearml import Task -#task = Task.init(project_name="alexandre_debug", task_name="test_guidellm_task") task.execute_remotely("remote-upgrade-default") -#task.execute_locally() -import os -import sys -executable_path = os.path.dirname(sys.executable) -vllm_path = os.path.join(executable_path, "vllm") -print(f"The vllm path is: {vllm_path}") diff --git a/src/automation/standards/benchmarking/rag.json b/src/automation/standards/benchmarking/rag.json new file mode 100644 index 0000000..c7ee2f2 --- /dev/null +++ b/src/automation/standards/benchmarking/rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 4096, + "prompt_tokens_stdev": 512, + "prompt_tokens_min": 2048, + "prompt_tokens_max": 6144, + "output_tokens": 512, + "output_tokens_stdev": 128, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index bd70a5f..64dbae7 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -63,7 +63,7 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( @@ -109,9 +109,9 @@ def clean_hocon_value(v): #default_scenario = get_builtin_scenarios()[0] #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - from pathlib import Path - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + #from pathlib import Path + #filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) + #current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) #import time #time.sleep(300) From e9117eacfa4b07678a7e4ea229d54d408d07a246 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 17:51:29 +0000 Subject: [PATCH 72/91] clean up --- src/automation/tasks/base_task.py | 3 +- src/automation/tasks/guidellm.py | 6 ---- .../tasks/scripts/guidellm_script.py | 32 ------------------- 3 files changed, 2 insertions(+), 39 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 
1a605e0..2e29030 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -8,7 +8,7 @@ class BaseTask(): #base_packages = ["git+https://github.com/neuralmagic/research.git"] - base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] + #base_packages = ["git+https://github.com/neuralmagic/research.git@update_guidellm"] def __init__( self, @@ -19,6 +19,7 @@ def __init__( packages: Optional[Sequence[str]]=None, task_type: str="training", ): + base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] if packages is not None: packages = list(set(packages + self.base_packages)) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index c0ff8f7..f0f7758 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -5,16 +5,10 @@ DEFAULT_SERVER_WAIT_TIME = 600 # 600 seconds = 10 minutes GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@clearml-guidellm#egg=guidellm" -#GUIDELLM_PACKAGE = "git+https://github.com/neuralmagic/guidellm.git@main#egg=guidellm[dev]" class GuideLLMTask(BaseTask): guidellm_packages = [ - #"build>=1.0.0", - #"setuptools>=61.0", - #"setuptools-git-versioning>=2.0,<3", "vllm", GUIDELLM_PACKAGE, "hf_xet", diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 64dbae7..525aab0 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -88,7 +88,6 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - #from guidellm.benchmark import benchmark_generative_text from guidellm.benchmark.output import GenerativeBenchmarksReport from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios @@ -100,41 +99,10 @@ def clean_hocon_value(v): print("[DEBUG] Calling benchmark_generative_text with:") print(json.dumps(guidellm_args, indent=2)) - #GenerativeBenchmarksReport() executable_path = os.path.dirname(sys.executable) vllm_path = os.path.join(executable_path, "vllm") print(f"The vllm path is: {vllm_path}") - - #default_scenario = get_builtin_scenarios()[0] - #current_scenario = GenerativeTextScenario.from_builtin(default_scenario, dict(guidellm_args)) - - #from pathlib import Path - #filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "chat.json")) - #current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - - #import time - #time.sleep(300) - """ - current_scenario = GenerativeTextScenario - print(current_scenario.model_fields["target"]) - print(current_scenario.model_fields["model"]) - overlap_keys = current_scenario.model_fields.keys() & dict(guidellm_args) - #overlap_keys = ["model"] - for element in overlap_keys: - #print(element) - element_field_info = current_scenario.model_fields[element] - element_field_info.default = guidellm_args[element] - current_scenario.model_fields[element] = element_field_info - #print(element_field_info.annotation) - print(overlap_keys) - - print(current_scenario.model_fields["target"]) - print(current_scenario.model_fields["model"]) - - current_scenario = GenerativeTextScenario - """ - try: asyncio.run( benchmark_with_scenario( 
From 9984a8ccc2e7d0660aee888fb676fff62f147658 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 17:54:11 +0000 Subject: [PATCH 73/91] base package as variable --- src/automation/tasks/base_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 2e29030..4f20e67 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -22,9 +22,9 @@ def __init__( base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] if packages is not None: - packages = list(set(packages + self.base_packages)) + packages = list(set(packages + base_packages)) else: - packages = self.base_packages + packages = base_packages print(packages) From b8b51e9b8c4e00606346b5301fe38bab8d5845c4 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 18:04:07 +0000 Subject: [PATCH 74/91] test default branch change --- src/automation/configs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index 5c4bf22..a3cca2a 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,4 +2,5 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" -DEFAULT_RESEARCH_BRANCH = "main" +#DEFAULT_RESEARCH_BRANCH = "main" +DEFAULT_RESEARCH_BRANCH = "update_guidellm" From b99afec4b02bd991d5a3ec18dda16f1edad18aee Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 18:53:20 +0000 Subject: [PATCH 75/91] update branch names --- examples/guidellm_example.py | 2 +- src/automation/tasks/base_task.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index f539192..dab5604 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -14,7 +14,7 @@ data_type="emulated", max_seconds=30, data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - branch = "update_guidellm", + #branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/tasks/base_task.py b/src/automation/tasks/base_task.py index 4f20e67..74fa1ba 100644 --- a/src/automation/tasks/base_task.py +++ b/src/automation/tasks/base_task.py @@ -19,7 +19,8 @@ def __init__( packages: Optional[Sequence[str]]=None, task_type: str="training", ): - base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch}"] + branch_name = branch or DEFAULT_RESEARCH_BRANCH + base_packages = [f"git+https://github.com/neuralmagic/research.git@{branch_name}"] if packages is not None: packages = list(set(packages + base_packages)) From b2c29184aedb5a9459ac7ef94b58ef611556ed66 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:03:56 +0000 Subject: [PATCH 76/91] use main branch in config --- src/automation/configs.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index a3cca2a..5c4bf22 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -2,5 +2,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" -#DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_RESEARCH_BRANCH = "update_guidellm" +DEFAULT_RESEARCH_BRANCH = 
"main" From d1e686b74ed5ab6573d8e9a43f65bc7235c25e32 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:33:45 +0000 Subject: [PATCH 77/91] print the scenario --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 525aab0..d49bb38 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -65,6 +65,7 @@ def clean_hocon_value(v): from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, From 5d3e3ff29eadf4594826d5218bb8b0b19713a22b Mon Sep 17 00:00:00 2001 From: chibu <> Date: Wed, 2 Jul 2025 19:37:40 +0000 Subject: [PATCH 78/91] modify tokens --- examples/guidellm_example.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index dab5604..2a4d536 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -13,8 +13,9 @@ target="http://localhost:8000/v1", data_type="emulated", max_seconds=30, - data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - #branch = "update_guidellm", + #data="prompt_tokens=512,generated_tokens=256,output_tokens=256", + data="prompt_tokens=128,generated_tokens=128,output_tokens=128", + branch = "update_guidellm", #vllm_kwargs={"enable-chunked-prefill": True} ) From 3b0d86c0655adc3f3a838592b5e57064d1e5f7ca Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 09:29:24 +0000 Subject: [PATCH 79/91] revert lmeval and setup.py, update vllm server log --- examples/lmeval_example.py | 4 +--- setup.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index 7c8ee1e..fd07a7b 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -3,13 +3,11 @@ task = LMEvalTask( project_name="alexandre_debug", task_name="test_lmeval_task", - branch = "update_guidellm", model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", batch_size="auto", ) -#task.execute_remotely("oneshot-a100x1") -task.execute_remotely("remote-upgrade-default") +task.execute_remotely("oneshot-a100x1") #task.execute_locally() diff --git a/setup.py b/setup.py index c2112da..755f2ea 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ ), install_requires=[ "clearml==1.14.4", - #"google-cloud-storage>=1.13.2", + "google-cloud-storage>=1.13.2", "datasets", "pyhocon", ], From a2d6eb5749019fb9eb865af021814168c600fc39 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 09:54:02 +0000 Subject: [PATCH 80/91] readd default scenarios --- examples/guidellm_example.py | 6 +++--- setup.py | 2 +- src/automation/configs.py | 1 + src/automation/tasks/scripts/guidellm_script.py | 8 ++++++-- src/automation/vllm/server.py | 7 +++++-- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 2a4d536..c5d5df4 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -13,10 +13,10 @@ target="http://localhost:8000/v1", 
data_type="emulated", max_seconds=30, - #data="prompt_tokens=512,generated_tokens=256,output_tokens=256", - data="prompt_tokens=128,generated_tokens=128,output_tokens=128", + #config = "benchmarking_32k", + data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", - #vllm_kwargs={"enable-chunked-prefill": True} + vllm_kwargs={"enable-chunked-prefill": True} ) task.execute_remotely("remote-upgrade-default") diff --git a/setup.py b/setup.py index 755f2ea..9a0a63c 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ version="0.1.0", author="NM MLR", description="Automation tools", - #url="https://github.com/neuralmagic/research", + url="https://github.com/neuralmagic/research", package_dir={"": "src"}, packages=find_packages( "src", include=["automation", "automation.*"], exclude=["*.__pycache__.*"] diff --git a/src/automation/configs.py b/src/automation/configs.py index 5c4bf22..dc67b98 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,3 +3,4 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" +DEFAULT_GUIDELLM_SCENARIO = "rag" diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index d49bb38..e45fd33 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -4,6 +4,7 @@ from automation.utils import resolve_model_id, cast_args, kill_process_tree from automation.vllm import start_vllm_server from pyhocon import ConfigFactory +from automation.configs import DEFAULT_GUIDELLM_SCENARIO def main(): task = Task.current_task() @@ -63,8 +64,11 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", "rag.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + if len(get_builtin_scenarios()) > 0: + current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) + else: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 948c361..011ab1a 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -55,12 +55,15 @@ def start_vllm_server( subprocess_env[k] = str(v) else: if v == True or v == "True": - v = "true" - server_command.extend([f"--{k}", str(v)]) + server_command.append(f"--{k}") + else: + server_command.extend([f"--{k}", str(v)]) + server_log_file_name = f"{SERVER_LOG_PREFIX}_{task.id}.txt" server_log_file = open(server_log_file_name, "w") print("Server command:", " ".join(server_command)) + print(f"VLLM logs are located at: {server_log_file} in {os.getcwd()}") server_process = subprocess.Popen(server_command, stdout=server_log_file, stderr=server_log_file, shell=False, env=subprocess_env) delay = 5 From 81f519990163caabf8daa6bad13ba4cb7b7e6175 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 10:33:10 +0000 Subject: [PATCH 81/91] change default guidellm json --- src/automation/configs.py | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index dc67b98..0ba89f3 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,4 +3,4 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_GUIDELLM_SCENARIO = "rag" +DEFAULT_GUIDELLM_SCENARIO = "chat" From 155033397f77ec0795e19c0a4d0d29182b591893 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 10:56:06 +0000 Subject: [PATCH 82/91] add config examples json --- src/automation/configs.py | 3 ++- .../standards/benchmarking/benchmarking_128k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_16k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_32k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_64k.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_chat.json | 13 +++++++++++++ .../benchmarking/benchmarking_code_completion.json | 13 +++++++++++++ .../benchmarking/benchmarking_code_fixing.json | 13 +++++++++++++ .../benchmarking_docstring_generation.json | 13 +++++++++++++ .../benchmarking/benchmarking_instruction.json | 13 +++++++++++++ .../benchmarking/benchmarking_long_rag.json | 13 +++++++++++++ .../standards/benchmarking/benchmarking_rag.json | 13 +++++++++++++ .../benchmarking/benchmarking_summarization.json | 13 +++++++++++++ 13 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 src/automation/standards/benchmarking/benchmarking_128k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_16k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_32k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_64k.json create mode 100644 src/automation/standards/benchmarking/benchmarking_chat.json create mode 100644 src/automation/standards/benchmarking/benchmarking_code_completion.json create mode 100644 src/automation/standards/benchmarking/benchmarking_code_fixing.json create mode 100644 src/automation/standards/benchmarking/benchmarking_docstring_generation.json create mode 100644 src/automation/standards/benchmarking/benchmarking_instruction.json create mode 100644 src/automation/standards/benchmarking/benchmarking_long_rag.json create mode 100644 src/automation/standards/benchmarking/benchmarking_rag.json create mode 100644 src/automation/standards/benchmarking/benchmarking_summarization.json diff --git a/src/automation/configs.py b/src/automation/configs.py index 0ba89f3..75e0483 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,4 +3,5 @@ #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -DEFAULT_GUIDELLM_SCENARIO = "chat" +#DEFAULT_GUIDELLM_SCENARIO = "chat" +DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" diff --git a/src/automation/standards/benchmarking/benchmarking_128k.json b/src/automation/standards/benchmarking/benchmarking_128k.json new file mode 100644 index 0000000..13b8105 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_128k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 128000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 128000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_16k.json 
b/src/automation/standards/benchmarking/benchmarking_16k.json new file mode 100644 index 0000000..f927a4a --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_16k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 16000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 16000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_32k.json b/src/automation/standards/benchmarking/benchmarking_32k.json new file mode 100644 index 0000000..6543fd7 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_32k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 32000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 32000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_64k.json b/src/automation/standards/benchmarking/benchmarking_64k.json new file mode 100644 index 0000000..871b210 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_64k.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 64000, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 64000, + "output_tokens": 2048, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 2048 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_chat.json b/src/automation/standards/benchmarking/benchmarking_chat.json new file mode 100644 index 0000000..f4d0548 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_chat.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 512, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 512, + "output_tokens": 256, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 256 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_completion.json b/src/automation/standards/benchmarking/benchmarking_code_completion.json new file mode 100644 index 0000000..6be35df --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_completion.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_code_fixing.json b/src/automation/standards/benchmarking/benchmarking_code_fixing.json new file mode 100644 index 0000000..bceff14 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_code_fixing.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 1024, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1024 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_docstring_generation.json b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json new file mode 100644 index 0000000..0eda212 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_docstring_generation.json @@ -0,0 +1,13 
@@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 768, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 768, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_instruction.json b/src/automation/standards/benchmarking/benchmarking_instruction.json new file mode 100644 index 0000000..0fac491 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_instruction.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 256, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 256, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_long_rag.json b/src/automation/standards/benchmarking/benchmarking_long_rag.json new file mode 100644 index 0000000..4fe719a --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_long_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 10240, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 10240, + "output_tokens": 1536, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 1536 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_rag.json b/src/automation/standards/benchmarking/benchmarking_rag.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_rag.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} diff --git a/src/automation/standards/benchmarking/benchmarking_summarization.json b/src/automation/standards/benchmarking/benchmarking_summarization.json new file mode 100644 index 0000000..9525b09 --- /dev/null +++ b/src/automation/standards/benchmarking/benchmarking_summarization.json @@ -0,0 +1,13 @@ +{ + "rate_type": "sweep", + "data": { + "prompt_tokens": 1024, + "prompt_tokens_stdev": 128, + "prompt_tokens_min": 1, + "prompt_tokens_max": 1024, + "output_tokens": 128, + "output_tokens_stdev": 64, + "output_tokens_min": 1, + "output_tokens_max": 128 + } +} From 420137d174bd72aeddc2aa556735f0df47641636 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 11:16:19 +0000 Subject: [PATCH 83/91] use original default --- examples/guidellm_example.py | 4 ++-- src/automation/configs.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index c5d5df4..0ebc151 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,10 +11,10 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - data_type="emulated", + #data_type="emulated", max_seconds=30, #config = "benchmarking_32k", - data="prompt_tokens=128,output_tokens=128", + #data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 75e0483..094f478 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -3,5 +3,5 @@ #DEFAULT_OUTPUT_URI = 
"gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" -#DEFAULT_GUIDELLM_SCENARIO = "chat" -DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" +DEFAULT_GUIDELLM_SCENARIO = "chat" +#DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" From 9d284c978ec16ac1f16cce4c8cb1eb18563dbfe2 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 11:22:01 +0000 Subject: [PATCH 84/91] add log --- src/automation/tasks/scripts/guidellm_script.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index e45fd33..07905e3 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -64,6 +64,7 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + print(get_builtin_scenarios()) if len(get_builtin_scenarios()) > 0: current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) else: From e863516b9d86a4572893bc2a580a1583011ad8ea Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:02:24 +0000 Subject: [PATCH 85/91] include user scenario --- src/automation/tasks/scripts/guidellm_script.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index 07905e3..efa91fb 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -64,9 +64,12 @@ def clean_hocon_value(v): from pathlib import Path from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - print(get_builtin_scenarios()) - if len(get_builtin_scenarios()) > 0: - current_scenario = GenerativeTextScenario.from_builtin(DEFAULT_GUIDELLM_SCENARIO, dict(guidellm_args)) + user_scenario = guidellm_args.get("scenario", "") + if user_scenario: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + #elif len(get_builtin_scenarios()) > 0: + # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) else: filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) From 3703e62d0b2ab0d25e5f5fdf1902dd49013768be Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:14:37 +0000 Subject: [PATCH 86/91] revert lmeval example --- examples/lmeval_example.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lmeval_example.py b/examples/lmeval_example.py index fd07a7b..688c355 100644 --- a/examples/lmeval_example.py +++ b/examples/lmeval_example.py @@ -6,7 +6,7 @@ model_id="meta-llama/Llama-3.2-1B-Instruct", tasks="gsm8k", model_args="dtype=auto,max_model_len=8192", - batch_size="auto", + batch_size="auto", ) task.execute_remotely("oneshot-a100x1") From d1b985ac627f5a2f1979679b1486536ceca27b71 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 12:25:15 +0000 Subject: [PATCH 87/91] add file error handling --- .../tasks/scripts/guidellm_script.py | 39 ++++++++----------- src/automation/vllm/server.py | 6 --- 2 files changed, 17 insertions(+), 28 
deletions(-) diff --git a/src/automation/tasks/scripts/guidellm_script.py b/src/automation/tasks/scripts/guidellm_script.py index efa91fb..35269a9 100644 --- a/src/automation/tasks/scripts/guidellm_script.py +++ b/src/automation/tasks/scripts/guidellm_script.py @@ -55,25 +55,6 @@ def clean_hocon_value(v): gpu_count = int(guidellm_args.get("gpu_count", 1)) - print(vllm_args) - print(model_id) - print(guidellm_args["target"]) - print(args["Args"]["server_wait_time"]) - print(gpu_count) - print(os.getcwd()) - - from pathlib import Path - from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios - user_scenario = guidellm_args.get("scenario", "") - if user_scenario: - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - #elif len(get_builtin_scenarios()) > 0: - # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) - else: - filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) - current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) - print(current_scenario.model_fields) # Start vLLM server server_process, server_initialized, server_log = start_vllm_server( vllm_args, @@ -97,15 +78,29 @@ def clean_hocon_value(v): import json import asyncio from pathlib import Path - from guidellm.benchmark.output import GenerativeBenchmarksReport - from guidellm.benchmark.entrypoints import benchmark_generative_text, benchmark_with_scenario + from guidellm.benchmark.entrypoints import benchmark_with_scenario from guidellm.benchmark.scenario import GenerativeTextScenario, get_builtin_scenarios + user_scenario = guidellm_args.get("scenario", "") + if user_scenario: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{user_scenario}.json")) + if os.path.exists(filepath): + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + else: + raise ValueError(f"Scenario path {filepath} does not exist") + #elif len(get_builtin_scenarios()) > 0: + # to be used when get_builtin_scenarios() bug is fiexed + # current_scenario = GenerativeTextScenario.from_builtin(get_builtin_scenarios()[0], dict(guidellm_args)) + else: + filepath = Path(os.path.join(".", "src", "automation", "standards", "benchmarking", f"{DEFAULT_GUIDELLM_SCENARIO}.json")) + current_scenario = GenerativeTextScenario.from_file(filepath, dict(guidellm_args)) + print(current_scenario.model_fields) + # Ensure output_path is set and consistent output_path = Path(guidellm_args.get("output_path", "guidellm-output.json")) guidellm_args["output_path"] = str(output_path) - print("[DEBUG] Calling benchmark_generative_text with:") + print("[DEBUG] Calling benchmark_with_scenario with:") print(json.dumps(guidellm_args, indent=2)) executable_path = os.path.dirname(sys.executable) diff --git a/src/automation/vllm/server.py b/src/automation/vllm/server.py index 011ab1a..2e7d321 100644 --- a/src/automation/vllm/server.py +++ b/src/automation/vllm/server.py @@ -32,12 +32,6 @@ def start_vllm_server( parsed_target = urlparse(target) print(f"vllm path is: {vllm_path}") - """ - server_command = [ - f"{vllm_path}", "serve", - "Qwen/Qwen2.5-1.5B-Instruct", - ] - """ server_command = [ f"{vllm_path}", "serve", From e60aab1acb640b5e9bccc2908c2d3be190bb43e0 Mon Sep 17 00:00:00 2001 From: chibu <> Date: 
Thu, 3 Jul 2025 12:28:24 +0000 Subject: [PATCH 88/91] removed package prints --- src/automation/tasks/guidellm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/automation/tasks/guidellm.py b/src/automation/tasks/guidellm.py index f0f7758..a85eb83 100644 --- a/src/automation/tasks/guidellm.py +++ b/src/automation/tasks/guidellm.py @@ -38,18 +38,14 @@ def __init__( # Set packages, taking into account default packages # for the LMEvalTask and packages set in the config - print(self.guidellm_packages) - print(packages) if packages is not None: packages = list(set(packages + self.guidellm_packages)) else: packages = self.guidellm_packages - print(packages) if "packages" in config_kwargs: packages = list(set(packages + config_kwargs.pop("packages"))) - print(packages) # Initialize base parameters super().__init__( project_name=project_name, From 515a1dbe60a15902bc3ae3406dbdcb76f6f3eeb3 Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 13:16:19 +0000 Subject: [PATCH 89/91] default config --- examples/guidellm_example.py | 5 ++--- src/automation/configs.py | 2 -- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index 0ebc151..ea0c932 100644 --- a/examples/guidellm_example.py +++ b/examples/guidellm_example.py @@ -11,10 +11,9 @@ GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", - #data_type="emulated", max_seconds=30, - #config = "benchmarking_32k", - #data="prompt_tokens=128,output_tokens=128", + scenario = "benchmarking_32kz", + data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) diff --git a/src/automation/configs.py b/src/automation/configs.py index 094f478..96087ab 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,7 +1,5 @@ -#DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_5:latest" DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" #DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat" -#DEFAULT_GUIDELLM_SCENARIO = "benchmarking_summarization" From ac9ef63ffb42eea42ccf76ba7cebddad7bebd63f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 13:33:35 +0000 Subject: [PATCH 90/91] readd output path --- src/automation/configs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/automation/configs.py b/src/automation/configs.py index 96087ab..0bb90f6 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,5 +1,5 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" -#DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" -DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" +DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" +#DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat" From 69638eae4c4ff224d06cf26a513c0fb5951e394f Mon Sep 17 00:00:00 2001 From: chibu <> Date: Thu, 3 Jul 2025 14:51:21 +0000 Subject: [PATCH 91/91] onpremise settings --- examples/guidellm_example.py | 9 ++++----- src/automation/configs.py | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/guidellm_example.py b/examples/guidellm_example.py index ea0c932..fe2e297 100644 --- a/examples/guidellm_example.py 
+++ b/examples/guidellm_example.py @@ -1,21 +1,20 @@ - from automation.tasks import GuideLLMTask task = GuideLLMTask( project_name="alexandre_debug", task_name="test_guidellm_task", - #model="meta-llama/Llama-3.2-1B-Instruct", - model="Qwen/Qwen2.5-1.5B-Instruct", + model="meta-llama/Llama-3.2-1B-Instruct", rate_type="throughput", backend="aiohttp_server", GUIDELLM__MAX_CONCURRENCY=256, GUIDELLM__REQUEST_TIMEOUT=21600, target="http://localhost:8000/v1", max_seconds=30, - scenario = "benchmarking_32kz", + #scenario = "benchmarking_32k", data="prompt_tokens=128,output_tokens=128", branch = "update_guidellm", vllm_kwargs={"enable-chunked-prefill": True} ) -task.execute_remotely("remote-upgrade-default") +task.execute_remotely("oneshot-a100x1") +#task.execute_locally() diff --git a/src/automation/configs.py b/src/automation/configs.py index 0bb90f6..10aa396 100644 --- a/src/automation/configs.py +++ b/src/automation/configs.py @@ -1,5 +1,4 @@ DEFAULT_DOCKER_IMAGE = "498127099666.dkr.ecr.us-east-1.amazonaws.com/mlops/k8s-research-cuda12_8:latest" DEFAULT_OUTPUT_URI = "gs://neuralmagic-clearml" -#DEFAULT_OUTPUT_URI = "http://10.128.20.60:8081" DEFAULT_RESEARCH_BRANCH = "main" DEFAULT_GUIDELLM_SCENARIO = "chat"
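
Note on the scenario handling introduced in the patches above: guidellm_script.py resolves a user-supplied scenario name to a JSON file under src/automation/standards/benchmarking and falls back to DEFAULT_GUIDELLM_SCENARIO ("chat") when none is given, then loads it with GenerativeTextScenario.from_file(filepath, dict(guidellm_args)). A minimal standalone sketch of that resolution step is below; the helper name resolve_scenario_file and the STANDARDS_DIR constant are illustrative assumptions, not part of the repository.

from pathlib import Path

DEFAULT_GUIDELLM_SCENARIO = "chat"
# Assumed location of the bundled scenario JSON files (mirrors the patched script).
STANDARDS_DIR = Path(".") / "src" / "automation" / "standards" / "benchmarking"

def resolve_scenario_file(guidellm_args: dict) -> Path:
    # Prefer a user-supplied scenario name, e.g. "benchmarking_32k".
    user_scenario = guidellm_args.get("scenario", "")
    if user_scenario:
        filepath = STANDARDS_DIR / f"{user_scenario}.json"
        if not filepath.exists():
            raise ValueError(f"Scenario path {filepath} does not exist")
        return filepath
    # Fall back to the default scenario shipped with the repo.
    return STANDARDS_DIR / f"{DEFAULT_GUIDELLM_SCENARIO}.json"

The resolved path would then be passed to GenerativeTextScenario.from_file together with the remaining guidellm_args, matching the call sequence shown in the guidellm_script.py diff.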