Skip to content

Commit e701cc4

Browse files
changed file names to indicate the workload, addressed comments on the PR for offline inference
1 parent 13e490c commit e701cc4

File tree

5 files changed

+60
-24
lines changed

5 files changed

+60
-24
lines changed

docs/sample_blueprints/offline-inference-infra/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,20 +66,20 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
6666
"mount_location": "/models",
6767
"volume_size_in_gbs": 500,
6868
"include": [
69-
"new_example_sglang.yaml",
69+
"offline_sglang_example.yaml",
7070
"NousResearch/Meta-Llama-3.1-8B"
7171
]
7272
}
7373
],
7474
"output_object_storage": [
7575
{
7676
"bucket_name": "inference_output",
77-
"mount_location": "/mlcommons_output",
77+
"mount_location": "/benchmarking_output",
7878
"volume_size_in_gbs": 200
7979
}
8080
],
8181
"recipe_container_command_args": [
82-
"/models/new_example_sglang.yaml"
82+
"/models/offline_sglang_example.yaml"
8383
],
8484
"recipe_replica_count": 1,
8585
"recipe_container_port": "8000",
@@ -93,7 +93,7 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
9393
```
9494

9595
---
96-
### [Sample Blueprint (Job Mode for Offline vLLM Inference)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_deployment_sglang.json)
96+
### [Sample Blueprint (Job Mode for Offline vLLM Inference)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_deployment_vllm.json)
9797

9898
```json
9999
{
@@ -116,7 +116,7 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
116116
"output_object_storage": [
117117
{
118118
"bucket_name": "inference_output",
119-
"mount_location": "/mlcommons_output",
119+
"mount_location": "/benchmarking_output",
120120
"volume_size_in_gbs": 200
121121
}
122122
],
@@ -161,7 +161,7 @@ experiment_name: "sglang-bench-doc-test-new"
161161
run_name: "llama3-8b-sglang-test"
162162

163163

164-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_sglang.json
164+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_sglang.json
165165

166166
```
167167
## [Sample Config File - 2 vLLM (`offline_vllm_example.yaml`)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_vllm_example.yaml)
@@ -194,7 +194,7 @@ distributed_executor_backend: mp
194194
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
195195
experiment_name: test-bm-suite-doc
196196
run_name: llama3-vllm-test
197-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_vllm.json
197+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_vllm.json
198198
199199
```
200200

docs/sample_blueprints/offline-inference-infra/new_example_sglang.yaml renamed to docs/sample_blueprints/offline-inference-infra/offline_sglang_example.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ experiment_name: "sglang-bench-doc-test-new"
2020
run_name: "llama3-8b-sglang-test"
2121

2222

23-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_sglang.json
23+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_sglang.json
2424

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
benchmark_type: online
2+
model: /models/NousResearch/Meta-Llama-3.1-8B-Instruct # Updated model path
3+
input_len: 64
4+
output_len: 32
5+
max_requests: 5
6+
timeout: 300
7+
num_concurrent: 1
8+
results_dir: /online_output
9+
llm_api: openai
10+
llm_api_key: dummy-key
11+
llm_api_base: https://llama8bobjvllm.129-80-16-111.nip.io/v1 # Updated to HTTPS
12+
experiment_name: local-bench
13+
run_name: llama3-test
14+
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
15+
llmperf_path: /opt/llmperf-src
16+
metadata: test=public-endpoint
17+
save_metrics_path: /online_output/benchmark_output_llama3_online_public.json
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"recipe_id": "online_infernece_llmperf",
3+
"recipe_mode": "job",
4+
"deployment_name": "a1",
5+
"recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
6+
"recipe_node_shape": "VM.Standard.E4.Flex",
7+
"recipe_node_pool_size": 1,
8+
"recipe_flex_shape_ocpu_count": 32,
9+
"recipe_flex_shape_memory_size_in_gbs": 256,
10+
"recipe_node_boot_volume_size_in_gbs": 200,
11+
"recipe_ephemeral_storage_size": 150,
12+
"input_object_storage": [
13+
{
14+
"par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
15+
"mount_location": "/models",
16+
"volume_size_in_gbs": 500,
17+
"include": [
18+
"llama3_public_online.yaml"
19+
]
20+
}
21+
],
22+
"output_object_storage": [
23+
{
24+
"bucket_name": "inference_output",
25+
"mount_location": "/online_output",
26+
"volume_size_in_gbs": 200
27+
}
28+
],
29+
"recipe_container_command_args": [
30+
"/models/llama3_public_online.yaml"
31+
],
32+
"recipe_replica_count": 1,
33+
"recipe_container_port": "5678"
34+
}
35+

docs/sample_blueprints/online-inference-infra/online_example.yaml

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)