Skip to content

Commit e701cc4

Browse files
changed file names to indicate the workload, addressed comments on the PR for offline inference
1 parent 13e490c commit e701cc4

File tree

5 files changed

+60
-24
lines changed

5 files changed

+60
-24
lines changed

docs/sample_blueprints/offline-inference-infra/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,20 +66,20 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
6666
"mount_location": "/models",
6767
"volume_size_in_gbs": 500,
6868
"include": [
69-
"new_example_sglang.yaml",
69+
"offline_sglang_example.yaml",
7070
"NousResearch/Meta-Llama-3.1-8B"
7171
]
7272
}
7373
],
7474
"output_object_storage": [
7575
{
7676
"bucket_name": "inference_output",
77-
"mount_location": "/mlcommons_output",
77+
"mount_location": "/benchmarking_output",
7878
"volume_size_in_gbs": 200
7979
}
8080
],
8181
"recipe_container_command_args": [
82-
"/models/new_example_sglang.yaml"
82+
"/models/offline_sglang_example.yaml"
8383
],
8484
"recipe_replica_count": 1,
8585
"recipe_container_port": "8000",
@@ -93,7 +93,7 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
9393
```
9494

9595
---
96-
### [Sample Blueprint (Job Mode for Offline vLLM Inference)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_deployment_sglang.json)
96+
### [Sample Blueprint (Job Mode for Offline vLLM Inference)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_deployment_vllm.json)
9797

9898
```json
9999
{
@@ -116,7 +116,7 @@ Notes : Make sure your output object storage is in the same tenancy as your stac
116116
"output_object_storage": [
117117
{
118118
"bucket_name": "inference_output",
119-
"mount_location": "/mlcommons_output",
119+
"mount_location": "/benchmarking_output",
120120
"volume_size_in_gbs": 200
121121
}
122122
],
@@ -161,7 +161,7 @@ experiment_name: "sglang-bench-doc-test-new"
161161
run_name: "llama3-8b-sglang-test"
162162

163163

164-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_sglang.json
164+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_sglang.json
165165

166166
```
167167
## [Sample Config File - 2 vLLM (`offline_vllm_example.yaml`)](https://github.com/oracle-quickstart/oci-ai-blueprints/blob/offline-inference-benchmark/docs/sample_blueprints/offline-inference-infra/offline_vllm_example.yaml)
@@ -194,7 +194,7 @@ distributed_executor_backend: mp
194194
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
195195
experiment_name: test-bm-suite-doc
196196
run_name: llama3-vllm-test
197-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_vllm.json
197+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_vllm.json
198198
199199
```
200200

docs/sample_blueprints/offline-inference-infra/new_example_sglang.yaml renamed to docs/sample_blueprints/offline-inference-infra/offline_sglang_example.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,5 @@ experiment_name: "sglang-bench-doc-test-new"
2020
run_name: "llama3-8b-sglang-test"
2121

2222

23-
save_metrics_path: /mlcommons_output/benchmark_output_llama3_sglang.json
23+
save_metrics_path: /benchmarking_output/benchmark_output_llama3_sglang.json
2424

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
benchmark_type: online
2+
model: /models/NousResearch/Meta-Llama-3.1-8B-Instruct # Updated model path
3+
input_len: 64
4+
output_len: 32
5+
max_requests: 5
6+
timeout: 300
7+
num_concurrent: 1
8+
results_dir: /online_output
9+
llm_api: openai
10+
llm_api_key: dummy-key
11+
llm_api_base: https://llama8bobjvllm.129-80-16-111.nip.io/v1 # Updated to HTTPS
12+
experiment_name: local-bench
13+
run_name: llama3-test
14+
mlflow_uri: http://mlflow-benchmarking.corrino-oci.com:5000
15+
llmperf_path: /opt/llmperf-src
16+
metadata: test=public-endpoint
17+
save_metrics_path: /online_output/benchmark_output_llama3_online_public.json
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
{
2+
"recipe_id": "online_infernece_llmperf",
3+
"recipe_mode": "job",
4+
"deployment_name": "a1",
5+
"recipe_image_uri": "iad.ocir.io/iduyx1qnmway/corrino-devops-repository:llm-benchmark-0409-v4",
6+
"recipe_node_shape": "VM.Standard.E4.Flex",
7+
"recipe_node_pool_size": 1,
8+
"recipe_flex_shape_ocpu_count": 32,
9+
"recipe_flex_shape_memory_size_in_gbs": 256,
10+
"recipe_node_boot_volume_size_in_gbs": 200,
11+
"recipe_ephemeral_storage_size": 150,
12+
"input_object_storage": [
13+
{
14+
"par": "https://objectstorage.ap-melbourne-1.oraclecloud.com/p/0T99iRADcM08aVpumM6smqMIcnIJTFtV2D8ZIIWidUP9eL8GSRyDMxOb9Va9rmRc/n/iduyx1qnmway/b/mymodels/o/",
15+
"mount_location": "/models",
16+
"volume_size_in_gbs": 500,
17+
"include": [
18+
"llama3_public_online.yaml"
19+
]
20+
}
21+
],
22+
"output_object_storage": [
23+
{
24+
"bucket_name": "inference_output",
25+
"mount_location": "/online_output",
26+
"volume_size_in_gbs": 200
27+
}
28+
],
29+
"recipe_container_command_args": [
30+
"/models/llama3_public_online.yaml"
31+
],
32+
"recipe_replica_count": 1,
33+
"recipe_container_port": "5678"
34+
}
35+

docs/sample_blueprints/online-inference-infra/online_example.yaml

Lines changed: 0 additions & 16 deletions
This file was deleted.

0 commit comments

Comments
 (0)