From 7602b3e9b71d308eadec35878348adc9b4c73539 Mon Sep 17 00:00:00 2001
From: Liz Johnson <liz.j.johnson@oracle.com>
Date: Wed, 23 Apr 2025 13:20:54 -0700
Subject: [PATCH 1/4] fixed model deployment watch command

---
 ai-quick-actions/troubleshooting-tips.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ai-quick-actions/troubleshooting-tips.md b/ai-quick-actions/troubleshooting-tips.md
index 78743502..a8914f7b 100644
--- a/ai-quick-actions/troubleshooting-tips.md
+++ b/ai-quick-actions/troubleshooting-tips.md
@@ -40,7 +40,7 @@ To successfully debug an issue, always select logging while creating model deplo
 
 Once the model deployment is intiated, you can monitor the logs by running on your notebook terminal- 
 
-`ads watch <your modeldepoyment ocid> --auth resource_principal`
+`ads opctl watch <your modeldeployment ocid> --auth resource_principal`
 
 To fetch the model deployment ocid - 
 1. Go to model deployments tab on AI Quick Actions

From e0c0a16d05c1518ef92fd9e79a32cd2772a53df0 Mon Sep 17 00:00:00 2001
From: Liz Johnson <liz.j.johnson@oracle.com>
Date: Mon, 28 Apr 2025 14:06:04 -0700
Subject: [PATCH 2/4] added documentation for embedding support

---
 .../multimodel-deployment-tips.md             | 31 ++++++++++++++++---
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/ai-quick-actions/multimodel-deployment-tips.md b/ai-quick-actions/multimodel-deployment-tips.md
index 08d5d677..0b0e1cbe 100644
--- a/ai-quick-actions/multimodel-deployment-tips.md
+++ b/ai-quick-actions/multimodel-deployment-tips.md
@@ -360,8 +360,13 @@ ads aqua deployment create [OPTIONS]
 
 `--models [str]`
 
-The String representation of a JSON array, where each object defines a model’s OCID and the number of GPUs assigned to it. The gpu count should always be a **power of two (e.g., 1, 2, 4, 8)**. <br>
-Example: `'[{"model_id":"<model_ocid>", "gpu_count":1},{"model_id":"<model_ocid>", "gpu_count":1}]'` for  `VM.GPU.A10.2` shape. <br>
+The string representation of a JSON array, where each object defines a model’s OCID and the number of GPUs assigned to it. <br> The GPU count should always be a **power of two (e.g., 1, 2, 4, 8)**.
+
+Example: `'[{"model_id":"<model_ocid>", "gpu_count":1},{"model_id":"<model_ocid>", "gpu_count":1}]'` for  `VM.GPU.A10.2` shape.
+
+For deploying embedding models, model_task must be specified. For best practice, model_task should be supplied. (Supported tasks: text_generation, image_text_to_text, code_synthesis, text_embedding)
+
+Example: `'[{"model_id":"<model_ocid>", "gpu_count":1, "model_task": "text_generation"},{"model_id":"<model_ocid>", "gpu_count":1, "model_task": "image_text_to_text"}]'` for  `VM.GPU.A10.2` shape.
 
 
 `--instance_shape [str]`
@@ -439,7 +444,8 @@ ads aqua deployment create \
   --container_image_uri "dsmc://odsc-vllm-serving:0.6.4.post1.2" \
   --models '[{"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}]' \
   --instance_shape "VM.GPU.A10.2" \
-  --display_name "modelDeployment_multmodel_model1_model2"
+  --display_name "modelDeployment_multmodel_model1_model2" \
+  --env_var '{"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions"}'
 
 ```
 
@@ -499,7 +505,8 @@ ads aqua deployment create \
   --models '[{"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}]' \
   --env-var '{"MODEL_DEPLOY_PREDICT_ENDPOINT":"/v1/chat/completions"}' \
   --instance_shape "VM.GPU.A10.2" \
-  --display_name "modelDeployment_multmodel_model1_model2"
+  --display_name "modelDeployment_multmodel_model1_model2" \
+  --env_var '{"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/chat/completions"}'
 
 ```
 
@@ -550,7 +557,23 @@ ads aqua deployment create \
         "MULTI_MODEL_CONFIG": "{\"models\": [{\"params\": \"--served-model-name mistralai/Mistral-7B-v0.1 --seed 42 --tensor-parallel-size 1 --max-model-len 4096\", \"model_path\": \"service_models/Mistral-7B-v0.1/78814a9/artifact\"}, {\"params\": \"--served-model-name tiiuae/falcon-7b --seed 42 --tensor-parallel-size 1 --trust-remote-code\", \"model_path\": \"service_models/falcon-7b/f779652/artifact\"}]}",
         "MODEL_DEPLOY_ENABLE_STREAMING": "true",
 ```
+#### Create MultiModel (1 Embedding Model, 1 LLM) deployment with `/v1/completions`
+
+Note: every inference request to the embedding model must include the header `{"route": "/v1/embeddings"}`, for example:
+
+```
+headers={'route':'/v1/embeddings','Content-Type':'application/json'}
+```
+- To serve `/v1/chat/completions` instead, set `MODEL_DEPLOY_PREDICT_ENDPOINT` to `/v1/chat/completions` in `--env_var`.
+```bash
+ads aqua deployment create \
+  --container_image_uri "dsmc://odsc-vllm-serving:0.6.4.post1.2" \
+  --models '[{"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1, "model_task": "text_embedding"}, {"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1, "model_task": "text_generation"}]' \
+  --instance_shape "VM.GPU.A10.2" \
+  --display_name "modelDeployment_multmodel_model1_model2" \
+  --env_var '{"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions"}'
 
+```
 
 ## Manage MultiModel Deployments
 

From 255033ca0b0f148e6c59d4427050952fba7e10b4 Mon Sep 17 00:00:00 2001
From: Liz Johnson <eli3jo.art@gmail.com>
Date: Mon, 28 Apr 2025 14:42:53 -0700
Subject: [PATCH 3/4] Update multimodel-deployment-tips.md

---
 ai-quick-actions/multimodel-deployment-tips.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ai-quick-actions/multimodel-deployment-tips.md b/ai-quick-actions/multimodel-deployment-tips.md
index 0b0e1cbe..0c09fe3b 100644
--- a/ai-quick-actions/multimodel-deployment-tips.md
+++ b/ai-quick-actions/multimodel-deployment-tips.md
@@ -366,7 +366,7 @@ Example: `'[{"model_id":"<model_ocid>", "gpu_count":1},{"model_id":"<model_ocid>
 
 For deploying embedding models, model_task must be specified. For best practice, model_task should be supplied. (Supported tasks: text_generation, image_text_to_text, code_synthesis, text_embedding)
 
-Example: `'[{"model_id":"<model_ocid>", "gpu_count":1, "model_task": "text_generation"},{"model_id":"<model_ocid>", "gpu_count":1, "model_task": "image_text_to_text"}]'` for  `VM.GPU.A10.2` shape.
+Example: `'[{"model_id":"<model_ocid_of_embedding_model>", "gpu_count":1, "model_task": "embedding"},{"model_id":"<model_ocid_of_image_text_to_text>", "gpu_count":1, "model_task": "image_text_to_text"}]'` for  `VM.GPU.A10.2` shape.
 
 
 `--instance_shape [str]`
@@ -1208,4 +1208,4 @@ For other operations related to **Evaluation**, such as listing evaluations and
 | mistralai/Mistral-7B-v0.1 | BM.GPU.L40S-NC.4 | 1 | --max-model-len 4096 |
 | mistralai/Mistral-7B-v0.1 | BM.GPU.L40S-NC.4 | 2 |  |
 | tiiuae/falcon-7b | VM.GPU.A10.2 | 1 | --trust-remote-code |
-| tiiuae/falcon-7b | BM.GPU.A10.4 | 1 | --trust-remote-code |
\ No newline at end of file
+| tiiuae/falcon-7b | BM.GPU.A10.4 | 1 | --trust-remote-code |

From 52bd714e5b6743aca028ce9ef2f4ec948dc544dd Mon Sep 17 00:00:00 2001
From: Liz Johnson <eli3jo.art@gmail.com>
Date: Mon, 28 Apr 2025 14:44:36 -0700
Subject: [PATCH 4/4] Update multimodel-deployment-tips.md

---
 ai-quick-actions/multimodel-deployment-tips.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ai-quick-actions/multimodel-deployment-tips.md b/ai-quick-actions/multimodel-deployment-tips.md
index 0c09fe3b..eeaea1e1 100644
--- a/ai-quick-actions/multimodel-deployment-tips.md
+++ b/ai-quick-actions/multimodel-deployment-tips.md
@@ -366,7 +366,7 @@ Example: `'[{"model_id":"<model_ocid>", "gpu_count":1},{"model_id":"<model_ocid>
 
 For deploying embedding models, model_task must be specified. For best practice, model_task should be supplied. (Supported tasks: text_generation, image_text_to_text, code_synthesis, text_embedding)
 
-Example: `'[{"model_id":"<model_ocid_of_embedding_model>", "gpu_count":1, "model_task": "embedding"},{"model_id":"<model_ocid_of_image_text_to_text>", "gpu_count":1, "model_task": "image_text_to_text"}]'` for  `VM.GPU.A10.2` shape.
+Example: `'[{"model_id":"<ocid_of_embedding_model>", "gpu_count":1, "model_task": "text_embedding"},{"model_id":"<ocid_of_image_text_to_text_model>", "gpu_count":1, "model_task": "image_text_to_text"}]'` for  `VM.GPU.A10.2` shape.
 
 
 `--instance_shape [str]`