elastic · Jan-Kazlouski-elastic · Nov 11, 2025 · Oct 23, 2025 · Oct 23, 2025 · Oct 24, 2025
diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -1445,6 +1445,8 @@ export class GoogleVertexAIServiceSettings {
    * If `streaming_url` is not provided, `url` is also used for streaming `completion` and `chat_completion`.
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `url` (or `streaming_url`).
    * At least one of `url` or `streaming_url` must be provided for Google Model Garden endpoint usage.
+   * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `url`.
    */
   url?: string
   /**
@@ -1453,6 +1455,8 @@ export class GoogleVertexAIServiceSettings {
    * If `url` is not provided, `streaming_url` is also used for non-streaming `completion` requests.
    * If `provider` is not provided or set to `google` (Google Vertex AI), do not set `streaming_url` (or `url`).
    * At least one of `streaming_url` or `url` must be provided for Google Model Garden endpoint usage.
+   * Certain providers require separate URLs for streaming and non-streaming operations (e.g., Anthropic, Mistral, AI21). Others support both operation types through a single URL (e.g., Meta, Hugging Face).
+   * Information on constructing the URL for various providers can be found in the Google Model Garden documentation for the model, or on the endpoint's `Sample request` page. The request examples also illustrate the proper formatting for the `streaming_url`.
    */
   streaming_url?: string
   /**
@@ -1497,7 +1501,11 @@ export class GoogleVertexAIServiceSettings {
 
 export enum GoogleModelGardenProvider {
   google,
-  anthropic
+  anthropic,
+  meta,
+  hugging_face,
+  mistral,
+  ai21
 }
 
 export class GoogleVertexAITaskSettings {

diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample10.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Meta shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_meta_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Meta's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_meta_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "meta",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample11.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Hugging Face dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample12.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Hugging Face dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample13.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Hugging Face shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_hugging_face_completion` to create an inference endpoint to perform a `completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_hugging_face_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample14.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Hugging Face shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Hugging Face's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_hugging_face_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "hugging_face",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample15.yaml
@@ -0,0 +1,15 @@
+summary: A completion task for Google Model Garden Mistral serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Mistral model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "model_id": "mistral-small-2503",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample16.yaml
@@ -0,0 +1,14 @@
+summary: A chat_completion task for Google Model Garden Mistral serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's serverless model hosted on Google Model Garden with single streaming URL provided. See the Mistral model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "model_id": "mistral-small-2503",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/mistralai/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample17.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral dedicated endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample18.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral dedicated endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden dedicated endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%ENDPOINT_ID%.%LOCATION_ID%-%PROJECT_ID%.prediction.vertexai.goog/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample19.yaml
@@ -0,0 +1,13 @@
+summary: A completion task for Google Model Garden Mistral shared endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_mistral_completion` to create an inference endpoint to perform a `completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single URL provided for both streaming and non-streaming tasks. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/completion/google_model_garden_mistral_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample20.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden Mistral shared endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_mistral_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Mistral's model hosted on Google Model Garden shared endpoint with single streaming URL provided. See the endpoint's `Sample request` page for the variable values used in the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_mistral_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "mistral",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/%ENDPOINT_ID%/chat/completions"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample21.yaml
@@ -0,0 +1,14 @@
+summary: A completion task for Google Model Garden AI21 serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_ai21_completion` to create an inference endpoint to perform a `completion` task using AI21's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the AI21 model documentation for instructions on how to construct URLs.
+method_request: 'PUT _inference/completion/google_model_garden_ai21_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "ai21",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml b/...tion/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample22.yaml
@@ -0,0 +1,13 @@
+summary: A chat_completion task for Google Model Garden AI21 serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_ai21_chat_completion` to create an inference endpoint to perform a `chat_completion` task using AI21's serverless model hosted on Google Model Garden with single streaming URL provided. See the AI21 model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/chat_completion/google_model_garden_ai21_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "ai21",
+          "service_account_json": "service-account-json",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/ai21/models/%MODEL_ID%:streamRawPredict"
+      }
+  }
diff --git a/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml b/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample3.yaml
@@ -1,5 +1,5 @@
-summary: A completion task for Google Model Garden Anthropic endpoint
-description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's model hosted on Google Model Garden.
+summary: A completion task for Google Model Garden Anthropic serverless endpoint with separate URLs for streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_anthropic_completion` to create an inference endpoint to perform a `completion` task using Anthropic's serverless model hosted on Google Model Garden with separate URLs for streaming and non-streaming tasks. See the Anthropic model documentation for instructions on how to construct URLs.
 method_request: 'PUT _inference/completion/google_model_garden_anthropic_completion'
 # type: "request"
 value: |-
@@ -8,8 +8,8 @@ value: |-
       "service_settings": {
           "provider": "anthropic",
           "service_account_json": "service-account-json",
-          "url": "https://url:rawPredict",
-          "streaming_url": "https://streaming_url:streamRawPredict"
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:rawPredict",
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
       },
       "task_settings": {
           "max_tokens": 128

diff --git a/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml b/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample4.yaml
@@ -1,5 +1,5 @@
-summary: A chat_completion task for Google Model Garden Anthropic endpoint
-description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's model hosted on Google Model Garden.
+summary: A chat_completion task for Google Model Garden Anthropic serverless endpoint with single streaming URL provided
+description: Run `PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion` to create an inference endpoint to perform a `chat_completion` task using Anthropic's serverless model hosted on Google Model Garden with single streaming URL provided. See the Anthropic model documentation for instructions on how to construct the URL.
 method_request: 'PUT _inference/chat_completion/google_model_garden_anthropic_chat_completion'
 # type: "request"
 value: |-
@@ -8,8 +8,7 @@ value: |-
       "service_settings": {
           "provider": "anthropic",
           "service_account_json": "service-account-json",
-          "url": "https://url:rawPredict",
-          "streaming_url": "https://streaming_url:streamRawPredict"
+          "streaming_url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/publishers/anthropic/models/%MODEL_ID%:streamRawPredict"
       },
       "task_settings": {
           "max_tokens": 128

diff --git a/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml b/...ation/inference/put_googlevertexai/examples/request/PutGoogleVertexAiRequestExample5.yaml
@@ -0,0 +1,14 @@
+summary: A completion task for Google Model Garden Meta serverless endpoint with single URL provided for both streaming and non-streaming tasks
+description: Run `PUT _inference/completion/google_model_garden_meta_completion` to create an inference endpoint to perform a `completion` task using Meta's serverless model hosted on Google Model Garden with single URL provided for both streaming and non-streaming tasks. See the Meta model documentation for instructions on how to construct the URL.
+method_request: 'PUT _inference/completion/google_model_garden_meta_completion'
+# type: "request"
+value: |-
+  {
+      "service": "googlevertexai",
+      "service_settings": {
+          "provider": "meta",
+          "model_id": "meta/llama-3.3-70b-instruct-maas",
+          "service_account_json": "service-account-json",
+          "url": "https://%LOCATION_ID%-aiplatform.googleapis.com/v1/projects/%PROJECT_ID%/locations/%LOCATION_ID%/endpoints/openapi/chat/completions"
+      }
+  }