From 47f6623fe1eddfd100e1b1c9dd38059e285481f7 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Thu, 13 Nov 2025 09:37:42 +0000 Subject: [PATCH 1/2] Update Google model provider docs --- .../_includes/provider.vectorizer.py | 124 +++++++++--------- .../model-providers/google/embeddings.md | 78 ++++++----- 2 files changed, 112 insertions(+), 90 deletions(-) diff --git a/docs/weaviate/model-providers/_includes/provider.vectorizer.py b/docs/weaviate/model-providers/_includes/provider.vectorizer.py index 5a8308a56..b2df158e8 100644 --- a/docs/weaviate/model-providers/_includes/provider.vectorizer.py +++ b/docs/weaviate/model-providers/_includes/provider.vectorizer.py @@ -249,15 +249,13 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - project_id="", - # (Optional) To manually set the model ID - model_id="gemini-embedding-001" - ) - ], + vector_config=Configure.Vectors.text2vec_palm( + name="title_vector", + source_properties=["title"], + project_id="", # Required for Vertex AI + # (Optional) To manually set the model ID + model_id="gemini-embedding-001" + ), # highlight-end # Additional parameters not shown ) @@ -272,15 +270,12 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - project_id="", - # (Optional) To manually set the model ID - model_id="gemini-embedding-001" - ) - ], + vector_config=Configure.Vectors.text2vec_google_aistudio( + name="title_vector", + source_properties=["title"], + # (Optional) To manually set the model ID + model_id="text-embedding-004" + ), # highlight-end # Additional parameters not shown ) @@ -292,19 +287,35 @@ # START FullVectorizerGoogle from weaviate.classes.config import Configure +# For Vertex AI client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - project_id="", # Required for Vertex AI - # # Further options - # model_id="", + vector_config=Configure.Vectors.text2vec_palm( + name="title_vector", + source_properties=["title"], + project_id="", # Required for Vertex AI + # Further options + # model_id="", # api_endpoint="", - ) - ], + ), + # highlight-end + # Additional parameters not shown +) + +# clean up +client.collections.delete("DemoCollection") + +# For Google AI Studio (Gemini API) +client.collections.create( + "DemoCollection", + # highlight-start + vector_config=Configure.Vectors.text2vec_google_aistudio( + name="title_vector", + source_properties=["title"], + # Further options + model_id="text-embedding-004", + ), # highlight-end # Additional parameters not shown ) @@ -323,21 +334,19 @@ Property(name="title", data_type=DataType.TEXT), Property(name="poster", data_type=DataType.BLOB), ], - vector_config=[ - Configure.Vectors.multi2vec_palm( - name="title_vector", - # Define the fields to be used for the vectorization - using image_fields, text_fields, video_fields - image_fields=[ - Multi2VecField(name="poster", weight=0.9) - ], - text_fields=[ - Multi2VecField(name="title", weight=0.1) - ], - # video_fields=[], - project_id="", # Required for Vertex AI - location="", # Required for Vertex AI - ) - ], + vector_config=Configure.Vectors.multi2vec_palm( + name="title_vector", + # Define the fields to be used for the vectorization - using image_fields, text_fields, video_fields + image_fields=[ + Multi2VecField(name="poster", weight=0.9) + ], + text_fields=[ + Multi2VecField(name="title", weight=0.1) + ], + # video_fields=[], + project_id="", # Required for Vertex AI + location="", # Required for Vertex AI + ), # highlight-end # Additional parameters not shown ) @@ -357,23 +366,20 @@ Property(name="description", data_type=DataType.TEXT), Property(name="poster", data_type=DataType.BLOB), ], - vector_config=[ - Configure.Vectors.multi2vec_palm( - name="title_vector", - project_id="", # Required for Vertex AI - location="us-central1", - # model_id="", - # dimensions=512, - image_fields=[ - Multi2VecField(name="poster", weight=0.9) - ], - text_fields=[ - Multi2VecField(name="title", weight=0.1) - ], - # video_fields=[] - # video_interval_seconds=20 - ) - ], + vector_config=Configure.Vectors.multi2vec_palm( + project_id="", # Required for Vertex AI + location="us-central1", + # model_id="", + # dimensions=512, + image_fields=[ + Multi2VecField(name="poster", weight=0.9) + ], + text_fields=[ + Multi2VecField(name="title", weight=0.1) + ], + # video_fields=[] + # video_interval_seconds=20 + ), # highlight-end # Additional parameters not shown ) diff --git a/docs/weaviate/model-providers/google/embeddings.md b/docs/weaviate/model-providers/google/embeddings.md index 8f49b8854..678913771 100644 --- a/docs/weaviate/model-providers/google/embeddings.md +++ b/docs/weaviate/model-providers/google/embeddings.md @@ -27,6 +27,13 @@ At [import time](#data-import), Weaviate generates text object embeddings and sa ![Embedding integration illustration](../_includes/integration_google_embedding.png) +:::tip Which Google service should I use? + +- **Google AI Studio (Gemini API)**: Simpler setup, ideal for prototyping and development. Get started quickly with just an API key. +- **Vertex AI**: Enterprise-grade service with more features, better for production deployments with advanced requirements. + +::: + :::info Gemini API availability At the time of writing (November 2023), Gemini API is not available in all regions. See [this page](https://ai.google.dev/gemini-api/docs/available-regions) for the latest information. ::: @@ -60,9 +67,11 @@ This integration is enabled by default on Weaviate Cloud (WCD) serverless instan You must provide valid API credentials to Weaviate for the appropriate integration. -#### Gemini API +#### Google AI Studio (Gemini API) -Go to [Google Gemini API](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=) to sign up and obtain an API key. +1. Go to [Google AI Studio](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=) to sign up and obtain an API key +2. In the "Credentials" section, create credentials for "Gemini for Google Cloud API" +3. Use the `X-Goog-Studio-Api-Key` header to provide your API key to Weaviate #### Vertex AI @@ -75,12 +84,11 @@ import UseGoogleAuthInstructions from './_includes/use_google_auth_instructions. If you have the [Google Cloud CLI tool](https://cloud.google.com/cli) installed and set up, you can view your token by running the following command: - ```shell gcloud auth print-access-token ``` -#### Token expiry for Vertex AI users +##### Token expiry for Vertex AI users import GCPTokenExpiryNotes from '/_includes/gcp.token.expiry.notes.mdx'; @@ -90,7 +98,7 @@ import GCPTokenExpiryNotes from '/_includes/gcp.token.expiry.notes.mdx'; Provide the API key to Weaviate at runtime, as shown in the examples below. -Note the separate headers that are available for [Gemini API](#gemini-api) and [Vertex AI](#vertex-ai) users. +Note the separate headers that are available for [Google AI Studio (Gemini API)](#google-ai-studio-gemini-api) and [Vertex AI](#vertex-ai) users. import ApiKeyNote from '../_includes/google-api-key-note.md'; @@ -131,20 +139,23 @@ import ApiKeyNote from '../_includes/google-api-key-note.md'; [Configure a Weaviate index](../../manage-collections/vector-config.mdx#specify-a-vectorizer) as follows to use a Google embedding model: -Note that the required parameters differ between Vertex AI and Gemini API. +:::info Important: Different vectorizers for different services +- **Google AI Studio (Gemini API)**: Use `text2vec_google_aistudio()` +- **Vertex AI**: Use `text2vec_palm()` or `text2vec_google()` +::: You can [specify](#vectorizer-parameters) one of the [available models](#available-models) for Weaviate to use. The [default model](#available-models) is used if no model is specified. -### Vertex AI +### Google AI Studio (Gemini API) -Vertex AI users must provide the Google Cloud project ID in the vectorizer configuration. +For Google AI Studio, use the `text2vec_google_aistudio()` vectorizer. No `project_id` or `api_endpoint` is required. @@ -152,8 +163,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -161,22 +172,24 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi -### Gemini API +### Vertex AI + +For Vertex AI, use the `text2vec_palm()` vectorizer. You must provide your Google Cloud `project_id`. @@ -184,8 +197,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -193,8 +206,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -214,10 +227,13 @@ import VectorizationBehavior from '/_includes/vectorization.behavior.mdx'; The following examples show how to configure Google-specific options. -- `projectId` (Only required if using Vertex AI): e.g. `cloud-large-language-models` -- `apiEndpoint` (Optional): e.g. `us-central1-aiplatform.googleapis.com` -- `modelId` (Optional): e.g. `gemini-embedding-001` - +**Google AI Studio (Gemini API) parameters:** +- `modelId` (Optional): e.g. `text-embedding-004`, `gemini-embedding-001` + +**Vertex AI parameters:** +- `projectId` (Required): Your Google Cloud project ID, e.g. `cloud-large-language-models` +- `apiEndpoint` (Optional): Regional endpoint, e.g. `us-central1-aiplatform.googleapis.com` +- `modelId` (Optional): e.g. `gemini-embedding-001`, `text-embedding-005` @@ -376,7 +392,12 @@ The query below returns the `n` best scoring objects from the database, set by ` ### Available models -Vertex AI: +**Google AI Studio (Gemini API):** +- `text-embedding-004` (recommended) +- `gemini-embedding-001` (default) + - `embedding-001` (deprecated name for `gemini-embedding-001`) + +**Vertex AI:** - `gemini-embedding-001` (default, added in 1.29.9, 1.30.11, 1.31.5 and onwards) - `text-embedding-005` (added in v1.29.9, 1.30.11, 1.31.5 and onwards) - `text-multilingual-embedding-002` (added in v1.29.9, 1.30.11, 1.31.5 and onwards) @@ -397,11 +418,6 @@ The following models have been deprecated by Google and are no longer supported. -Gemini API: -- `gemini-embedding-001` (default) - - `embedding-001` (deprecated name for `gemini-embedding-001`) -- `text-embedding-004` - ## Further resources ### Other integrations From 041671cf9545951ffcd74d878337cb4a88fcdc04 Mon Sep 17 00:00:00 2001 From: Ivan Despot <66276597+g-despot@users.noreply.github.com> Date: Thu, 13 Nov 2025 14:24:15 +0100 Subject: [PATCH 2/2] Update docs --- .../_includes/provider.vectorizer.py | 16 ++++++++-------- .../model-providers/google/embeddings.md | 10 ++++++---- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/weaviate/model-providers/_includes/provider.vectorizer.py b/docs/weaviate/model-providers/_includes/provider.vectorizer.py index b2df158e8..63c412cbf 100644 --- a/docs/weaviate/model-providers/_includes/provider.vectorizer.py +++ b/docs/weaviate/model-providers/_includes/provider.vectorizer.py @@ -8,8 +8,8 @@ client = weaviate.connect_to_local( headers={ - "X-OpenAI-Api-Key": os.environ["OPENAI_APIKEY"], - "X-Cohere-Api-Key": os.environ["COHERE_APIKEY"], + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], + "X-Cohere-Api-Key": os.environ["COHERE_API_KEY"], } ) @@ -249,12 +249,12 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=Configure.Vectors.text2vec_palm( + vector_config=Configure.Vectors.text2vec_google( name="title_vector", source_properties=["title"], project_id="", # Required for Vertex AI # (Optional) To manually set the model ID - model_id="gemini-embedding-001" + model="gemini-embedding-001" ), # highlight-end # Additional parameters not shown @@ -274,7 +274,7 @@ name="title_vector", source_properties=["title"], # (Optional) To manually set the model ID - model_id="text-embedding-004" + model="text-embedding-004" ), # highlight-end # Additional parameters not shown @@ -291,13 +291,13 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=Configure.Vectors.text2vec_palm( + vector_config=Configure.Vectors.text2vec_google( name="title_vector", source_properties=["title"], project_id="", # Required for Vertex AI # Further options - # model_id="", - # api_endpoint="", + # model="", + # api_endpoint="", ), # highlight-end # Additional parameters not shown diff --git a/docs/weaviate/model-providers/google/embeddings.md b/docs/weaviate/model-providers/google/embeddings.md index 678913771..1b2876cba 100644 --- a/docs/weaviate/model-providers/google/embeddings.md +++ b/docs/weaviate/model-providers/google/embeddings.md @@ -69,8 +69,8 @@ You must provide valid API credentials to Weaviate for the appropriate integrati #### Google AI Studio (Gemini API) -1. Go to [Google AI Studio](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=) to sign up and obtain an API key -2. In the "Credentials" section, create credentials for "Gemini for Google Cloud API" +1. Go to [Google AI Studio](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=) +2. In the "API Keys" section create a new API key 3. Use the `X-Goog-Studio-Api-Key` header to provide your API key to Weaviate #### Vertex AI @@ -140,8 +140,10 @@ import ApiKeyNote from '../_includes/google-api-key-note.md'; [Configure a Weaviate index](../../manage-collections/vector-config.mdx#specify-a-vectorizer) as follows to use a Google embedding model: :::info Important: Different vectorizers for different services + - **Google AI Studio (Gemini API)**: Use `text2vec_google_aistudio()` -- **Vertex AI**: Use `text2vec_palm()` or `text2vec_google()` +- **Vertex AI**: Use `text2vec_google()` + ::: You can [specify](#vectorizer-parameters) one of the [available models](#available-models) for Weaviate to use. The [default model](#available-models) is used if no model is specified. @@ -182,7 +184,7 @@ For Google AI Studio, use the `text2vec_google_aistudio()` vectorizer. No `proje ### Vertex AI -For Vertex AI, use the `text2vec_palm()` vectorizer. You must provide your Google Cloud `project_id`. +For Vertex AI, use the `text2vec_google()` vectorizer. You must provide your Google Cloud `project_id`.