diff --git a/docs/weaviate/model-providers/_includes/provider.vectorizer.py b/docs/weaviate/model-providers/_includes/provider.vectorizer.py index 5a8308a56..63c412cbf 100644 --- a/docs/weaviate/model-providers/_includes/provider.vectorizer.py +++ b/docs/weaviate/model-providers/_includes/provider.vectorizer.py @@ -8,8 +8,8 @@ client = weaviate.connect_to_local( headers={ - "X-OpenAI-Api-Key": os.environ["OPENAI_APIKEY"], - "X-Cohere-Api-Key": os.environ["COHERE_APIKEY"], + "X-OpenAI-Api-Key": os.environ["OPENAI_API_KEY"], + "X-Cohere-Api-Key": os.environ["COHERE_API_KEY"], } ) @@ -249,15 +249,13 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - project_id="", - # (Optional) To manually set the model ID - model_id="gemini-embedding-001" - ) - ], + vector_config=Configure.Vectors.text2vec_google( + name="title_vector", + source_properties=["title"], + project_id="", # Required for Vertex AI + # (Optional) To manually set the model ID + model="gemini-embedding-001" + ), # highlight-end # Additional parameters not shown ) @@ -272,15 +270,12 @@ client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - project_id="", - # (Optional) To manually set the model ID - model_id="gemini-embedding-001" - ) - ], + vector_config=Configure.Vectors.text2vec_google_aistudio( + name="title_vector", + source_properties=["title"], + # (Optional) To manually set the model ID + model="text-embedding-004" + ), # highlight-end # Additional parameters not shown ) @@ -292,19 +287,35 @@ # START FullVectorizerGoogle from weaviate.classes.config import Configure +# For Vertex AI client.collections.create( "DemoCollection", # highlight-start - vector_config=[ - Configure.Vectors.text2vec_palm( - name="title_vector", - source_properties=["title"], - 
project_id="", # Required for Vertex AI - # # Further options - # model_id="", - # api_endpoint="", - ) - ], + vector_config=Configure.Vectors.text2vec_google( + name="title_vector", + source_properties=["title"], + project_id="", # Required for Vertex AI + # Further options + # model="", + # api_endpoint="", + ), + # highlight-end + # Additional parameters not shown +) + +# clean up +client.collections.delete("DemoCollection") + +# For Google AI Studio (Gemini API) +client.collections.create( + "DemoCollection", + # highlight-start + vector_config=Configure.Vectors.text2vec_google_aistudio( + name="title_vector", + source_properties=["title"], + # Further options + model="text-embedding-004", + ), # highlight-end # Additional parameters not shown ) @@ -323,21 +334,19 @@ Property(name="title", data_type=DataType.TEXT), Property(name="poster", data_type=DataType.BLOB), ], - vector_config=[ - Configure.Vectors.multi2vec_palm( - name="title_vector", - # Define the fields to be used for the vectorization - using image_fields, text_fields, video_fields - image_fields=[ - Multi2VecField(name="poster", weight=0.9) - ], - text_fields=[ - Multi2VecField(name="title", weight=0.1) - ], - # video_fields=[], - project_id="", # Required for Vertex AI - location="", # Required for Vertex AI - ) - ], + vector_config=Configure.Vectors.multi2vec_palm( + name="title_vector", + # Define the fields to be used for the vectorization - using image_fields, text_fields, video_fields + image_fields=[ + Multi2VecField(name="poster", weight=0.9) + ], + text_fields=[ + Multi2VecField(name="title", weight=0.1) + ], + # video_fields=[], + project_id="", # Required for Vertex AI + location="", # Required for Vertex AI + ), # highlight-end # Additional parameters not shown ) @@ -357,23 +366,20 @@ Property(name="description", data_type=DataType.TEXT), Property(name="poster", data_type=DataType.BLOB), ], - vector_config=[ - Configure.Vectors.multi2vec_palm( - name="title_vector", - project_id="", 
# Required for Vertex AI - location="us-central1", - # model_id="", - # dimensions=512, - image_fields=[ - Multi2VecField(name="poster", weight=0.9) - ], - text_fields=[ - Multi2VecField(name="title", weight=0.1) - ], - # video_fields=[] - # video_interval_seconds=20 - ) - ], + vector_config=Configure.Vectors.multi2vec_palm( + project_id="", # Required for Vertex AI + location="us-central1", + # model_id="", + # dimensions=512, + image_fields=[ + Multi2VecField(name="poster", weight=0.9) + ], + text_fields=[ + Multi2VecField(name="title", weight=0.1) + ], + # video_fields=[] + # video_interval_seconds=20 + ), # highlight-end # Additional parameters not shown ) diff --git a/docs/weaviate/model-providers/google/embeddings.md b/docs/weaviate/model-providers/google/embeddings.md index 8f49b8854..1b2876cba 100644 --- a/docs/weaviate/model-providers/google/embeddings.md +++ b/docs/weaviate/model-providers/google/embeddings.md @@ -27,6 +27,13 @@ At [import time](#data-import), Weaviate generates text object embeddings and sa ![Embedding integration illustration](../_includes/integration_google_embedding.png) +:::tip Which Google service should I use? + +- **Google AI Studio (Gemini API)**: Simpler setup, ideal for prototyping and development. Get started quickly with just an API key. +- **Vertex AI**: Enterprise-grade service with more features, better for production deployments with advanced requirements. + +::: + :::info Gemini API availability At the time of writing (November 2023), Gemini API is not available in all regions. See [this page](https://ai.google.dev/gemini-api/docs/available-regions) for the latest information. ::: @@ -60,9 +67,11 @@ This integration is enabled by default on Weaviate Cloud (WCD) serverless instan You must provide valid API credentials to Weaviate for the appropriate integration. 
-#### Gemini API +#### Google AI Studio (Gemini API) -Go to [Google Gemini API](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=) to sign up and obtain an API key. +1. Go to [Google AI Studio](https://aistudio.google.com/app/apikey/?utm_source=weaviate&utm_medium=referral&utm_campaign=partnerships&utm_content=). +2. In the "API Keys" section, create a new API key. +3. Use the `X-Goog-Studio-Api-Key` header to provide your API key to Weaviate. #### Vertex AI @@ -75,12 +84,11 @@ import UseGoogleAuthInstructions from './_includes/use_google_auth_instructions. If you have the [Google Cloud CLI tool](https://cloud.google.com/cli) installed and set up, you can view your token by running the following command: - ```shell gcloud auth print-access-token ``` -#### Token expiry for Vertex AI users +##### Token expiry for Vertex AI users import GCPTokenExpiryNotes from '/_includes/gcp.token.expiry.notes.mdx'; @@ -90,7 +98,7 @@ import GCPTokenExpiryNotes from '/_includes/gcp.token.expiry.notes.mdx'; Provide the API key to Weaviate at runtime, as shown in the examples below. -Note the separate headers that are available for [Gemini API](#gemini-api) and [Vertex AI](#vertex-ai) users. +Note the separate headers that are available for [Google AI Studio (Gemini API)](#google-ai-studio-gemini-api) and [Vertex AI](#vertex-ai) users. import ApiKeyNote from '../_includes/google-api-key-note.md'; @@ -131,20 +139,25 @@ import ApiKeyNote from '../_includes/google-api-key-note.md'; [Configure a Weaviate index](../../manage-collections/vector-config.mdx#specify-a-vectorizer) as follows to use a Google embedding model: -Note that the required parameters differ between Vertex AI and Gemini API. 
+:::info Important: Different vectorizers for different services + +- **Google AI Studio (Gemini API)**: Use `text2vec_google_aistudio()` +- **Vertex AI**: Use `text2vec_google()` + +::: You can [specify](#vectorizer-parameters) one of the [available models](#available-models) for Weaviate to use. The [default model](#available-models) is used if no model is specified. -### Vertex AI +### Google AI Studio (Gemini API) -Vertex AI users must provide the Google Cloud project ID in the vectorizer configuration. +For Google AI Studio, use the `text2vec_google_aistudio()` vectorizer. No `project_id` or `api_endpoint` is required. @@ -152,8 +165,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -161,22 +174,24 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi -### Gemini API +### Vertex AI + +For Vertex AI, use the `text2vec_google()` vectorizer. You must provide your Google Cloud `project_id`. @@ -184,8 +199,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -193,8 +208,8 @@ Vertex AI users must provide the Google Cloud project ID in the vectorizer confi @@ -214,10 +229,13 @@ import VectorizationBehavior from '/_includes/vectorization.behavior.mdx'; The following examples show how to configure Google-specific options. -- `projectId` (Only required if using Vertex AI): e.g. `cloud-large-language-models` -- `apiEndpoint` (Optional): e.g. `us-central1-aiplatform.googleapis.com` -- `modelId` (Optional): e.g. `gemini-embedding-001` - +**Google AI Studio (Gemini API) parameters:** +- `modelId` (Optional): e.g. `text-embedding-004`, `gemini-embedding-001` + +**Vertex AI parameters:** +- `projectId` (Required): Your Google Cloud project ID, e.g. `cloud-large-language-models` +- `apiEndpoint` (Optional): Regional endpoint, e.g. `us-central1-aiplatform.googleapis.com` +- `modelId` (Optional): e.g. 
`gemini-embedding-001`, `text-embedding-005` @@ -376,7 +394,12 @@ The query below returns the `n` best scoring objects from the database, set by ` ### Available models -Vertex AI: +**Google AI Studio (Gemini API):** +- `text-embedding-004` (recommended) +- `gemini-embedding-001` (default) + - `embedding-001` (deprecated name for `gemini-embedding-001`) + +**Vertex AI:** - `gemini-embedding-001` (default, added in 1.29.9, 1.30.11, 1.31.5 and onwards) - `text-embedding-005` (added in v1.29.9, 1.30.11, 1.31.5 and onwards) - `text-multilingual-embedding-002` (added in v1.29.9, 1.30.11, 1.31.5 and onwards) @@ -397,11 +420,6 @@ The following models have been deprecated by Google and are no longer supported. -Gemini API: -- `gemini-embedding-001` (default) - - `embedding-001` (deprecated name for `gemini-embedding-001`) -- `text-embedding-004` - ## Further resources ### Other integrations