From 972ae62dcc16a85da01222026bbbd1b19dc2cf24 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 14:59:04 +0200 Subject: [PATCH 01/10] adding live_code_exec_with_txt with test --- genai/live/live_code_exec_with_txt.py | 69 +++++++++++++++++++++++++++ genai/live/requirements.txt | 4 +- genai/live/test_live_examples.py | 5 ++ 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 genai/live/live_code_exec_with_txt.py diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py new file mode 100644 index 00000000000..cea64aa97c9 --- /dev/null +++ b/genai/live/live_code_exec_with_txt.py @@ -0,0 +1,69 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_code_exec_with_txt] + from google import genai + from google.genai.types import ( + LiveConnectConfig, + Modality, + Tool, + ToolCodeExecution, + Content, + Part + ) + + config = LiveConnectConfig( + response_modalities=[Modality.TEXT], + tools=[Tool(code_execution=ToolCodeExecution())], + ) + + client = genai.Client() + # model_id = "gemini-live-2.5-flash" #todo + model_id = "gemini-2.0-flash-live-preview-04-09" + + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "Compute the largest prime palindrome under 10, " + print("> ", text_input, "\n") + await session.send_client_content(turns=Content(parts=[Part(text=text_input)])) + + response = [] + + async for chunk in session.receive(): + if chunk.server_content: + if chunk.text is not None: + response.append(chunk.text) + # print(chunk.text) + + model_turn = chunk.server_content.model_turn + if model_turn: + for part in model_turn.parts: + if part.executable_code is not None: + print(part.executable_code.code) + + if part.code_execution_result is not None: + print(part.code_execution_result.output) + + print("".join(response)) + # Example output: + # STRING + # [END googlegenaisdk_live_code_exec_with_txt] + return response + + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index c12e6a7e2f7..a0dc792ecf6 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,3 +1,5 @@ google-genai==1.20.0 scipy==1.15.3 -websockets==15.0.1 \ No newline at end of file +websockets==15.0.1 +soundfile==0.13.1 +librosa==0.11.0 \ No newline at end of file diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index ce382539861..7ae4bbd75ea 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -25,6 +25,7 @@ import live_websocket_textgen_with_audio import live_websocket_textgen_with_txt import live_with_txt +import live_code_exec_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -55,3 +56,7 @@ async def test_live_websocket_audiogen_with_txt() -> None: @pytest.mark.asyncio async def test_live_websocket_audiotranscript_with_txt() -> None: assert await live_websocket_audiotranscript_with_txt.generate_content() + +@pytest.mark.asyncio +async def test_live_code_exec_with_txt() -> None: + assert await live_code_exec_with_txt.generate_content() \ No newline at end of file From 11a09e49b348e72f1a8e0e19b5e03f464f4de4dd Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 15:09:42 +0200 Subject: [PATCH 02/10] adding live_code_exec_with_txt with test --- genai/live/live_code_exec_with_txt.py | 9 ++++----- genai/live/requirements.txt | 2 +- genai/live/test_live_examples.py | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py index cea64aa97c9..b67b2840b3e 100644 --- a/genai/live/live_code_exec_with_txt.py +++ b/genai/live/live_code_exec_with_txt.py @@ -24,8 +24,8 @@ async def generate_content() -> list[str]: Tool, ToolCodeExecution, Content, - Part - ) + Part, + ) config = LiveConnectConfig( response_modalities=[Modality.TEXT], @@ -37,9 +37,9 @@ async def generate_content() -> list[str]: model_id = "gemini-2.0-flash-live-preview-04-09" async with client.aio.live.connect(model=model_id, config=config) as session: - text_input = "Compute the largest prime palindrome under 10, " + text_input = "Compute the largest prime palindrome under 10" print("> ", text_input, "\n") - await session.send_client_content(turns=Content(parts=[Part(text=text_input)])) + await session.send_client_content(turns=Content(role="user",parts=[Part(text=text_input)])) response = [] @@ -47,7 +47,6 @@ async def generate_content() -> list[str]: if chunk.server_content: if chunk.text is not None: response.append(chunk.text) - # print(chunk.text) model_turn = chunk.server_content.model_turn if model_turn: diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index a0dc792ecf6..6ef3b264665 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,4 +1,4 @@ -google-genai==1.20.0 +google-genai==1.27.0 scipy==1.15.3 websockets==15.0.1 soundfile==0.13.1 diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 7ae4bbd75ea..93e1ffa6f07 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -59,4 +59,4 @@ async def test_live_websocket_audiotranscript_with_txt() -> None: @pytest.mark.asyncio async def test_live_code_exec_with_txt() -> None: - assert await live_code_exec_with_txt.generate_content() \ No newline at end of file + assert await live_code_exec_with_txt.generate_content() From 4c066e64d18e9631a2a79b2601d0245c814ef496 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 15:38:15 +0200 Subject: [PATCH 03/10] adding live_code_exec_with_txt with test --- genai/live/live_code_exec_with_txt.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/genai/live/live_code_exec_with_txt.py b/genai/live/live_code_exec_with_txt.py index b67b2840b3e..e4e794af0d1 100644 --- a/genai/live/live_code_exec_with_txt.py +++ b/genai/live/live_code_exec_with_txt.py @@ -27,19 +27,19 @@ async def generate_content() -> list[str]: Part, ) + client = genai.Client() + # model_id = "gemini-live-2.5-flash" #todo + model_id = "gemini-2.0-flash-live-preview-04-09" config = LiveConnectConfig( response_modalities=[Modality.TEXT], tools=[Tool(code_execution=ToolCodeExecution())], ) - - client = genai.Client() - # model_id = "gemini-live-2.5-flash" #todo - model_id = "gemini-2.0-flash-live-preview-04-09" - async with client.aio.live.connect(model=model_id, config=config) as session: text_input = "Compute the largest prime palindrome under 10" print("> ", text_input, "\n") - await session.send_client_content(turns=Content(role="user",parts=[Part(text=text_input)])) + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) response = [] @@ -59,7 +59,8 @@ async def generate_content() -> list[str]: print("".join(response)) # Example output: - # STRING + # > Compute the largest prime palindrome under 10 + # Final Answer: The final answer is $\boxed{7}$ # [END googlegenaisdk_live_code_exec_with_txt] return response From 8d4753a432311f282128a2ff2c6e10328fc7fa4f Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 17:43:20 +0200 Subject: [PATCH 04/10] adding live_code_exec_with_txt with test --- genai/live/live_func_call_with_txt.py | 40 +++++++ genai/live/live_ground_googsearch_with_txt.py | 65 +++++++++++ genai/live/live_txt_with_audio.py | 107 ++++++++++++++++++ genai/live/openAI.py | 55 +++++++++ genai/live/test_live_examples.py | 7 ++ 5 files changed, 274 insertions(+) create mode 100644 genai/live/live_func_call_with_txt.py create mode 100644 genai/live/live_ground_googsearch_with_txt.py create mode 100644 genai/live/live_txt_with_audio.py create mode 100644 genai/live/openAI.py diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py new file mode 100644 index 00000000000..f0e7bcde5d7 --- /dev/null +++ b/genai/live/live_func_call_with_txt.py @@ -0,0 +1,40 @@ +import asyncio +from google import genai +from google.genai import types + +client = genai.Client() +# model = "gemini-live-2.5-flash" +model = "gemini-live-2.5-flash-preview-native-audio" + +# Simple function definitions +turn_on_the_lights = {"name": "turn_on_the_lights"} +turn_off_the_lights = {"name": "turn_off_the_lights"} + +tools = [{"function_declarations": [turn_on_the_lights, turn_off_the_lights]}] +config = {"response_modalities": ["TEXT"], "tools": tools} + +async def main(): + #TODO after the meeting add confing to the meeting + + async with client.aio.live.connect(model=model, config=config) as session: + prompt = "Turn on the lights please" + await session.send_client_content(turns={"parts": [{"text": prompt}]}) + + async for chunk in session.receive(): + if chunk.server_content: + if chunk.text is not None: + print(chunk.text) + elif chunk.tool_call: + function_responses = [] + for fc in tool_call.function_calls: + function_response = types.FunctionResponse( + name=fc.name, + response={ "result": "ok" } # simple, hard-coded function response + ) + function_responses.append(function_response) + + await session.send_tool_response(function_responses=function_responses) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/genai/live/live_ground_googsearch_with_txt.py b/genai/live/live_ground_googsearch_with_txt.py new file mode 100644 index 00000000000..9a43a2c6c08 --- /dev/null +++ b/genai/live/live_ground_googsearch_with_txt.py @@ -0,0 +1,65 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import asyncio +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_ground_googsearch_with_txt] + from google import genai + from google.genai.types import ( + LiveConnectConfig, + Modality, + Tool, + GoogleSearch, + Content, + Part + ) + + client = genai.Client() + # model = "gemini-live-2.5-flash" #todo + model_id = "gemini-2.0-flash-live-preview-04-09" + config = LiveConnectConfig( + response_modalities=[Modality.TEXT], + tools=[Tool(google_search=GoogleSearch())], + ) + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "When did the last Brazil vs. Argentina soccer match happen?" + await session.send_client_content(turns=Content(role="user", parts=[Part(text=text_input)])) + + response = [] + + async for chunk in session.receive(): + if chunk.server_content: + if chunk.text is not None: + response.append(chunk.text) + + # The model might generate and execute Python code to use Search + model_turn = chunk.server_content.model_turn + if model_turn: + for part in model_turn.parts: + if part.executable_code is not None: + print(part.executable_code.code) + + if part.code_execution_result is not None: + print(part.code_execution_result.output) + + print("".join(response)) + # Example output: + # > When did the last Brazil vs. Argentina soccer match happen? + # The last Brazil vs. Argentina soccer match was on March 25, 2025, a 2026 World Cup qualifier, where Argentina defeated Brazil 4-1. + # [END googlegenaisdk_live_ground_googsearch_with_txt] + return response + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py new file mode 100644 index 00000000000..cc082f7f5b2 --- /dev/null +++ b/genai/live/live_txt_with_audio.py @@ -0,0 +1,107 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav +# Install helpers for converting files: pip install librosa soundfile + +import asyncio +import io +from pathlib import Path + +import requests +from google import genai +from google.genai import types +import os +import soundfile as sf +import librosa + + +audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + +client = genai.Client() + +# config = {"response_modalities": ["TEXT"]} +config = types.LiveConnectConfig(response_modalities = [types.Modality.TEXT]) +# model = "gemini-live-2.5-flash-preview-native-audio" +model = "gemini-2.0-flash-live-preview-04-09" + +# model = "gemini-live-2.5-flash" + +# def generate_content() -> str: +# from google import genai +# # [START googlegenaisdk_thinking_textgen_with_txt] +# # client = genai.Client( +# # vertexai=True, project='cloud-ai-devrel-softserve', location='us-central1' +# # ) +# # response = client.models.generate_content( +# # model="gemini-2.5-pro", +# # contents="solve x^2 + 4x + 4 = 0", +# # ) +# +# client = genai.Client( +# vertexai=True, +# project=os.environ["GOOGLE_CLOUD_PROJECT"], +# location=os.environ["GOOGLE_CLOUD_LOCATION"], +# ) +# # model = "gemini-live-2.5-flash" +# model = "gemini-live-2.5-flash-preview-native-audio" +# config = {"response_modalities": ["TEXT"]} +# response = client.models.generate_content( +# model=model, +# contents="solve x^2 + 4x + 4 = 0", +# ) +# print(response.text) +# generate_content() + +async def main(): + # config = {"response_modalities": ["TEXT"]} + + # async with client.aio.live.connect(model=model, config=config) as session: + async with client.aio.live.connect(model=model) as session: + #TODO after the meeting add confing to the meeting + try: + audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + response = requests.get(audio_url) + response.raise_for_status() + buffer = io.BytesIO(response.content) + y, sr = librosa.load(buffer, sr=16000) + sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") + buffer.seek(0) + audio_bytes = buffer.read() + + # buffer = io.BytesIO() + # y, sr = librosa.load("hello_gemini_are_you_there.wav", sr=16000) + # sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") + # buffer.seek(0) + # audio_bytes = buffer.read() + + except requests.exceptions.RequestException as e: + print(f"Error fetching audio from URL: {e}") + + except Exception as e: + print(f"An unexpected error occurred: {e}") + # If you've pre-converted to sample.pcm using ffmpeg, use this instead: + # audio_bytes = Path("sample.pcm").read_bytes() + await session.send_realtime_input( + audio=types.Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000") + ) + + async for response in session.receive(): + if response.text is not None: + print(response.text) + + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/genai/live/openAI.py b/genai/live/openAI.py new file mode 100644 index 00000000000..80af5d86445 --- /dev/null +++ b/genai/live/openAI.py @@ -0,0 +1,55 @@ +import os + +from pydantic import BaseModel +from openai import OpenAI + +import openai + +from google.auth import default +import google.auth.transport.requests + +# TODO(developer): Update and un-comment below lines +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] +location = "us-central1" + +# Programmatically get an access token +credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) +credentials.refresh(google.auth.transport.requests.Request()) +# Note: the credential lives for 1 hour by default (https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed. + +############################## +# Choose one of the following: +############################## + +# If you are calling a Gemini model, set the ENDPOINT_ID variable to use openapi. +ENDPOINT_ID = "openapi" + +# If you are calling a self-deployed model from Model Garden, set the +# ENDPOINT_ID variable and set the client's base URL to use your endpoint. +# ENDPOINT_ID = "YOUR_ENDPOINT_ID" + +# OpenAI Client +client = openai.OpenAI( + base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}", + api_key=credentials.token, +) + +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + +completion = client.beta.chat.completions.parse( + model="google/gemini-2.0-flash-001", + # model="google/gemini-2.5-flash-preview-04-17", + messages=[ + {"role": "system", "content": "Extract the event information."}, + {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + ], + response_format=CalendarEvent, +) + + + +print(completion.choices[0].message.parsed) diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 93e1ffa6f07..9548db71952 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -26,6 +26,7 @@ import live_websocket_textgen_with_txt import live_with_txt import live_code_exec_with_txt +import live_ground_googsearch_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -57,6 +58,12 @@ async def test_live_websocket_audiogen_with_txt() -> None: async def test_live_websocket_audiotranscript_with_txt() -> None: assert await live_websocket_audiotranscript_with_txt.generate_content() + @pytest.mark.asyncio async def test_live_code_exec_with_txt() -> None: assert await live_code_exec_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_ground_googsearch_with_txt() -> None: + assert await live_ground_googsearch_with_txt.generate_content() From 36114d55095e65fae7da822d17325f58b23cceee Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 18:09:53 +0200 Subject: [PATCH 05/10] adding live_ground_googsearch_with_txt with test --- genai/live/live_ground_googsearch_with_txt.py | 9 ++++++-- genai/live/live_txt_with_audio.py | 22 +++++++++---------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/genai/live/live_ground_googsearch_with_txt.py b/genai/live/live_ground_googsearch_with_txt.py index 9a43a2c6c08..d4243d099e4 100644 --- a/genai/live/live_ground_googsearch_with_txt.py +++ b/genai/live/live_ground_googsearch_with_txt.py @@ -14,6 +14,8 @@ import asyncio + + async def generate_content() -> list[str]: # [START googlegenaisdk_live_ground_googsearch_with_txt] from google import genai @@ -23,7 +25,7 @@ async def generate_content() -> list[str]: Tool, GoogleSearch, Content, - Part + Part, ) client = genai.Client() @@ -35,7 +37,9 @@ async def generate_content() -> list[str]: ) async with client.aio.live.connect(model=model_id, config=config) as session: text_input = "When did the last Brazil vs. Argentina soccer match happen?" - await session.send_client_content(turns=Content(role="user", parts=[Part(text=text_input)])) + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) response = [] @@ -61,5 +65,6 @@ async def generate_content() -> list[str]: # [END googlegenaisdk_live_ground_googsearch_with_txt] return response + if __name__ == "__main__": asyncio.run(generate_content()) diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index cc082f7f5b2..c2efdce7bc3 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -72,21 +72,21 @@ async def main(): async with client.aio.live.connect(model=model) as session: #TODO after the meeting add confing to the meeting try: - audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" - response = requests.get(audio_url) - response.raise_for_status() - buffer = io.BytesIO(response.content) - y, sr = librosa.load(buffer, sr=16000) - sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") - buffer.seek(0) - audio_bytes = buffer.read() - - # buffer = io.BytesIO() - # y, sr = librosa.load("hello_gemini_are_you_there.wav", sr=16000) + # audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" + # response = requests.get(audio_url) + # response.raise_for_status() + # buffer = io.BytesIO(response.content) + # y, sr = librosa.load(buffer, sr=16000) # sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") # buffer.seek(0) # audio_bytes = buffer.read() + buffer = io.BytesIO() + y, sr = librosa.load("hello_gemini_are_you_there.wav", sr=16000) + sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") + buffer.seek(0) + audio_bytes = buffer.read() + except requests.exceptions.RequestException as e: print(f"Error fetching audio from URL: {e}") From 4327734149a1d147218b09356cfd8f086a0d0b92 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 19:05:03 +0200 Subject: [PATCH 06/10] adding live_ground_ragengine_with_txt with test --- genai/live/live_ground_ragengine_with_txt.py | 72 ++++++++++++++++++++ genai/live/test_live_examples.py | 6 ++ 2 files changed, 78 insertions(+) create mode 100644 genai/live/live_ground_ragengine_with_txt.py diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py new file mode 100644 index 00000000000..d912daf648f --- /dev/null +++ b/genai/live/live_ground_ragengine_with_txt.py @@ -0,0 +1,72 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import os + +import vertexai + + +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_ground_ragengine_with_txt] + from google import genai + from google.genai.types import (Content, LiveConnectConfig, HttpOptions, Modality, Part, Tool, Retrieval, + VertexRagStore, VertexRagStoreRagResource) + # from vertexai import rag + # vertexai.init(project=os.environ["GOOGLE_CLOUD_PROJECT"], location=os.environ["GOOGLE_CLOUD_LOCATION"]) + + client = genai.Client() + # model_id = "gemini-live-2.5-flash" + model_id = "gemini-2.0-flash-live-preview-04-09" + + + # rag_store = VertexRagStore( + # rag_resources=[ + # VertexRagStoreRagResource( + # rag_corpus= # Use memory corpus if you want to store context. #todo ask Sampath should I create it? + # ) + # ], + # # Set `store_context` to true to allow Live API sink context into your memory corpus. + # store_context=True + # ) + # config = LiveConnectConfig(response_modalities=[Modality.TEXT], + # tools=[Tool( + # retrieval=Retrieval( + # vertex_rag_store=rag_store))]) + config = LiveConnectConfig(response_modalities=[Modality.TEXT]) + + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?" + print("> ", text_input, "\n") + + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) + + response = [] + + async for message in session.receive(): + if message.text: + response.append(message.text) + continue + + print("".join(response)) + # Example output: + # > What year did Mariusz Pudzianowski win World's Strongest Man? + # Mariusz Pudzianowski won World's Strongest Man in 2002, 2003, 2005, 2007, and 2008. + # [END googlegenaisdk_live_ground_ragengine_with_txt] + return response + +if __name__ == "__main__": + asyncio.run(generate_content()) diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index 9548db71952..a3041744917 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -27,6 +27,7 @@ import live_with_txt import live_code_exec_with_txt import live_ground_googsearch_with_txt +import live_ground_ragengine_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -67,3 +68,8 @@ async def test_live_code_exec_with_txt() -> None: @pytest.mark.asyncio async def test_live_ground_googsearch_with_txt() -> None: assert await live_ground_googsearch_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_ground_ragengine_with_txt() -> None: + assert await live_ground_ragengine_with_txt.generate_content() From 8317d3afb38074e5bdc565b03706efd101bc53c3 Mon Sep 17 00:00:00 2001 From: Guiners Date: Thu, 24 Jul 2025 19:30:13 +0200 Subject: [PATCH 07/10] adding live_func_call_with_txt with test --- genai/live/live_func_call_with_txt.py | 62 +++++++++++++------- genai/live/live_ground_ragengine_with_txt.py | 36 ++++++++---- genai/live/live_txt_with_audio.py | 7 ++- genai/live/live_with_txt.py | 4 +- genai/live/openAI.py | 7 ++- genai/live/requirements.txt | 2 +- genai/live/test_live_examples.py | 7 +++ 7 files changed, 83 insertions(+), 42 deletions(-) diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py index f0e7bcde5d7..59fd52911ab 100644 --- a/genai/live/live_func_call_with_txt.py +++ b/genai/live/live_func_call_with_txt.py @@ -1,40 +1,58 @@ import asyncio -from google import genai -from google.genai import types -client = genai.Client() -# model = "gemini-live-2.5-flash" -model = "gemini-live-2.5-flash-preview-native-audio" -# Simple function definitions -turn_on_the_lights = {"name": "turn_on_the_lights"} -turn_off_the_lights = {"name": "turn_off_the_lights"} - -tools = [{"function_declarations": [turn_on_the_lights, turn_off_the_lights]}] -config = {"response_modalities": ["TEXT"], "tools": tools} - -async def main(): - #TODO after the meeting add confing to the meeting - - async with client.aio.live.connect(model=model, config=config) as session: - prompt = "Turn on the lights please" - await session.send_client_content(turns={"parts": [{"text": prompt}]}) +async def generate_content() -> list[str]: + # [START googlegenaisdk_live_func_call_with_txt] + from google import genai + from google.genai.types import ( + LiveConnectConfig, + Modality, + Tool, + FunctionDeclaration, + FunctionResponse, + ) + + client = genai.Client() + # model = "gemini-live-2.5-flash" + model_id = "gemini-2.0-flash-live-preview-04-09" + + turn_on_the_lights = FunctionDeclaration(name="turn_on_the_lights") + turn_off_the_lights = FunctionDeclaration(name="turn_off_the_lights") + + config = LiveConnectConfig( + response_modalities=[Modality.TEXT], + tools=[Tool(function_declarations=[turn_on_the_lights, turn_off_the_lights])], + ) + async with client.aio.live.connect(model=model_id, config=config) as session: + text_input = "Turn on the lights please" + print("> ", text_input, "\n") + await session.send_client_content(turns={"parts": [{"text": text_input}]}) async for chunk in session.receive(): if chunk.server_content: if chunk.text is not None: print(chunk.text) + elif chunk.tool_call: function_responses = [] - for fc in tool_call.function_calls: - function_response = types.FunctionResponse( + for fc in chunk.tool_call.function_calls: + function_response = FunctionResponse( name=fc.name, - response={ "result": "ok" } # simple, hard-coded function response + response={ + "result": "ok" + }, # simple, hard-coded function response ) function_responses.append(function_response) + print(function_response.response["result"]) await session.send_tool_response(function_responses=function_responses) + # Example output: + # > Turn on the lights please + # ok + # [END googlegenaisdk_live_func_call_with_txt] + return function_responses + if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(generate_content()) diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py index d912daf648f..03ccc473388 100644 --- a/genai/live/live_ground_ragengine_with_txt.py +++ b/genai/live/live_ground_ragengine_with_txt.py @@ -21,8 +21,18 @@ async def generate_content() -> list[str]: # [START googlegenaisdk_live_ground_ragengine_with_txt] from google import genai - from google.genai.types import (Content, LiveConnectConfig, HttpOptions, Modality, Part, Tool, Retrieval, - VertexRagStore, VertexRagStoreRagResource) + from google.genai.types import ( + Content, + LiveConnectConfig, + HttpOptions, + Modality, + Part, + Tool, + Retrieval, + VertexRagStore, + VertexRagStoreRagResource, + ) + # from vertexai import rag # vertexai.init(project=os.environ["GOOGLE_CLOUD_PROJECT"], location=os.environ["GOOGLE_CLOUD_LOCATION"]) @@ -30,7 +40,6 @@ async def generate_content() -> list[str]: # model_id = "gemini-live-2.5-flash" model_id = "gemini-2.0-flash-live-preview-04-09" - # rag_store = VertexRagStore( # rag_resources=[ # VertexRagStoreRagResource( @@ -47,19 +56,19 @@ async def generate_content() -> list[str]: config = LiveConnectConfig(response_modalities=[Modality.TEXT]) async with client.aio.live.connect(model=model_id, config=config) as session: - text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?" - print("> ", text_input, "\n") + text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?" + print("> ", text_input, "\n") - await session.send_client_content( - turns=Content(role="user", parts=[Part(text=text_input)]) - ) + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) - response = [] + response = [] - async for message in session.receive(): - if message.text: - response.append(message.text) - continue + async for message in session.receive(): + if message.text: + response.append(message.text) + continue print("".join(response)) # Example output: @@ -68,5 +77,6 @@ async def generate_content() -> list[str]: # [END googlegenaisdk_live_ground_ragengine_with_txt] return response + if __name__ == "__main__": asyncio.run(generate_content()) diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py index c2efdce7bc3..6a3f75d6628 100644 --- a/genai/live/live_txt_with_audio.py +++ b/genai/live/live_txt_with_audio.py @@ -33,7 +33,7 @@ client = genai.Client() # config = {"response_modalities": ["TEXT"]} -config = types.LiveConnectConfig(response_modalities = [types.Modality.TEXT]) +config = types.LiveConnectConfig(response_modalities=[types.Modality.TEXT]) # model = "gemini-live-2.5-flash-preview-native-audio" model = "gemini-2.0-flash-live-preview-04-09" @@ -65,12 +65,13 @@ # print(response.text) # generate_content() + async def main(): # config = {"response_modalities": ["TEXT"]} # async with client.aio.live.connect(model=model, config=config) as session: async with client.aio.live.connect(model=model) as session: - #TODO after the meeting add confing to the meeting + # TODO after the meeting add confing to the meeting try: # audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" # response = requests.get(audio_url) @@ -104,4 +105,4 @@ async def main(): if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/genai/live/live_with_txt.py b/genai/live/live_with_txt.py index a3c75188439..fd412af7740 100644 --- a/genai/live/live_with_txt.py +++ b/genai/live/live_with_txt.py @@ -35,7 +35,9 @@ async def generate_content() -> list[str]: ) as session: text_input = "Hello? Gemini, are you there?" print("> ", text_input, "\n") - await session.send_client_content(turns=Content(role="user", parts=[Part(text=text_input)])) + await session.send_client_content( + turns=Content(role="user", parts=[Part(text=text_input)]) + ) response = [] diff --git a/genai/live/openAI.py b/genai/live/openAI.py index 80af5d86445..7071f1c04ac 100644 --- a/genai/live/openAI.py +++ b/genai/live/openAI.py @@ -34,6 +34,7 @@ api_key=credentials.token, ) + class CalendarEvent(BaseModel): name: str date: str @@ -45,11 +46,13 @@ class CalendarEvent(BaseModel): # model="google/gemini-2.5-flash-preview-04-17", messages=[ {"role": "system", "content": "Extract the event information."}, - {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + { + "role": "user", + "content": "Alice and Bob are going to a science fair on Friday.", + }, ], response_format=CalendarEvent, ) - print(completion.choices[0].message.parsed) diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index 6ef3b264665..75e86aa3d7e 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -2,4 +2,4 @@ google-genai==1.27.0 scipy==1.15.3 websockets==15.0.1 soundfile==0.13.1 -librosa==0.11.0 \ No newline at end of file +librosa==0.11.0 diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index a3041744917..a3cdc84e096 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -28,6 +28,7 @@ import live_code_exec_with_txt import live_ground_googsearch_with_txt import live_ground_ragengine_with_txt +import live_func_call_with_txt os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "True" os.environ["GOOGLE_CLOUD_LOCATION"] = "us-central1" @@ -73,3 +74,9 @@ async def test_live_ground_googsearch_with_txt() -> None: @pytest.mark.asyncio async def test_live_ground_ragengine_with_txt() -> None: assert await live_ground_ragengine_with_txt.generate_content() + + +@pytest.mark.asyncio +async def test_live_func_call_with_txt() -> None: + assert await live_func_call_with_txt.generate_content() + \ No newline at end of file From 9a1e37b8fea0dc4a1b970d7b5f099830e912e787 Mon Sep 17 00:00:00 2001 From: Guiners Date: Fri, 25 Jul 2025 14:21:43 +0200 Subject: [PATCH 08/10] adding live_func_call_with_txt with test --- genai/live/live_txt_with_audio.py | 108 ------------------------------ genai/live/openAI.py | 58 ---------------- 2 files changed, 166 deletions(-) delete mode 100644 genai/live/live_txt_with_audio.py delete mode 100644 genai/live/openAI.py diff --git a/genai/live/live_txt_with_audio.py b/genai/live/live_txt_with_audio.py deleted file mode 100644 index 6a3f75d6628..00000000000 --- a/genai/live/live_txt_with_audio.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2024 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -# Test file: https://storage.googleapis.com/generativeai-downloads/data/16000.wav -# Install helpers for converting files: pip install librosa soundfile - -import asyncio -import io -from pathlib import Path - -import requests -from google import genai -from google.genai import types -import os -import soundfile as sf -import librosa - - -audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" - -client = genai.Client() - -# config = {"response_modalities": ["TEXT"]} -config = types.LiveConnectConfig(response_modalities=[types.Modality.TEXT]) -# model = "gemini-live-2.5-flash-preview-native-audio" -model = "gemini-2.0-flash-live-preview-04-09" - -# model = "gemini-live-2.5-flash" - -# def generate_content() -> str: -# from google import genai -# # [START googlegenaisdk_thinking_textgen_with_txt] -# # client = genai.Client( -# # vertexai=True, project='cloud-ai-devrel-softserve', location='us-central1' -# # ) -# # response = client.models.generate_content( -# # model="gemini-2.5-pro", -# # contents="solve x^2 + 4x + 4 = 0", -# # ) -# -# client = genai.Client( -# vertexai=True, -# project=os.environ["GOOGLE_CLOUD_PROJECT"], -# location=os.environ["GOOGLE_CLOUD_LOCATION"], -# ) -# # model = "gemini-live-2.5-flash" -# model = "gemini-live-2.5-flash-preview-native-audio" -# config = {"response_modalities": ["TEXT"]} -# response = client.models.generate_content( -# model=model, -# contents="solve x^2 + 4x + 4 = 0", -# ) -# print(response.text) -# generate_content() - - -async def main(): - # config = {"response_modalities": ["TEXT"]} - - # async with client.aio.live.connect(model=model, config=config) as session: - async with client.aio.live.connect(model=model) as session: - # TODO after the meeting add confing to the meeting - try: - # audio_url = "https://storage.googleapis.com/generativeai-downloads/data/16000.wav" - # response = requests.get(audio_url) - # response.raise_for_status() - # buffer = io.BytesIO(response.content) - # y, sr = librosa.load(buffer, sr=16000) - # sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") - # buffer.seek(0) - # audio_bytes = buffer.read() - - buffer = io.BytesIO() - y, sr = librosa.load("hello_gemini_are_you_there.wav", sr=16000) - sf.write(buffer, y, sr, format="RAW", subtype="PCM_16") - buffer.seek(0) - audio_bytes = buffer.read() - - except requests.exceptions.RequestException as e: - print(f"Error fetching audio from URL: {e}") - - except Exception as e: - print(f"An unexpected error occurred: {e}") - # If you've pre-converted to sample.pcm using ffmpeg, use this instead: - # audio_bytes = Path("sample.pcm").read_bytes() - await session.send_realtime_input( - audio=types.Blob(data=audio_bytes, mime_type="audio/pcm;rate=16000") - ) - - async for response in session.receive(): - if response.text is not None: - print(response.text) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/genai/live/openAI.py b/genai/live/openAI.py deleted file mode 100644 index 7071f1c04ac..00000000000 --- a/genai/live/openAI.py +++ /dev/null @@ -1,58 +0,0 @@ -import os - -from pydantic import BaseModel -from openai import OpenAI - -import openai - -from google.auth import default -import google.auth.transport.requests - -# TODO(developer): Update and un-comment below lines -project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -location = "us-central1" - -# Programmatically get an access token -credentials, _ = default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) -credentials.refresh(google.auth.transport.requests.Request()) -# Note: the credential lives for 1 hour by default (https://cloud.google.com/docs/authentication/token-types#at-lifetime); after expiration, it must be refreshed. - -############################## -# Choose one of the following: -############################## - -# If you are calling a Gemini model, set the ENDPOINT_ID variable to use openapi. -ENDPOINT_ID = "openapi" - -# If you are calling a self-deployed model from Model Garden, set the -# ENDPOINT_ID variable and set the client's base URL to use your endpoint. -# ENDPOINT_ID = "YOUR_ENDPOINT_ID" - -# OpenAI Client -client = openai.OpenAI( - base_url=f"https://{location}-aiplatform.googleapis.com/v1/projects/{project_id}/locations/{location}/endpoints/{ENDPOINT_ID}", - api_key=credentials.token, -) - - -class CalendarEvent(BaseModel): - name: str - date: str - participants: list[str] - - -completion = client.beta.chat.completions.parse( - model="google/gemini-2.0-flash-001", - # model="google/gemini-2.5-flash-preview-04-17", - messages=[ - {"role": "system", "content": "Extract the event information."}, - { - "role": "user", - "content": "Alice and Bob are going to a science fair on Friday.", - }, - ], - response_format=CalendarEvent, -) - - -print(completion.choices[0].message.parsed) From bdc23e4c779a68d6f7d865e5061fe66480412953 Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 12:01:39 +0200 Subject: [PATCH 09/10] adding live_func_call_with_txt with test --- genai/live/live_ground_ragengine_with_txt.py | 39 +++++++------------ .../live/live_websocket_audiogen_with_txt.py | 28 +++++++------ ...live_websocket_audiotranscript_with_txt.py | 24 +++++++----- .../live/live_websocket_textgen_with_audio.py | 24 +++++++----- genai/live/live_websocket_textgen_with_txt.py | 20 +++++----- genai/live/requirements-test.txt | 1 + genai/live/requirements.txt | 2 +- genai/live/test_live_examples.py | 35 +++++++++++++++-- 8 files changed, 104 insertions(+), 69 deletions(-) diff --git a/genai/live/live_ground_ragengine_with_txt.py b/genai/live/live_ground_ragengine_with_txt.py index 03ccc473388..038e33c50be 100644 --- a/genai/live/live_ground_ragengine_with_txt.py +++ b/genai/live/live_ground_ragengine_with_txt.py @@ -11,20 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - import asyncio -import os - -import vertexai -async def generate_content() -> list[str]: +async def generate_content(memory_corpus: str) -> list[str]: # [START googlegenaisdk_live_ground_ragengine_with_txt] from google import genai from google.genai.types import ( Content, LiveConnectConfig, - HttpOptions, Modality, Part, Tool, @@ -33,27 +28,23 @@ async def generate_content() -> list[str]: VertexRagStoreRagResource, ) - # from vertexai import rag - # vertexai.init(project=os.environ["GOOGLE_CLOUD_PROJECT"], location=os.environ["GOOGLE_CLOUD_LOCATION"]) - client = genai.Client() # model_id = "gemini-live-2.5-flash" model_id = "gemini-2.0-flash-live-preview-04-09" - # rag_store = VertexRagStore( - # rag_resources=[ - # VertexRagStoreRagResource( - # rag_corpus= # Use memory corpus if you want to store context. #todo ask Sampath should I create it? - # ) - # ], - # # Set `store_context` to true to allow Live API sink context into your memory corpus. - # store_context=True - # ) - # config = LiveConnectConfig(response_modalities=[Modality.TEXT], - # tools=[Tool( - # retrieval=Retrieval( - # vertex_rag_store=rag_store))]) - config = LiveConnectConfig(response_modalities=[Modality.TEXT]) + rag_store = VertexRagStore( + rag_resources=[ + VertexRagStoreRagResource( + rag_corpus=memory_corpus # Use memory corpus if you want to store context. + ) + ], + # Set `store_context` to true to allow Live API sink context into your memory corpus. + store_context=True, + ) + config = LiveConnectConfig( + response_modalities=[Modality.TEXT], + tools=[Tool(retrieval=Retrieval(vertex_rag_store=rag_store))], + ) async with client.aio.live.connect(model=model_id, config=config) as session: text_input = "What year did Mariusz Pudzianowski win World's Strongest Man?" @@ -79,4 +70,4 @@ async def generate_content() -> list[str]: if __name__ == "__main__": - asyncio.run(generate_content()) + asyncio.run(generate_content("memory_corpus")) diff --git a/genai/live/live_websocket_audiogen_with_txt.py b/genai/live/live_websocket_audiogen_with_txt.py index f7b6f07e5f8..277d4d5f8ba 100644 --- a/genai/live/live_websocket_audiogen_with_txt.py +++ b/genai/live/live_websocket_audiogen_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -55,9 +57,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -66,9 +66,7 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = { "response_modalities": ["AUDIO"], "speech_config": { @@ -77,7 +75,9 @@ async def generate_content() -> str: }, } - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -120,7 +120,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect audio chunks @@ -129,7 +131,9 @@ async def generate_content() -> str: for part in model_turn["parts"]: if part["inlineData"]["mimeType"] == "audio/pcm": audio_chunk = base64.b64decode(part["inlineData"]["data"]) - aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16)) + aggregated_response_parts.append( + np.frombuffer(audio_chunk, dtype=np.int16) + ) # End of response if server_content.get("turnComplete"): @@ -137,7 +141,9 @@ async def generate_content() -> str: # Save audio to a file if aggregated_response_parts: - wavfile.write("output.wav", 24000, np.concatenate(aggregated_response_parts)) + wavfile.write( + "output.wav", 24000, np.concatenate(aggregated_response_parts) + ) # Example response: # Setup Response: {'setupComplete': {}} # Input: Hello? Gemini are you there? diff --git a/genai/live/live_websocket_audiotranscript_with_txt.py b/genai/live/live_websocket_audiotranscript_with_txt.py index 5192b81ef17..5304e1914bb 100644 --- a/genai/live/live_websocket_audiotranscript_with_txt.py +++ b/genai/live/live_websocket_audiotranscript_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -55,9 +57,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -66,9 +66,7 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = { "response_modalities": ["AUDIO"], "speech_config": { @@ -77,7 +75,9 @@ async def generate_content() -> str: }, } - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -125,7 +125,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Transcriptions @@ -142,7 +144,9 @@ async def generate_content() -> str: for part in model_turn["parts"]: if part["inlineData"]["mimeType"] == "audio/pcm": audio_chunk = base64.b64decode(part["inlineData"]["data"]) - aggregated_response_parts.append(np.frombuffer(audio_chunk, dtype=np.int16)) + aggregated_response_parts.append( + np.frombuffer(audio_chunk, dtype=np.int16) + ) # End of response if server_content.get("turnComplete"): diff --git a/genai/live/live_websocket_textgen_with_audio.py b/genai/live/live_websocket_textgen_with_audio.py index de6fd9d55c3..f91cff35b57 100644 --- a/genai/live/live_websocket_textgen_with_audio.py +++ b/genai/live/live_websocket_textgen_with_audio.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -65,9 +67,7 @@ def read_wavefile(filepath: str) -> tuple[str, str]: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -76,12 +76,12 @@ def read_wavefile(filepath: str) -> tuple[str, str]: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = {"response_modalities": ["TEXT"]} - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -105,7 +105,9 @@ def read_wavefile(filepath: str) -> tuple[str, str]: return "Error: WebSocket setup failed." # 3. Send audio message - encoded_audio_message, mime_type = read_wavefile("hello_gemini_are_you_there.wav") + encoded_audio_message, mime_type = read_wavefile( + "hello_gemini_are_you_there.wav" + ) # Example audio message: "Hello? Gemini are you there?" user_message = { @@ -136,7 +138,9 @@ def read_wavefile(filepath: str) -> tuple[str, str]: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect text responses diff --git a/genai/live/live_websocket_textgen_with_txt.py b/genai/live/live_websocket_textgen_with_txt.py index b36487cc9a0..f8e88fa0521 100644 --- a/genai/live/live_websocket_textgen_with_txt.py +++ b/genai/live/live_websocket_textgen_with_txt.py @@ -20,7 +20,9 @@ def get_bearer_token() -> str: import google.auth from google.auth.transport.requests import Request - creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"]) + creds, _ = google.auth.default( + scopes=["https://www.googleapis.com/auth/cloud-platform"] + ) auth_req = Request() creds.refresh(auth_req) bearer_token = creds.token @@ -51,9 +53,7 @@ async def generate_content() -> str: # Websocket Configuration WEBSOCKET_HOST = "us-central1-aiplatform.googleapis.com" - WEBSOCKET_SERVICE_URL = ( - f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" - ) + WEBSOCKET_SERVICE_URL = f"wss://{WEBSOCKET_HOST}/ws/google.cloud.aiplatform.v1.LlmBidiService/BidiGenerateContent" # Websocket Authentication headers = { @@ -62,12 +62,12 @@ async def generate_content() -> str: } # Model Configuration - model_path = ( - f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" - ) + model_path = f"projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{GEMINI_MODEL_NAME}" model_generation_config = {"response_modalities": ["TEXT"]} - async with connect(WEBSOCKET_SERVICE_URL, additional_headers=headers) as websocket_session: + async with connect( + WEBSOCKET_SERVICE_URL, additional_headers=headers + ) as websocket_session: # 1. Send setup configuration websocket_config = { "setup": { @@ -110,7 +110,9 @@ async def generate_content() -> str: server_content = response_chunk.get("serverContent") if not server_content: # This might indicate an error or an unexpected message format - print(f"Received non-serverContent message or empty content: {response_chunk}") + print( + f"Received non-serverContent message or empty content: {response_chunk}" + ) break # Collect text responses diff --git a/genai/live/requirements-test.txt b/genai/live/requirements-test.txt index 4fb57f7f08d..e4ce134b76a 100644 --- a/genai/live/requirements-test.txt +++ b/genai/live/requirements-test.txt @@ -2,3 +2,4 @@ backoff==2.2.1 google-api-core==2.19.0 pytest==8.2.0 pytest-asyncio==0.25.3 +pytest-mock==3.12.0 diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index 75e86aa3d7e..6ef3b264665 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -2,4 +2,4 @@ google-genai==1.27.0 scipy==1.15.3 websockets==15.0.1 soundfile==0.13.1 -librosa==0.11.0 +librosa==0.11.0 \ No newline at end of file diff --git a/genai/live/test_live_examples.py b/genai/live/test_live_examples.py index a3cdc84e096..1e7988ec65a 100644 --- a/genai/live/test_live_examples.py +++ b/genai/live/test_live_examples.py @@ -19,7 +19,6 @@ import os import pytest - import live_websocket_audiogen_with_txt import live_websocket_audiotranscript_with_txt import live_websocket_textgen_with_audio @@ -36,6 +35,35 @@ # os.environ['GOOGLE_CLOUD_PROJECT'] = "add-your-project-name" +@pytest.fixture() +def mock_rag_components(mocker): + mock_client_cls = mocker.patch("google.genai.Client") + + from google.genai.types import VertexRagStore, VertexRagStoreRagResource + + mocker.patch( + "google.genai.types.VertexRagStoreRagResource", + side_effect=lambda rag_corpus: VertexRagStoreRagResource(rag_corpus=rag_corpus), + ) + mocker.patch( + "google.genai.types.VertexRagStore", + side_effect=lambda rag_resources, store_context: VertexRagStore( + rag_resources=rag_resources, store_context=store_context + ), + ) + + mock_session = mocker.AsyncMock() + mock_session.__aenter__.return_value = mock_session + mock_session.receive.return_value = iter( + [ + mocker.MagicMock( + text="Mariusz Pudzianowski won in 2002, 2003, 2005, 2007, and 2008." + ) + ] + ) + mock_client_cls.return_value.aio.live.connect.return_value = mock_session + + @pytest.mark.asyncio async def test_live_with_text() -> None: assert await live_with_txt.generate_content() @@ -72,11 +100,10 @@ async def test_live_ground_googsearch_with_txt() -> None: @pytest.mark.asyncio -async def test_live_ground_ragengine_with_txt() -> None: - assert await live_ground_ragengine_with_txt.generate_content() +async def test_live_ground_ragengine_with_txt(mock_rag_components) -> None: + assert await live_ground_ragengine_with_txt.generate_content("test_memory_corpus") @pytest.mark.asyncio async def test_live_func_call_with_txt() -> None: assert await live_func_call_with_txt.generate_content() - \ No newline at end of file From 3fcb28799f69f663313103eb9feb8f1ec4d3ad6c Mon Sep 17 00:00:00 2001 From: Guiners Date: Mon, 28 Jul 2025 12:09:22 +0200 Subject: [PATCH 10/10] adding live_func_call_with_txt with test --- genai/live/live_func_call_with_txt.py | 14 ++++++++++++++ genai/live/requirements.txt | 3 +-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/genai/live/live_func_call_with_txt.py b/genai/live/live_func_call_with_txt.py index 59fd52911ab..2109c7a17fc 100644 --- a/genai/live/live_func_call_with_txt.py +++ b/genai/live/live_func_call_with_txt.py @@ -1,3 +1,17 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import asyncio diff --git a/genai/live/requirements.txt b/genai/live/requirements.txt index 6ef3b264665..68f512d39e1 100644 --- a/genai/live/requirements.txt +++ b/genai/live/requirements.txt @@ -1,5 +1,4 @@ google-genai==1.27.0 scipy==1.15.3 websockets==15.0.1 -soundfile==0.13.1 -librosa==0.11.0 \ No newline at end of file +soundfile==0.13.1 \ No newline at end of file