feat: Add the ask_data_insights tool for natural language queries on BigQuery data

google-genai-bot · copybara-github · commit 47b88d2b06d2 · 2025-08-25T14:23:10.000-07:00
PiperOrigin-RevId: 799267061
diff --git a/contributing/samples/bigquery/README.md b/contributing/samples/bigquery/README.md
@@ -25,6 +25,16 @@ distributed via the `google.adk.tools.bigquery` module. These tools include:
 
   Runs a SQL query in BigQuery.
 
+1. `ask_data_insights`
+
+  Natural-language-in, natural-language-out tool that answers questions
+  about structured data in BigQuery. Provides a one-stop solution for generating
+  insights from data.
+
+  **Note**: This tool requires additional setup in your project. Please refer to
+  the official [Conversational Analytics API documentation](https://cloud.google.com/gemini/docs/conversational-analytics-api/overview)
+  for instructions.
+
 ## How to use
 
 Set up environment variables in your `.env` file for using
diff --git a/src/google/adk/tools/bigquery/bigquery_toolset.py b/src/google/adk/tools/bigquery/bigquery_toolset.py
@@ -21,6 +21,7 @@
 from google.adk.agents.readonly_context import ReadonlyContext
 from typing_extensions import override
 
+from . import data_insights_tool
 from . import metadata_tool
 from . import query_tool
 from ...tools.base_tool import BaseTool
@@ -80,6 +81,7 @@ async def get_tools(
             metadata_tool.list_dataset_ids,
             metadata_tool.list_table_ids,
             query_tool.get_execute_sql(self._tool_settings),
+            data_insights_tool.ask_data_insights,
         ]
     ]
 
diff --git a/src/google/adk/tools/bigquery/data_insights_tool.py b/src/google/adk/tools/bigquery/data_insights_tool.py
@@ -34,14 +34,16 @@ def ask_data_insights(
 ) -> Dict[str, Any]:
   """Answers questions about structured data in BigQuery tables using natural language.
 
-  This function takes auser's question (which can include conversational
-  history for context) andreferences to specific BigQuery tables, and sends
+  This function takes a user's question (which can include conversational
+  history for context) and references to specific BigQuery tables, and sends
   them to a stateless conversational API.
 
   The API uses a GenAI agent to understand the question, generate and execute
   SQL queries and Python code, and formulate an answer. This function returns a
   detailed, sequential log of this entire process, which includes any generated
-  SQL or Python code, the data retrieved, and the final text answer.
+  SQL or Python code, the data retrieved, and the final text answer. The final
+  answer is always in plain text, as the underlying API is instructed not to
+  generate any charts, graphs, images, or other visualizations.
 
   Use this tool to perform data analysis, get insights, or answer complex
   questions about the contents of specific BigQuery tables.
@@ -123,9 +125,22 @@ def ask_data_insights(
     }
     ca_url = f"https://geminidataanalytics.googleapis.com/v1alpha/projects/{project_id}/locations/{location}:chat"
 
+    instructions = """**INSTRUCTIONS - FOLLOW THESE RULES:**
+    1.  **CONTENT:** Your answer should present the supporting data and then provide a conclusion based on that data.
+    2.  **OUTPUT FORMAT:** Your entire response MUST be in plain text format ONLY.
+    3.  **NO CHARTS:** You are STRICTLY FORBIDDEN from generating any charts, graphs, images, or any other form of visualization.
+    """
+
+    final_query_text = f"""
+{instructions}
+
+**User Query and Context:**
+{user_query_with_context}
+"""
+
     ca_payload = {
         "project": f"projects/{project_id}",
-        "messages": [{"userMessage": {"text": user_query_with_context}}],
+        "messages": [{"userMessage": {"text": final_query_text}}],
         "inlineContext": {
             "datasourceReferences": {
                 "bq": {"tableReferences": table_references}
@@ -289,7 +304,7 @@ def _handle_data_response(
     schema = resp["result"]["schema"]
     headers = [field.get("name") for field in schema.get("fields", [])]
 
-    all_rows = resp["result"]["data"]
+    all_rows = resp["result"].get("data", [])
     total_rows = len(all_rows)
 
     compact_rows = []
diff --git a/tests/unittests/tools/bigquery/test_bigquery_client.py b/tests/unittests/tools/bigquery/test_bigquery_client.py
@@ -21,7 +21,6 @@
 from google.adk.tools.bigquery.client import get_bigquery_client
 from google.auth.exceptions import DefaultCredentialsError
 from google.oauth2.credentials import Credentials
-import pytest
 
 
 def test_bigquery_client_project():
diff --git a/tests/unittests/tools/bigquery/test_bigquery_toolset.py b/tests/unittests/tools/bigquery/test_bigquery_toolset.py
@@ -41,7 +41,7 @@ async def test_bigquery_toolset_tools_default():
   tools = await toolset.get_tools()
   assert tools is not None
 
-  assert len(tools) == 5
+  assert len(tools) == 6
   assert all([isinstance(tool, GoogleTool) for tool in tools])
 
   expected_tool_names = set([
@@ -50,6 +50,7 @@ async def test_bigquery_toolset_tools_default():
       "list_table_ids",
       "get_table_info",
       "execute_sql",
+      "ask_data_insights",
   ])
   actual_tool_names = set([tool.name for tool in tools])
   assert actual_tool_names == expected_tool_names

Original file line number	Diff line number	Diff line change
`@@ -21,6 +21,7 @@`
`21`	`21`	`from google.adk.agents.readonly_context import ReadonlyContext`
`22`	`22`	`from typing_extensions import override`
`23`	`23`
	`24`	`+from . import data_insights_tool`
`24`	`25`	`from . import metadata_tool`
`25`	`26`	`from . import query_tool`
`26`	`27`	`from ...tools.base_tool import BaseTool`
`@@ -80,6 +81,7 @@ async def get_tools(`
`80`	`81`	`metadata_tool.list_dataset_ids,`
`81`	`82`	`metadata_tool.list_table_ids,`
`82`	`83`	`query_tool.get_execute_sql(self._tool_settings),`
	`84`	`+ data_insights_tool.ask_data_insights,`
`83`	`85`	`]`
`84`	`86`	`]`
`85`	`87`