diff --git a/contributing/samples/adk_answering_agent/README.md b/contributing/samples/adk_answering_agent/README.md new file mode 100644 index 000000000..e2af591d6 --- /dev/null +++ b/contributing/samples/adk_answering_agent/README.md @@ -0,0 +1,83 @@ +# ADK Answering Agent + +The ADK Answering Agent is a Python-based agent designed to help answer questions in GitHub discussions for the `google/adk-python` repository. It uses a large language model to analyze open discussions, retrieve information from document store, generate response, and post a comment in the github discussion. + +This agent can be operated in three distinct modes: an interactive mode for local use, a batch script mode for oncall use, or as a fully automated GitHub Actions workflow (TBD). + +--- + +## Interactive Mode + +This mode allows you to run the agent locally to review its recommendations in real-time before any changes are made to your repository's issues. + +### Features +* **Web Interface**: The agent's interactive mode can be rendered in a web browser using the ADK's `adk web` command. +* **User Approval**: In interactive mode, the agent is instructed to ask for your confirmation before posting a comment to a GitHub issue. +* **Question & Answer**: You can ask ADK related questions, and the agent will provide answers based on its knowledge on ADK. + +### Running in Interactive Mode +To run the agent in interactive mode, first set the required environment variables. Then, execute the following command in your terminal: + +```bash +adk web +``` +This will start a local server and provide a URL to access the agent's web interface in your browser. + +--- + +## Batch Script Mode + +The `answer_discussions.py` is created for ADK oncall team to batch process discussions. + +### Features +* **Batch Process**: Taken either a number as the count of the recent discussions or a list of discussion numbers, the script will invoke the agent to answer all the specified discussions in one single run. + +### Running in Interactive Mode +To run the agent in batch script mode, first set the required environment variables. Then, execute the following command in your terminal: + +```bash +export PYTHONPATH=contributing/samples +python -m adk_answering_agent.answer_discussions --numbers 27 36 # Answer specified discussions +``` + +Or `python -m adk_answering_agent.answer_discussions --recent 10` to answer the 10 most recent updated discussions. + +--- + +## GitHub Workflow Mode + +The `main.py` is reserved for the Github Workflow. The detailed setup for the automatic workflow is TBD. + +--- + +## Setup and Configuration + +Whether running in interactive or workflow mode, the agent requires the following setup. + +### Dependencies +The agent requires the following Python libraries. + +```bash +pip install --upgrade pip +pip install google-adk requests +``` + +The agent also requires gcloud login: + +```bash +gcloud auth application-default login +``` + +### Environment Variables +The following environment variables are required for the agent to connect to the necessary services. + +* `GITHUB_TOKEN=YOUR_GITHUB_TOKEN`: **(Required)** A GitHub Personal Access Token with `issues:write` permissions. Needed for both interactive and workflow modes. +* `GOOGLE_GENAI_USE_VERTEXAI=TRUE`: **(Required)** Use Google Vertex AI for the authentication. +* `GOOGLE_CLOUD_PROJECT=YOUR_PROJECT_ID`: **(Required)** The Google Cloud project ID. +* `GOOGLE_CLOUD_LOCATION=LOCATION`: **(Required)** The Google Cloud region. +* `VERTEXAI_DATASTORE_ID=YOUR_DATASTORE_ID`: **(Required)** The Vertex AI datastore ID for the document store (i.e. knowledge base). +* `OWNER`: The GitHub organization or username that owns the repository (e.g., `google`). Needed for both modes. +* `REPO`: The name of the GitHub repository (e.g., `adk-python`). Needed for both modes. +* `INTERACTIVE`: Controls the agent's interaction mode. For the automated workflow, this is set to `0`. For interactive mode, it should be set to `1` or left unset. + +For local execution in interactive mode, you can place these variables in a `.env` file in the project's root directory. For the GitHub workflow, they should be configured as repository secrets. \ No newline at end of file diff --git a/contributing/samples/adk_answering_agent/__init__.py b/contributing/samples/adk_answering_agent/__init__.py new file mode 100644 index 000000000..c48963cdc --- /dev/null +++ b/contributing/samples/adk_answering_agent/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from . import agent diff --git a/contributing/samples/adk_answering_agent/agent.py b/contributing/samples/adk_answering_agent/agent.py new file mode 100644 index 000000000..96e30a739 --- /dev/null +++ b/contributing/samples/adk_answering_agent/agent.py @@ -0,0 +1,192 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any + +from adk_answering_agent.settings import IS_INTERACTIVE +from adk_answering_agent.settings import OWNER +from adk_answering_agent.settings import REPO +from adk_answering_agent.settings import VERTEXAI_DATASTORE_ID +from adk_answering_agent.utils import error_response +from adk_answering_agent.utils import run_graphql_query +from google.adk.agents import Agent +from google.adk.tools import VertexAiSearchTool +import requests + +if IS_INTERACTIVE: + APPROVAL_INSTRUCTION = ( + "Ask for user approval or confirmation for adding the comment." + ) +else: + APPROVAL_INSTRUCTION = ( + "**Do not** wait or ask for user approval or confirmation for adding the" + " comment." + ) + + +def get_discussion_and_comments(discussion_number: int) -> dict[str, Any]: + """Fetches a discussion and its comments using the GitHub GraphQL API. + + Args: + discussion_number: The number of the GitHub discussion. + + Returns: + A dictionary with the request status and the discussion details. + """ + print(f"Attempting to get discussion #{discussion_number} and its comments") + query = """ + query($owner: String!, $repo: String!, $discussionNumber: Int!) { + repository(owner: $owner, name: $repo) { + discussion(number: $discussionNumber) { + id + title + body + createdAt + closed + author { + login + } + # For each comment, fetch the latest 100 comments. + comments(last: 100) { + nodes { + id + body + createdAt + author { + login + } + # For each comment, fetch the latest 50 replies + replies(last: 50) { + nodes { + id + body + createdAt + author { + login + } + } + } + } + } + } + } + } + """ + variables = { + "owner": OWNER, + "repo": REPO, + "discussionNumber": discussion_number, + } + try: + response = run_graphql_query(query, variables) + if "errors" in response: + return error_response(str(response["errors"])) + discussion_data = ( + response.get("data", {}).get("repository", {}).get("discussion") + ) + if not discussion_data: + return error_response(f"Discussion #{discussion_number} not found.") + return {"status": "success", "discussion": discussion_data} + except requests.exceptions.RequestException as e: + return error_response(str(e)) + + +def add_comment_to_discussion( + discussion_id: str, comment_body: str +) -> dict[str, Any]: + """Adds a comment to a specific discussion. + + Args: + discussion_id: The GraphQL node ID of the discussion. + comment_body: The content of the comment in Markdown. + + Returns: + The status of the request and the new comment's details. + """ + print(f"Adding comment to discussion {discussion_id}") + query = """ + mutation($discussionId: ID!, $body: String!) { + addDiscussionComment(input: {discussionId: $discussionId, body: $body}) { + comment { + id + body + createdAt + author { + login + } + } + } + } + """ + variables = {"discussionId": discussion_id, "body": comment_body} + try: + response = run_graphql_query(query, variables) + if "errors" in response: + return error_response(str(response["errors"])) + new_comment = ( + response.get("data", {}).get("addDiscussionComment", {}).get("comment") + ) + return {"status": "success", "comment": new_comment} + except requests.exceptions.RequestException as e: + return error_response(str(e)) + + +root_agent = Agent( + model="gemini-2.5-pro", + name="adk_answering_agent", + description="Answer questions about ADK repo.", + instruction=f""" + You are a helpful assistant that responds to questions from the GitHub repository `{OWNER}/{REPO}` + based on information about Google ADK found in the document store. You can access the document store + using the `VertexAiSearchTool`. + + When user specifies a discussion number, here are the steps: + 1. Use the `get_discussion_and_comments` tool to get the details of the discussion including the comments. + 2. Focus on the latest comment but reference all comments if needed to understand the context. + * If there is no comment at all, just focus on the discussion title and body. + 3. If all the following conditions are met, try to add a comment to the discussion, otherwise, do not respond: + * The discussion is not closed. + * The latest comment is not from you or other agents (marked as "Response from XXX Agent"). + * The latest comment is asking a question or requesting information. + 4. Use the `VertexAiSearchTool` to find relevant information before answering. + + IMPORTANT: + * {APPROVAL_INSTRUCTION} + * Your response should be based on the information you found in the document store. Do not invent + information that is not in the document store. Do not invent citations which are not in the document store. + * If you can't find the answer or information in the document store, **do not** respond. + * Include a bolded note (e.g. "Response from ADK Answering Agent") in your comment + to indicate this comment was added by an ADK Answering Agent. + * Have an empty line between the note and the rest of your response. + * Inlclude a short summary of your response in the comment as a TLDR, e.g. "**TLDR**: ". + * Have a divider line between the TLDR and your detail response. + * Do not respond to any other discussion except the one specified by the user. + * Please include your justification for your decision in your output + to the user who is telling with you. + * If you uses citation from the document store, please provide a footnote + referencing the source document format it as: "[1] URL of the document". + * Replace the "gs://prefix/" part, e.g. "gs://adk-qa-bucket/", to be "https://github.com/google/" + * Add "blob/main/" after the repo name, e.g. "adk-python", "adk-docs", for example: + * If the original URL is "gs://adk-qa-bucket/adk-python/src/google/adk/version.py", + then the citation URL is "https://github.com/google/adk-python/blob/main/src/google/adk/version.py", + * If the original URL is "gs://adk-qa-bucket/adk-docs/docs/index.md", + then the citation URL is "https://github.com/google/adk-docs/blob/main/docs/index.md" + * If the file is a html file, replace the ".html" to be ".md" + """, + tools=[ + VertexAiSearchTool(data_store_id=VERTEXAI_DATASTORE_ID), + get_discussion_and_comments, + add_comment_to_discussion, + ], +) diff --git a/contributing/samples/adk_answering_agent/answer_discussions.py b/contributing/samples/adk_answering_agent/answer_discussions.py new file mode 100644 index 000000000..1aa737585 --- /dev/null +++ b/contributing/samples/adk_answering_agent/answer_discussions.py @@ -0,0 +1,172 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import asyncio +import sys +import time + +from adk_answering_agent import agent +from adk_answering_agent.settings import OWNER +from adk_answering_agent.settings import REPO +from adk_answering_agent.utils import call_agent_async +from adk_answering_agent.utils import run_graphql_query +from google.adk.runners import InMemoryRunner +import requests + +APP_NAME = "adk_discussion_answering_app" +USER_ID = "adk_discussion_answering_assistant" + + +async def list_most_recent_discussions(count: int = 1) -> list[int] | None: + """Fetches a specified number of the most recently updated discussions. + + Args: + count: The number of discussions to retrieve. Defaults to 1. + + Returns: + A list of discussion numbers. + """ + print( + f"Attempting to fetch the {count} most recently updated discussions from" + f" {OWNER}/{REPO}..." + ) + + query = """ + query($owner: String!, $repo: String!, $count: Int!) { + repository(owner: $owner, name: $repo) { + discussions( + first: $count + orderBy: {field: UPDATED_AT, direction: DESC} + ) { + nodes { + title + number + updatedAt + author { + login + } + } + } + } + } + """ + variables = {"owner": OWNER, "repo": REPO, "count": count} + + try: + response = run_graphql_query(query, variables) + + if "errors" in response: + print(f"Error from GitHub API: {response['errors']}", file=sys.stderr) + return None + + discussions = ( + response.get("data", {}) + .get("repository", {}) + .get("discussions", {}) + .get("nodes", []) + ) + return [d["number"] for d in discussions] + + except requests.exceptions.RequestException as e: + print(f"Request failed: {e}", file=sys.stderr) + return None + + +def process_arguments(): + """Parses command-line arguments.""" + parser = argparse.ArgumentParser( + description="A script that answer questions for Github discussions.", + epilog=( + "Example usage: \n" + "\tpython -m adk_answering_agent.answer_discussions --recent 10\n" + "\tpython -m adk_answering_agent.answer_discussions --numbers 21 31\n" + ), + formatter_class=argparse.RawTextHelpFormatter, + ) + + group = parser.add_mutually_exclusive_group(required=True) + + group.add_argument( + "--recent", + type=int, + metavar="COUNT", + help="Answer the N most recently updated discussion numbers.", + ) + + group.add_argument( + "--numbers", + type=int, + nargs="+", + metavar="NUM", + help="Answer a specific list of discussion numbers.", + ) + + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + return parser.parse_args() + + +async def main(): + args = process_arguments() + discussion_numbers = [] + + if args.recent: + discussion_numbers = await list_most_recent_discussions(count=args.recent) + elif args.numbers: + discussion_numbers = args.numbers + + if not discussion_numbers: + print("No discussions specified. Exiting...", file=sys.stderr) + sys.exit(1) + + print(f"Will try to answer discussions: {discussion_numbers}...") + + runner = InMemoryRunner( + agent=agent.root_agent, + app_name=APP_NAME, + ) + + for discussion_number in discussion_numbers: + print("#" * 80) + print(f"Starting to process discussion #{discussion_number}...") + # Create a new session for each discussion to avoid interference. + session = await runner.session_service.create_session( + app_name=APP_NAME, user_id=USER_ID + ) + prompt = ( + f"Please check discussion #{discussion_number} see if you can help" + " answer the question or provide some information!" + ) + response = await call_agent_async(runner, USER_ID, session.id, prompt) + print(f"<<<< Agent Final Output: {response}\n") + + +if __name__ == "__main__": + start_time = time.time() + print( + f"Start answering discussions for {OWNER}/{REPO} at" + f" {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(start_time))}" + ) + print("-" * 80) + asyncio.run(main()) + print("-" * 80) + end_time = time.time() + print( + "Discussion answering finished at" + f" {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(end_time))}", + ) + print("Total script execution time:", f"{end_time - start_time:.2f} seconds") diff --git a/contributing/samples/adk_answering_agent/main.py b/contributing/samples/adk_answering_agent/main.py new file mode 100644 index 000000000..735ebae79 --- /dev/null +++ b/contributing/samples/adk_answering_agent/main.py @@ -0,0 +1,66 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import asyncio +import time + +from adk_answering_agent import agent +from adk_answering_agent.settings import DISCUSSION_NUMBER +from adk_answering_agent.settings import OWNER +from adk_answering_agent.settings import REPO +from adk_answering_agent.utils import call_agent_async +from adk_answering_agent.utils import parse_number_string +from google.adk.runners import InMemoryRunner + +APP_NAME = "adk_answering_app" +USER_ID = "adk_answering_user" + + +async def main(): + runner = InMemoryRunner( + agent=agent.root_agent, + app_name=APP_NAME, + ) + session = await runner.session_service.create_session( + app_name=APP_NAME, user_id=USER_ID + ) + + discussion_number = parse_number_string(DISCUSSION_NUMBER) + if not discussion_number: + print(f"Error: Invalid discussion number received: {DISCUSSION_NUMBER}.") + return + + prompt = ( + f"Please check discussion #{discussion_number} see if you can help answer" + " the question or provide some information!" + ) + response = await call_agent_async(runner, USER_ID, session.id, prompt) + print(f"<<<< Agent Final Output: {response}\n") + + +if __name__ == "__main__": + start_time = time.time() + print( + f"Start Q&A checking on {OWNER}/{REPO} discussion #{DISCUSSION_NUMBER} at" + f" {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(start_time))}" + ) + print("-" * 80) + asyncio.run(main()) + print("-" * 80) + end_time = time.time() + print( + "Q&A checking finished at" + f" {time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(end_time))}", + ) + print("Total script execution time:", f"{end_time - start_time:.2f} seconds") diff --git a/contributing/samples/adk_answering_agent/settings.py b/contributing/samples/adk_answering_agent/settings.py new file mode 100644 index 000000000..205f9f363 --- /dev/null +++ b/contributing/samples/adk_answering_agent/settings.py @@ -0,0 +1,36 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from dotenv import load_dotenv + +load_dotenv(override=True) + +GITHUB_BASE_URL = "https://api.github.com" +GITHUB_GRAPHQL_URL = GITHUB_BASE_URL + "/graphql" + +GITHUB_TOKEN = os.getenv("GITHUB_TOKEN") +if not GITHUB_TOKEN: + raise ValueError("GITHUB_TOKEN environment variable not set") + +VERTEXAI_DATASTORE_ID = os.getenv("VERTEXAI_DATASTORE_ID") +if not VERTEXAI_DATASTORE_ID: + raise ValueError("VERTEXAI_DATASTORE_ID environment variable not set") + +OWNER = os.getenv("OWNER", "google") +REPO = os.getenv("REPO", "adk-python") +DISCUSSION_NUMBER = os.getenv("DISCUSSION_NUMBER") + +IS_INTERACTIVE = os.getenv("INTERACTIVE", "1").lower() in ["true", "1"] diff --git a/contributing/samples/adk_answering_agent/utils.py b/contributing/samples/adk_answering_agent/utils.py new file mode 100644 index 000000000..c8321f94a --- /dev/null +++ b/contributing/samples/adk_answering_agent/utils.py @@ -0,0 +1,81 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from typing import Any + +from adk_answering_agent.settings import GITHUB_GRAPHQL_URL +from adk_answering_agent.settings import GITHUB_TOKEN +from google.adk.agents.run_config import RunConfig +from google.adk.runners import Runner +from google.genai import types +import requests + +headers = { + "Authorization": f"token {GITHUB_TOKEN}", + "Accept": "application/vnd.github.v3+json", +} + + +def error_response(error_message: str) -> dict[str, Any]: + return {"status": "error", "error_message": error_message} + + +def run_graphql_query(query: str, variables: dict[str, Any]) -> dict[str, Any]: + """Executes a GraphQL query.""" + payload = {"query": query, "variables": variables} + response = requests.post( + GITHUB_GRAPHQL_URL, headers=headers, json=payload, timeout=60 + ) + response.raise_for_status() + return response.json() + + +def parse_number_string(number_str: str | None, default_value: int = 0) -> int: + """Parse a number from the given string.""" + if not number_str: + return default_value + + try: + return int(number_str) + except ValueError: + print( + f"Warning: Invalid number string: {number_str}. Defaulting to" + f" {default_value}.", + file=sys.stderr, + ) + return default_value + + +async def call_agent_async( + runner: Runner, user_id: str, session_id: str, prompt: str +) -> str: + """Call the agent asynchronously with the user's prompt.""" + content = types.Content( + role="user", parts=[types.Part.from_text(text=prompt)] + ) + + final_response_text = "" + async for event in runner.run_async( + user_id=user_id, + session_id=session_id, + new_message=content, + run_config=RunConfig(save_input_blobs_as_artifacts=False), + ): + if event.content and event.content.parts: + if text := "".join(part.text or "" for part in event.content.parts): + if event.author != "user": + final_response_text += text + + return final_response_text