Skip to content

Commit c3b4cfd

Browse files
committed
feat: add support for GitHubRepoForkerTool
1 parent 64e4e6a commit c3b4cfd

File tree

6 files changed

+329
-3
lines changed

6 files changed

+329
-3
lines changed

integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44
import re
5+
import time
56
from typing import Any, Dict, Optional
67

78
import requests
@@ -60,7 +61,7 @@ def __init__(
6061
:param create_branch: If True, creates a fix branch based on the issue number
6162
"""
6263
error_message = "github_token must be a Secret"
63-
if not isinstance(github_token, Secret):
64+
if github_token is not None and not isinstance(github_token, Secret):
6465
raise TypeError(error_message)
6566

6667
self.github_token = github_token
@@ -274,8 +275,6 @@ def run(self, url: str) -> dict:
274275

275276
# Wait for fork completion if requested
276277
if self.wait_for_completion:
277-
import time
278-
279278
start_time = time.time()
280279

281280
while time.time() - start_time < self.max_wait_seconds:

integrations/github/src/haystack_integrations/prompts/github/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA
66
from .issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA
77
from .pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA
8+
from .repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
89
from .repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA
910
from .system_prompt import SYSTEM_PROMPT
1011

@@ -16,6 +17,8 @@
1617
"ISSUE_COMMENTER_SCHEMA",
1718
"PR_CREATOR_PROMPT",
1819
"PR_CREATOR_SCHEMA",
20+
"REPO_FORKER_PROMPT",
21+
"REPO_FORKER_SCHEMA",
1922
"REPO_VIEWER_PROMPT",
2023
"REPO_VIEWER_SCHEMA",
2124
"SYSTEM_PROMPT",
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
REPO_FORKER_PROMPT = """Haystack-Agent uses this tool to fork GitHub repositories in order to contribute to issues.
5+
Haystack-Agent initiates a fork so it can freely make changes for contributions.
6+
A fork is required to open a pull request to the upstream repository.
7+
Haystack-Agent works by forking the repository associated with a given issue.
8+
9+
<usage>
10+
Pass a `url` string for the GitHub issue you want to work on in a fork.
11+
It is REQUIRED to pass `url` to use this tool.
12+
The structure must be "https://github.com/<repo-owner>/<repo-name>/issues/<issue-number>".
13+
14+
Examples:
15+
16+
- {"url": "https://github.com/deepset-ai/haystack/issues/9343"}
17+
- will fork the "deepset-ai/haystack" repository to work on issue 9343
18+
- {"url": "https://github.com/deepset-ai/haystack-core-integrations/issues/1685"}
19+
- will fork the "deepset-ai/haystack-core-integrations" repository to work on issue 1685
20+
</usage>
21+
22+
Haystack-Agent uses the `repo_forker` tool to create a copy (fork) of the target repository into its own account.
23+
Haystack-Agent ensures the issue URL is valid and points to a real GitHub issue.
24+
It parses the URL to identify the correct repository.
25+
26+
<thinking>
27+
- Does this issue belong to the repository I need to work on?
28+
- Can I extract the owner and repository name from the URL?
29+
- Why am I forking this repository? (e.g., to implement a fix, to add a feature)
30+
- Is there anything special about the branch or base state I should be aware of?
31+
</thinking>
32+
33+
Haystack-Agent reflects on the results after forking:
34+
<thinking>
35+
- Did the fork succeed? Is the fork visible in my account?
36+
- Can I access, clone, and push to my fork?
37+
- Are there any permissions or fork-specific settings to configure before proceeding?
38+
- Which branch will I be working on in the fork?
39+
</thinking>
40+
41+
IMPORTANT
42+
Haystack-Agent ONLY forks the repository mentioned in the given issue URL.
43+
Haystack-Agent does NOT attempt to fork organizations, user profiles, or non-issue URLs.
44+
Haystack-Agent knows that forking is a prerequisite to contributing changes and creating pull requests.
45+
46+
Haystack-Agent takes notes after the fork:
47+
<scratchpad>
48+
- Record the URL of the forked repository
49+
- Note the original issue being worked on
50+
- Document any post-fork steps (e.g., git cloning, installing dependencies)
51+
- Make note of any errors or special setup requirements
52+
</scratchpad>
53+
"""
54+
55+
# JSON schema describing the arguments the repo forker tool accepts:
# a single, required string argument named `url`.
REPO_FORKER_SCHEMA = {
    "properties": {
        "url": {
            "type": "string",
            "description": "URL of the GitHub issue to work on in the fork.",
        },
    },
    "required": ["url"],
    "type": "object",
}

integrations/github/src/haystack_integrations/tools/github/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
from .issue_commenter_tool import GitHubIssueCommenterTool
66
from .issue_viewer_tool import GitHubIssueViewerTool
77
from .pr_creator_tool import GitHubPRCreatorTool
8+
from .repo_forker_tool import GitHubRepoForkerTool
89
from .repo_viewer_tool import GitHubRepoViewerTool
910

1011
__all__ = [
1112
"GitHubFileEditorTool",
1213
"GitHubIssueCommenterTool",
1314
"GitHubIssueViewerTool",
1415
"GitHubPRCreatorTool",
16+
"GitHubRepoForkerTool",
1517
"GitHubRepoViewerTool",
1618
]
Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from typing import Any, Callable, Dict, Optional, Union
5+
6+
from haystack.core.serialization import generate_qualified_class_name
7+
from haystack.tools import ComponentTool
8+
from haystack.utils import Secret, deserialize_secrets_inplace
9+
10+
from haystack_integrations.components.connectors.github.repo_forker import GitHubRepoForker
11+
from haystack_integrations.prompts.github.repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
12+
from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers
13+
14+
15+
class GitHubRepoForkerTool(ComponentTool):
    """
    A tool for forking a GitHub repository.

    Wraps a `GitHubRepoForker` component in a `ComponentTool`.
    """

    def __init__(
        self,
        *,
        name: Optional[str] = "repo_forker",
        description: Optional[str] = REPO_FORKER_PROMPT,
        parameters: Optional[Dict[str, Any]] = REPO_FORKER_SCHEMA,
        github_token: Optional[Secret] = None,
        repo: Optional[str] = None,
        branch: str = "main",
        raise_on_failure: bool = True,
        outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None,
        inputs_from_state: Optional[Dict[str, str]] = None,
        outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None,
    ):
        """
        Create the tool and the `GitHubRepoForker` component it wraps.

        :param name: Name of the tool.
        :param description: Description of the tool.
        :param parameters: JSON schema defining the parameters expected by the tool.
        :param github_token: GitHub personal access token for API authentication.
        :param repo: Repository in owner/repo format. Stored on the tool and included in `to_dict` output;
            it is not passed to the `GitHubRepoForker` component here.
        :param branch: Branch name. Stored on the tool and included in `to_dict` output;
            it is not passed to the `GitHubRepoForker` component here.
        :param raise_on_failure: If True, raises exceptions on API errors.
        :param outputs_to_string:
            Optional dictionary defining how the tool outputs should be converted into a string.
            If the source is provided only the specified output key is sent to the handler.
            If the source is omitted the whole tool result is sent to the handler.
            Example: {
                "source": "docs", "handler": format_documents
            }
        :param inputs_from_state:
            Optional dictionary mapping state keys to tool parameter names.
            Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter.
        :param outputs_to_state:
            Optional dictionary defining how tool outputs map to keys within state as well as optional handlers.
            If the source is provided only the specified output key is sent to the handler.
            Example: {
                "documents": {"source": "docs", "handler": custom_handler}
            }
            If the source is omitted the whole tool result is sent to the handler.
            Example: {
                "documents": {"handler": custom_handler}
            }
        """
        # Keep every constructor argument on the instance so `to_dict` can emit it later.
        self.name = name
        self.description = description
        self.parameters = parameters
        self.github_token = github_token
        self.repo = repo
        self.branch = branch
        self.raise_on_failure = raise_on_failure
        self.outputs_to_string = outputs_to_string
        self.inputs_from_state = inputs_from_state
        self.outputs_to_state = outputs_to_state

        # Only the token and the failure policy are forwarded to the wrapped component.
        forker_component = GitHubRepoForker(github_token=github_token, raise_on_failure=raise_on_failure)

        super().__init__(
            component=forker_component,
            name=name,
            description=description,
            parameters=parameters,
            outputs_to_string=outputs_to_string,
            inputs_from_state=inputs_from_state,
            outputs_to_state=outputs_to_state,
        )

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the tool to a dictionary.

        :returns:
            Dictionary with the qualified class name under "type" and the init parameters under "data".
        """
        token = self.github_token
        serialized_token = token.to_dict() if token else None

        init_params = {
            "name": self.name,
            "description": self.description,
            "parameters": self.parameters,
            "github_token": serialized_token,
            "repo": self.repo,
            "branch": self.branch,
            "raise_on_failure": self.raise_on_failure,
            "outputs_to_string": self.outputs_to_string,
            "inputs_from_state": self.inputs_from_state,
            "outputs_to_state": self.outputs_to_state,
        }

        # The handler entries of `init_params` are processed in place by the shared helper.
        serialize_handlers(init_params, self.outputs_to_state, self.outputs_to_string)

        qualified_name = generate_qualified_class_name(type(self))
        return {"type": qualified_name, "data": init_params}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForkerTool":
        """
        Deserializes the tool from a dictionary.

        :param data:
            Dictionary to deserialize from. Its "data" entry is passed to the in-place
            secret and handler deserializers before being used as init parameters.
        :returns:
            Deserialized tool.
        """
        init_kwargs = data["data"]
        deserialize_secrets_inplace(init_kwargs, keys=["github_token"])
        deserialize_handlers(init_kwargs)
        return cls(**init_kwargs)
Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from haystack_integrations.prompts.github.repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
5+
from haystack_integrations.tools.github.repo_forker_tool import GitHubRepoForkerTool
6+
from haystack_integrations.tools.github.utils import message_handler
7+
8+
9+
class TestGitHubRepoForkerTool:
    """Tests for constructing, serializing and deserializing `GitHubRepoForkerTool`."""

    def test_init(self, monkeypatch):
        """A tool created without arguments exposes the default attribute values."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        forker_tool = GitHubRepoForkerTool()

        assert forker_tool.name == "repo_forker"
        assert forker_tool.description == REPO_FORKER_PROMPT
        assert forker_tool.parameters == REPO_FORKER_SCHEMA
        assert forker_tool.github_token is None
        assert forker_tool.repo is None
        assert forker_tool.branch == "main"
        assert forker_tool.raise_on_failure is True
        assert forker_tool.outputs_to_string is None
        assert forker_tool.inputs_from_state is None
        assert forker_tool.outputs_to_state is None

    def test_from_dict(self, monkeypatch):
        """Deserializing a dictionary of default values yields a tool with those defaults."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        init_data = {
            "name": "repo_forker",
            "description": REPO_FORKER_PROMPT,
            "parameters": REPO_FORKER_SCHEMA,
            "github_token": None,
            "repo": None,
            "branch": "main",
            "raise_on_failure": True,
            "outputs_to_string": None,
            "inputs_from_state": None,
            "outputs_to_state": None,
        }
        serialized = {
            "type": "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool",
            "data": init_data,
        }

        forker_tool = GitHubRepoForkerTool.from_dict(serialized)

        assert forker_tool.name == "repo_forker"
        assert forker_tool.description == REPO_FORKER_PROMPT
        assert forker_tool.parameters == REPO_FORKER_SCHEMA
        assert forker_tool.github_token is None
        assert forker_tool.repo is None
        assert forker_tool.branch == "main"
        assert forker_tool.raise_on_failure is True
        assert forker_tool.outputs_to_string is None
        assert forker_tool.inputs_from_state is None
        assert forker_tool.outputs_to_state is None

    def test_to_dict(self, monkeypatch):
        """Serializing a default tool emits the qualified class name and the default init parameters."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        serialized = GitHubRepoForkerTool().to_dict()
        payload = serialized["data"]

        assert serialized["type"] == "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool"
        assert payload["name"] == "repo_forker"
        assert payload["description"] == REPO_FORKER_PROMPT
        assert payload["parameters"] == REPO_FORKER_SCHEMA
        assert payload["github_token"] is None
        assert payload["repo"] is None
        assert payload["branch"] == "main"
        assert payload["raise_on_failure"] is True
        assert payload["outputs_to_string"] is None
        assert payload["inputs_from_state"] is None
        assert payload["outputs_to_state"] is None

    def test_to_dict_with_extra_params(self, monkeypatch):
        """Non-default parameters are serialized, with handlers emitted as dotted import paths."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        forker_tool = GitHubRepoForkerTool(
            github_token=None,
            repo="owner/repo",
            branch="dev",
            raise_on_failure=False,
            outputs_to_string={"source": "docs", "handler": message_handler},
            inputs_from_state={"repository": "repo"},
            outputs_to_state={"documents": {"source": "docs", "handler": message_handler}},
        )

        serialized = forker_tool.to_dict()
        payload = serialized["data"]
        documents_config = payload["outputs_to_state"]["documents"]

        assert serialized["type"] == "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool"
        assert payload["name"] == "repo_forker"
        assert payload["description"] == REPO_FORKER_PROMPT
        assert payload["parameters"] == REPO_FORKER_SCHEMA
        assert payload["github_token"] is None
        assert payload["repo"] == "owner/repo"
        assert payload["branch"] == "dev"
        assert payload["raise_on_failure"] is False
        assert payload["outputs_to_string"]["handler"] == "haystack_integrations.tools.github.utils.message_handler"
        assert payload["inputs_from_state"] == {"repository": "repo"}
        assert documents_config["source"] == "docs"
        assert documents_config["handler"] == "haystack_integrations.tools.github.utils.message_handler"

    def test_from_dict_with_extra_params(self, monkeypatch):
        """Dotted handler paths in the serialized form are resolved back to the callables."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        init_data = {
            "name": "repo_forker",
            "description": REPO_FORKER_PROMPT,
            "parameters": REPO_FORKER_SCHEMA,
            "github_token": None,
            "repo": "owner/repo",
            "branch": "dev",
            "raise_on_failure": False,
            "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"},
            "inputs_from_state": {"repository": "repo"},
            "outputs_to_state": {
                "documents": {
                    "source": "docs",
                    "handler": "haystack_integrations.tools.github.utils.message_handler",
                }
            },
        }
        serialized = {
            "type": "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool",
            "data": init_data,
        }

        forker_tool = GitHubRepoForkerTool.from_dict(serialized)
        documents_config = forker_tool.outputs_to_state["documents"]

        assert forker_tool.name == "repo_forker"
        assert forker_tool.description == REPO_FORKER_PROMPT
        assert forker_tool.parameters == REPO_FORKER_SCHEMA
        assert forker_tool.github_token is None
        assert forker_tool.repo == "owner/repo"
        assert forker_tool.branch == "dev"
        assert forker_tool.raise_on_failure is False
        assert forker_tool.outputs_to_string["handler"] == message_handler
        assert forker_tool.inputs_from_state == {"repository": "repo"}
        assert documents_config["source"] == "docs"
        assert documents_config["handler"] == message_handler

0 commit comments

Comments
 (0)