Skip to content

Commit 6adb8f1

Browse files
srini047, mpangrazzi, sjrl
authored
feat: add support for GitHubRepoForkerTool (#1968)
* feat: add support for GitHubRepoForkerTool * fix: remove extra params * fix: revert token check * fix: typing issues * fix: test issue * fix: revert as per comments * Update integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py * Update integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py * fix: test failures * fix: formatting issue --------- Co-authored-by: Michele Pangrazzi <[email protected]> Co-authored-by: Sebastian Husch Lee <[email protected]>
1 parent 11a7f83 commit 6adb8f1

File tree

7 files changed

+313
-3
lines changed

7 files changed

+313
-3
lines changed

integrations/github/src/haystack_integrations/components/connectors/github/repo_forker.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ def __init__(
6060
:param auto_sync: If True, syncs fork with original repository if it already exists
6161
:param create_branch: If True, creates a fix branch based on the issue number
6262
"""
63-
error_message = "github_token must be a Secret"
6463
if not isinstance(github_token, Secret):
65-
raise TypeError(error_message)
64+
msg = "github_token must be a Secret"
65+
raise TypeError(msg)
6666

6767
self.github_token = github_token
6868
self.raise_on_failure = raise_on_failure

integrations/github/src/haystack_integrations/prompts/github/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from .file_editor_prompt import FILE_EDITOR_PROMPT, FILE_EDITOR_SCHEMA
66
from .issue_commenter_prompt import ISSUE_COMMENTER_PROMPT, ISSUE_COMMENTER_SCHEMA
77
from .pr_creator_prompt import PR_CREATOR_PROMPT, PR_CREATOR_SCHEMA
8+
from .repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
89
from .repo_viewer_prompt import REPO_VIEWER_PROMPT, REPO_VIEWER_SCHEMA
910
from .system_prompt import SYSTEM_PROMPT
1011

@@ -16,6 +17,8 @@
1617
"ISSUE_COMMENTER_SCHEMA",
1718
"PR_CREATOR_PROMPT",
1819
"PR_CREATOR_SCHEMA",
20+
"REPO_FORKER_PROMPT",
21+
"REPO_FORKER_SCHEMA",
1922
"REPO_VIEWER_PROMPT",
2023
"REPO_VIEWER_SCHEMA",
2124
"SYSTEM_PROMPT",
integrations/github/src/haystack_integrations/prompts/github/repo_forker_prompt.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
# Default description for the `repo_forker` tool. It tells the agent when to fork a repository,
# which argument to pass (`url` of a GitHub issue) and what to reflect on before and after forking.
# NOTE(review): the diff view this was recovered from drops leading whitespace, so any indentation
# of the nested "- will fork ..." bullets could not be recovered - confirm against the repository.
REPO_FORKER_PROMPT = """Haystack-Agent uses this tool to fork GitHub repositories in order to contribute to issues.
Haystack-Agent initiates a fork so it can freely make changes for contributions.
A fork is required to open a pull request to the upstream repository.
Haystack-Agent works by forking the repository associated with a given issue.

<usage>
Pass a `url` string for the GitHub issue you want to work on in a fork.
It is REQUIRED to pass `url` to use this tool.
The structure must be "https://github.com/<repo-owner>/<repo-name>/issues/<issue-number>".

Examples:

- {"url": "https://github.com/deepset-ai/haystack/issues/9343"}
- will fork the "deepset-ai/haystack" repository to work on issue 9343
- {"url": "https://github.com/deepset-ai/haystack-core-integrations/issues/1685"}
- will fork the "deepset-ai/haystack-core-integrations" repository to work on issue 1685
</usage>

Haystack-Agent uses the `repo_forker` tool to create a copy (fork) of the target repository into its own account.
Haystack-Agent ensures the issue URL is valid and points to a real GitHub issue.
It parses the URL to identify the correct repository.

<thinking>
- Does this issue belong to the repository I need to work on?
- Can I extract the owner and repository name from the URL?
- Why am I forking this repository? (e.g., to implement a fix, to add a feature)
- Is there anything special about the branch or base state I should be aware of?
</thinking>

Haystack-Agent reflects on the results after forking:
<thinking>
- Did the fork succeed? Is the fork visible in my account?
- Can I access, clone, and push to my fork?
- Are there any permissions or fork-specific settings to configure before proceeding?
- Which branch will I be working on in the fork?
</thinking>

IMPORTANT
Haystack-Agent ONLY forks the repository mentioned in the given issue URL.
Haystack-Agent does NOT attempt to fork organizations, user profiles, or non-issue URLs.
Haystack-Agent knows that forking is a prerequisite to contributing changes and creating pull requests.

Haystack-Agent takes notes after the fork:
<scratchpad>
- Record the URL of the forked repository
- Note the original issue being worked on
- Document any post-fork steps (e.g., git cloning, installing dependencies)
- Make note of any errors or special setup requirements
</scratchpad>
"""
54+
55+
# JSON schema for the arguments accepted by the `repo_forker` tool:
# an object with a single required string property, `url`.
REPO_FORKER_SCHEMA = {
    "properties": {
        "url": {"type": "string", "description": "URL of the GitHub issue to work on in the fork."},
    },
    "required": ["url"],
    "type": "object",
}

integrations/github/src/haystack_integrations/tools/github/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@
55
from .issue_commenter_tool import GitHubIssueCommenterTool
66
from .issue_viewer_tool import GitHubIssueViewerTool
77
from .pr_creator_tool import GitHubPRCreatorTool
8+
from .repo_forker_tool import GitHubRepoForkerTool
89
from .repo_viewer_tool import GitHubRepoViewerTool
910

1011
__all__ = [
1112
"GitHubFileEditorTool",
1213
"GitHubIssueCommenterTool",
1314
"GitHubIssueViewerTool",
1415
"GitHubPRCreatorTool",
16+
"GitHubRepoForkerTool",
1517
"GitHubRepoViewerTool",
1618
]
integrations/github/src/haystack_integrations/tools/github/repo_forker_tool.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from typing import Any, Callable, Dict, Optional, Union
5+
6+
from haystack.core.serialization import generate_qualified_class_name
7+
from haystack.tools import ComponentTool
8+
from haystack.utils import Secret, deserialize_secrets_inplace
9+
10+
from haystack_integrations.components.connectors.github.repo_forker import GitHubRepoForker
11+
from haystack_integrations.prompts.github.repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
12+
from haystack_integrations.tools.github.utils import deserialize_handlers, serialize_handlers
13+
14+
15+
class GitHubRepoForkerTool(ComponentTool):
16+
"""
17+
A tool for forking Github repository.
18+
"""
19+
20+
def __init__(
21+
self,
22+
*,
23+
name: Optional[str] = "repo_forker",
24+
description: Optional[str] = REPO_FORKER_PROMPT,
25+
parameters: Optional[Dict[str, Any]] = REPO_FORKER_SCHEMA,
26+
github_token: Secret = Secret.from_env_var("GITHUB_TOKEN"),
27+
raise_on_failure: bool = True,
28+
outputs_to_string: Optional[Dict[str, Union[str, Callable[[Any], str]]]] = None,
29+
inputs_from_state: Optional[Dict[str, str]] = None,
30+
outputs_to_state: Optional[Dict[str, Dict[str, Union[str, Callable]]]] = None,
31+
):
32+
"""
33+
Initialize the GitHub Repo Forker tool.
34+
35+
:param name: Optional name for the tool.
36+
:param description: Optional description.
37+
:param parameters: Optional JSON schema defining the parameters expected by the Tool.
38+
:param github_token: GitHub personal access token for API authentication
39+
:param raise_on_failure: If True, raises exceptions on API errors
40+
:param outputs_to_string:
41+
Optional dictionary defining how a tool outputs should be converted into a string.
42+
If the source is provided only the specified output key is sent to the handler.
43+
If the source is omitted the whole tool result is sent to the handler.
44+
Example: {
45+
"source": "docs", "handler": format_documents
46+
}
47+
:param inputs_from_state:
48+
Optional dictionary mapping state keys to tool parameter names.
49+
Example: {"repository": "repo"} maps state's "repository" to tool's "repo" parameter.
50+
:param outputs_to_state:
51+
Optional dictionary defining how tool outputs map to keys within state as well as optional handlers.
52+
If the source is provided only the specified output key is sent to the handler.
53+
Example: {
54+
"documents": {"source": "docs", "handler": custom_handler}
55+
}
56+
If the source is omitted the whole tool result is sent to the handler.
57+
Example: {
58+
"documents": {"handler": custom_handler}
59+
}
60+
"""
61+
self.github_token = github_token
62+
self.raise_on_failure = raise_on_failure
63+
self.outputs_to_string = outputs_to_string
64+
self.inputs_from_state = inputs_from_state
65+
self.outputs_to_state = outputs_to_state
66+
67+
repo_forker = GitHubRepoForker(
68+
github_token=github_token,
69+
raise_on_failure=raise_on_failure,
70+
)
71+
72+
super().__init__(
73+
component=repo_forker,
74+
name=name,
75+
description=description,
76+
parameters=parameters,
77+
outputs_to_string=self.outputs_to_string,
78+
inputs_from_state=self.inputs_from_state,
79+
outputs_to_state=self.outputs_to_state,
80+
)
81+
82+
def to_dict(self) -> Dict[str, Any]:
83+
"""
84+
Serializes the tool to a dictionary.
85+
86+
Returns:
87+
Dictionary with serialized data.
88+
"""
89+
serialized = {
90+
"name": self.name,
91+
"description": self.description,
92+
"parameters": self.parameters,
93+
"github_token": self.github_token.to_dict() if self.github_token else None,
94+
"raise_on_failure": self.raise_on_failure,
95+
"outputs_to_string": self.outputs_to_string,
96+
"inputs_from_state": self.inputs_from_state,
97+
"outputs_to_state": self.outputs_to_state,
98+
}
99+
100+
serialize_handlers(serialized, self.outputs_to_state, self.outputs_to_string)
101+
return {"type": generate_qualified_class_name(type(self)), "data": serialized}
102+
103+
@classmethod
104+
def from_dict(cls, data: Dict[str, Any]) -> "GitHubRepoForkerTool":
105+
"""
106+
Deserializes the tool from a dictionary.
107+
108+
:param data:
109+
Dictionary to deserialize from.
110+
:returns:
111+
Deserialized tool.
112+
"""
113+
inner_data = data["data"]
114+
deserialize_secrets_inplace(inner_data, keys=["github_token"])
115+
deserialize_handlers(inner_data)
116+
return cls(**inner_data)

integrations/github/tests/test_repo_forker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class TestGitHubRepoForker:
1414
def test_init_default(self, monkeypatch):
1515
monkeypatch.setenv("GITHUB_TOKEN", "test-token")
1616

17-
forker = GitHubRepoForker()
17+
forker = GitHubRepoForker(github_token=Secret.from_env_var("GITHUB_TOKEN"))
1818
assert forker.github_token is not None
1919
assert forker.github_token.resolve_value() == "test-token"
2020
assert forker.raise_on_failure is True
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
# SPDX-FileCopyrightText: 2023-present deepset GmbH <[email protected]>
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
from haystack.utils import Secret
5+
6+
from haystack_integrations.prompts.github.repo_forker_prompt import REPO_FORKER_PROMPT, REPO_FORKER_SCHEMA
7+
from haystack_integrations.tools.github.repo_forker_tool import GitHubRepoForkerTool
8+
from haystack_integrations.tools.github.utils import message_handler
9+
10+
11+
class TestGitHubRepoForkerTool:
    """Tests for construction and (de)serialization of `GitHubRepoForkerTool`."""

    def test_init(self, monkeypatch):
        """Default construction uses the packaged prompt and schema and the GITHUB_TOKEN env var."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")

        tool = GitHubRepoForkerTool()
        assert tool.name == "repo_forker"
        assert tool.description == REPO_FORKER_PROMPT
        assert tool.parameters == REPO_FORKER_SCHEMA
        assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN")
        assert tool.raise_on_failure is True
        assert tool.outputs_to_string is None
        assert tool.inputs_from_state is None
        assert tool.outputs_to_state is None

    def test_from_dict(self, monkeypatch):
        """A dictionary with default values deserializes to an equivalent tool."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        tool_dict = {
            "type": "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool",
            "data": {
                "name": "repo_forker",
                "description": REPO_FORKER_PROMPT,
                "parameters": REPO_FORKER_SCHEMA,
                "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"},
                "raise_on_failure": True,
                "outputs_to_string": None,
                "inputs_from_state": None,
                "outputs_to_state": None,
            },
        }
        tool = GitHubRepoForkerTool.from_dict(tool_dict)
        assert tool.name == "repo_forker"
        assert tool.description == REPO_FORKER_PROMPT
        assert tool.parameters == REPO_FORKER_SCHEMA
        assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN")
        assert tool.raise_on_failure is True
        assert tool.outputs_to_string is None
        assert tool.inputs_from_state is None
        assert tool.outputs_to_state is None

    def test_to_dict(self, monkeypatch):
        """A default tool serializes to the expected type string and data payload."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        tool = GitHubRepoForkerTool()
        tool_dict = tool.to_dict()
        assert tool_dict["type"] == "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool"
        assert tool_dict["data"]["name"] == "repo_forker"
        assert tool_dict["data"]["description"] == REPO_FORKER_PROMPT
        assert tool_dict["data"]["parameters"] == REPO_FORKER_SCHEMA
        assert tool_dict["data"]["github_token"] == {
            "env_vars": ["GITHUB_TOKEN"],
            "strict": True,
            "type": "env_var",
        }
        assert tool_dict["data"]["raise_on_failure"] is True
        assert tool_dict["data"]["outputs_to_string"] is None
        assert tool_dict["data"]["inputs_from_state"] is None
        assert tool_dict["data"]["outputs_to_state"] is None

    def test_to_dict_with_extra_params(self, monkeypatch):
        """Handler callables are serialized as their dotted import paths."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        tool = GitHubRepoForkerTool(
            github_token=Secret.from_env_var("GITHUB_TOKEN"),
            raise_on_failure=False,
            outputs_to_string={"source": "docs", "handler": message_handler},
            inputs_from_state={"repository": "repo"},
            outputs_to_state={"documents": {"source": "docs", "handler": message_handler}},
        )
        tool_dict = tool.to_dict()
        assert tool_dict["type"] == "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool"
        assert tool_dict["data"]["name"] == "repo_forker"
        assert tool_dict["data"]["description"] == REPO_FORKER_PROMPT
        assert tool_dict["data"]["parameters"] == REPO_FORKER_SCHEMA
        assert tool_dict["data"]["github_token"] == {
            "env_vars": ["GITHUB_TOKEN"],
            "strict": True,
            "type": "env_var",
        }
        assert tool_dict["data"]["raise_on_failure"] is False
        assert (
            tool_dict["data"]["outputs_to_string"]["handler"]
            == "haystack_integrations.tools.github.utils.message_handler"
        )
        assert tool_dict["data"]["inputs_from_state"] == {"repository": "repo"}
        assert tool_dict["data"]["outputs_to_state"]["documents"]["source"] == "docs"
        assert (
            tool_dict["data"]["outputs_to_state"]["documents"]["handler"]
            == "haystack_integrations.tools.github.utils.message_handler"
        )

    def test_from_dict_with_extra_params(self, monkeypatch):
        """Dotted handler paths are resolved back to the callables on deserialization."""
        monkeypatch.setenv("GITHUB_TOKEN", "test-token")
        tool_dict = {
            "type": "haystack_integrations.tools.github.repo_forker_tool.GitHubRepoForkerTool",
            "data": {
                "name": "repo_forker",
                "description": REPO_FORKER_PROMPT,
                "parameters": REPO_FORKER_SCHEMA,
                "github_token": {"env_vars": ["GITHUB_TOKEN"], "strict": True, "type": "env_var"},
                "raise_on_failure": False,
                "outputs_to_string": {"handler": "haystack_integrations.tools.github.utils.message_handler"},
                "inputs_from_state": {"repository": "repo"},
                "outputs_to_state": {
                    "documents": {
                        "source": "docs",
                        "handler": "haystack_integrations.tools.github.utils.message_handler",
                    }
                },
            },
        }
        tool = GitHubRepoForkerTool.from_dict(tool_dict)
        assert tool.name == "repo_forker"
        assert tool.description == REPO_FORKER_PROMPT
        assert tool.parameters == REPO_FORKER_SCHEMA
        assert tool.github_token == Secret.from_env_var("GITHUB_TOKEN")
        assert tool.raise_on_failure is False
        assert tool.outputs_to_string["handler"] == message_handler
        assert tool.inputs_from_state == {"repository": "repo"}
        assert tool.outputs_to_state["documents"]["source"] == "docs"
        assert tool.outputs_to_state["documents"]["handler"] == message_handler

0 commit comments

Comments
 (0)