remind101 · hamiltop · Apr 25, 2025 · Apr 25, 2025 · Apr 26, 2025 · Apr 26, 2025
diff --git a/.github/workflows/openhands-code-reviewer.yml b/.github/workflows/openhands-code-reviewer.yml
diff --git a/.github/workflows/openhands-resolver.yml b/.github/workflows/openhands-resolver.yml
@@ -118,6 +118,7 @@ jobs:
           else
             echo "Using default GITHUB_TOKEN"
             echo "AUTH_TOKEN=${{ github.token }}" >> $GITHUB_ENV
+          fi
       - name: Log Auth Token Source
         run: |
           if [ -n "${{ steps.generate-token.outputs.token }}" ]; then
@@ -234,7 +235,7 @@ jobs:
 
           echo "MAX_ITERATIONS=${{ inputs.max_iterations || 50 }}" >> $GITHUB_ENV
           echo "SANDBOX_ENV_GITHUB_TOKEN=${{ env.AUTH_TOKEN }}" >> $GITHUB_ENV
-          echo "SANDBOX_ENV_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
+          echo "SANDBOX_BASE_CONTAINER_IMAGE=${{ inputs.base_container_image }}" >> $GITHUB_ENV
 
           # Set branch variables
           echo "TARGET_BRANCH=${{ inputs.target_branch || 'main' }}" >> $GITHUB_ENV

diff --git a/dev_config/python/.pre-commit-config.yaml b/dev_config/python/.pre-commit-config.yaml
@@ -37,7 +37,7 @@ repos:
     hooks:
       - id: mypy
         additional_dependencies:
-          [types-requests, types-setuptools, types-pyyaml, types-toml]
+          [types-requests, types-setuptools, types-pyyaml, types-toml, types-aiofiles]
         entry: mypy --config-file dev_config/python/mypy.ini openhands/
         always_run: true
         pass_filenames: false
diff --git a/openhands/code_reviewer/__init__.py b/openhands/code_reviewer/__init__.py
diff --git a/openhands/code_reviewer/post_review_comments.py b/openhands/code_reviewer/post_review_comments.py
@@ -0,0 +1,155 @@
+import argparse
+import asyncio
+import json
+import os
+
+from openhands.code_reviewer.reviewer_output import ReviewerOutput
+from openhands.core.logger import openhands_logger as logger
+from openhands.integrations.service_types import ProviderType
+from openhands.resolver.interfaces.github import GithubPRHandler
+
+
+def get_pr_handler(
+    owner: str,
+    repo: str,
+    token: str | None,
+    platform: ProviderType,
+) -> GithubPRHandler:
+    """Return a GithubPRHandler for ``owner/repo``; ``token`` falls back to GITHUB_TOKEN.
+
+    Raises ValueError for a non-GitHub platform or when no token is available.
+    """
+    if platform != ProviderType.GITHUB:
+        unsupported = f'Unsupported platform for code review comments: {platform}. Only GitHub is supported.'
+        raise ValueError(unsupported)
+    resolved_token = token if token else os.environ.get('GITHUB_TOKEN')
+    if resolved_token:
+        return GithubPRHandler(token=resolved_token, owner=owner, repo=repo)
+    raise ValueError('GitHub token is required for GitHub PR handler')
+
+
+def post_comments(
+    output_file: str,
+    token: str | None,
+    selected_repo: str,
+    pr_number: int,
+) -> None:
+    """Read the review output JSON at ``output_file`` and post its comments to a PR.
+
+    ``selected_repo`` must be ``owner/repo``; an empty ``token`` falls back to the
+    GITHUB_TOKEN environment variable. Failures handled here are logged and end
+    the call early instead of raising.
+    """
+    # Function-local import, placed after the docstring so the docstring is real.
+    from openhands.code_reviewer.reviewer_output import ReviewComment
+
+    logger.info(f'Reading review output from: {output_file}')
+    try:
+        with open(output_file, 'r', encoding='utf-8') as f:
+            file_content = f.read()
+        # A whitespace-only file is reported as empty, not as malformed JSON.
+        if not file_content.strip():
+            logger.error(f'Output file is empty: {output_file}')
+            return
+        output_data = json.loads(file_content)
+        # Pop 'comments' so it can be passed as a list of ReviewComment objects.
+        comments_data = output_data.pop('comments', [])
+        comments_objects = [ReviewComment(**c) for c in comments_data]
+        review_output = ReviewerOutput(**output_data, comments=comments_objects)
+    except FileNotFoundError:
+        logger.error(f'Output file not found: {output_file}')
+        return
+    except json.JSONDecodeError:
+        logger.error(f'Failed to decode JSON from output file: {output_file}')
+        return
+    except Exception as e:
+        logger.error(f'Error reading or parsing output file {output_file}: {e}')
+        return
+
+    if not review_output.success:
+        logger.error(f'Review generation failed. Error: {review_output.error}')
+        # Optionally post a general failure comment? For now, just log.
+        return
+
+    if not review_output.comments:
+        logger.warning('Review was successful, but no comments were generated.')
+        # Optionally post a comment indicating review completed with no findings?
+        return
+
+    logger.info(f'Successfully parsed {len(review_output.comments)} comments.')
+
+    try:
+        owner, repo = selected_repo.split('/')
+    except ValueError:
+        logger.error(f'Invalid repository format: {selected_repo}. Use owner/repo.')
+        return
+
+    # Only GitHub is supported for posting review comments.
+    platform = ProviderType.GITHUB
+    try:
+        pr_handler = get_pr_handler(owner, repo, token, platform)
+    except ValueError as e:  # Unsupported platform or missing token
+        logger.error(f'Configuration error getting PR handler: {e}')
+        return
+
+    logger.info(
+        f'Posting {len(review_output.comments)} comments to PR #{pr_number} on {platform.value}...'
+    )
+
+    # Guard against a handler without post_review; the handler class is defined
+    # outside this module, so its interface is checked at runtime.
+    if not hasattr(pr_handler, 'post_review'):
+        logger.error(f'{type(pr_handler).__name__} does not have a post_review method.')
+        return
+    comments_to_post = review_output.comments
+    try:
+        # asyncio.run drives the post_review awaitable to completion.
+        asyncio.run(
+            pr_handler.post_review(pr_number=pr_number, comments=comments_to_post)
+        )
+    except Exception:  # Broad on purpose: any posting error is logged with its traceback
+        logger.exception(f'Failed to post comments to PR #{pr_number}')
+        return
+
+    logger.info(f'Successfully posted comments to PR #{pr_number}.')
+
+
+def main() -> None:
+    """Parse the command-line arguments and post the review comments to the PR."""
+    parser = argparse.ArgumentParser(description='Post review comments to a PR.')
+    parser.add_argument(
+        '--output-file',
+        type=str,
+        required=True,
+        help='Path to the review_output.jsonl file.',
+    )
+    parser.add_argument(
+        '--selected-repo',
+        type=str,
+        required=True,
+        help='Repository where the PR exists in the format `owner/repo`.',
+    )
+    parser.add_argument(
+        '--pr-number',
+        type=int,
+        required=True,
+        help='Pull Request number to post comments to.',
+    )
+    parser.add_argument(
+        '--token',
+        type=str,
+        default=None,
+        help='GitHub token (e.g. a PAT). Read from the GITHUB_TOKEN environment variable if not provided.',
+    )
+    args = parser.parse_args()
+
+    post_comments(
+        output_file=args.output_file,
+        token=args.token,
+        selected_repo=args.selected_repo,
+        pr_number=args.pr_number,
+    )
+
+
+if __name__ == '__main__':
+    main()
diff --git a/openhands/code_reviewer/prompts/review/basic-review.jinja b/openhands/code_reviewer/prompts/review/basic-review.jinja
@@ -0,0 +1,56 @@
+You are an AI code reviewer. Your task is to review the following pull request for the repository located in /workspace.
+An environment with the repository checked out at the PR's head commit is available for you to analyze the code.
+
+# Pull Request Details
+Title: {{ pr_data.title }}
+{% if pr_data.body %}
+Body:
+{{ pr_data.body }}
+{% endif %}
+
+# Review Task
+First, ensure the latest changes are fetched using `git fetch origin`. Then, analyze the code changes between the base branch (`{{ pr_data['base']['ref'] }}`) and the head branch (`{{ pr_data['head']['ref'] }}`) using git commands (e.g., `git diff origin/{{ pr_data['base']['ref'] }}...origin/{{ pr_data['head']['ref'] }}`). Base your review on the following parameters:
+- Review Level: `{{ review_level }}` (Specifies the granularity: 'line' for specific lines, 'file' for overall file changes, 'pr' for a high-level summary)
+- Review Depth: `{{ review_depth }}` (Specifies the thoroughness: 'quick' for obvious issues, 'medium' for standard checks, 'deep' for in-depth analysis including potential bugs and security concerns)
+**It is crucial that you strictly adhere to the specified Review Level and Review Depth.**
+
+When reviewing changes:
+- Identify the file path from the `git diff` output (usually the line starting with `+++ b/`). This is the path in the head commit.
+- Comments should only be placed on lines that exist in the head commit (lines starting with `+` or ` ` within the diff hunk). Do not comment on removed lines (starting with `-`).
+- **Check Existing Comments:** Before adding a comment, consider if similar feedback has already been provided by other reviewers on this PR. Avoid adding duplicate comments.
+- **Write Actionable Feedback:** Ensure your comments are constructive and actionable. Suggest specific improvements or changes rather than just explaining what the code does.
+- **Line Number Determination and Verification:** Line numbers for comments MUST be determined **exclusively** by reading the file content from the `/workspace` directory (which is checked out to the head commit). Use `cat <file_path>` to get the content and `grep -n` (or manual counting in the `cat` output) to find the line number of the *exact* code you want to comment on. **Do NOT use the `git diff` output to determine or calculate line numbers.** This is unreliable. The line number reported by `grep -n` on the `/workspace` file is the **only** valid source for the `line` field in your comment. If the exact line text appears multiple times, use the surrounding code context (from the `cat` output) to identify the correct instance. The line numbers you provide MUST correspond to the line numbers in the head commit version.
+
+
+{% if repo_instruction %}
+# Repository Guidelines/Instructions
+Please also consider the following repository-specific guidelines during your review:
+{{ repo_instruction }}
+{% endif %}
+
+# Output Format
+Your final action **MUST** be the `finish` tool call.
+- The `message` argument of this tool call **MUST** contain **ONLY** a single, raw JSON list containing review comment objects. It must NOT contain any other text, explanations, or markdown formatting.
+- You can include any summary or explanation of your review process in the `thought` that accompanies the `finish` tool call.
+
+Each comment object in the JSON list should have the following structure:
+- `path`: (string) The full path to the file being commented on, relative to the repository root (e.g., "openhands/core/config.py").
+- `comment`: (string) The text of your review comment.
+- `line`: (integer) The line number in the *head commit version* of the file the comment refers to, determined and verified as described above. Required for all comments unless `review_level` is 'pr'.
+- `code_snippet`: (string) The exact line(s) of code from the head commit version that the comment refers to. Include 1-3 lines for context if helpful. Required for all comments unless `review_level` is 'pr'.
+- `line_number_justification`: (string) A brief explanation of how the `line` number was determined using file content verification (e.g., "Verified line 42 with `grep -n 'exact code line' /workspace/src/utils/parser.py`"). Required for all comments unless `review_level` is 'pr'.
+
+Example structure of the JSON list (this exact string goes into the `message` argument of the `finish` tool call):
+`[{"path": "src/utils/parser.py", "line": 42, "code_snippet": "... code line(s) ...", "line_number_justification": "...", "comment": "..."}, {"path": "src/main.py", "comment": "..."}]`
+
+IMPORTANT:
+- Focus your review on the changes between the base branch (`{{ pr_data['base']['ref'] }}`) and the head branch (`{{ pr_data['head']['ref'] }}`).
+- Adhere strictly to the specified JSON output format for your final response.
+- The `message` argument of your `finish` tool call **MUST** contain **ONLY** the raw JSON list string. No extra text, no markdown.
+- Any explanatory text belongs in the accompanying `thought`, NOT the `message` argument.
+- Do NOT attempt to modify any files. Your role is only to review.
+- Do NOT ask for human help or clarification. Provide the review based on the information given.
+- Use the `finish` tool call with the JSON review in the `message` argument as your **very last step** to signal completion.
+- If no issues are found, the `message` argument should contain the exact string `[]`.
+- You are running in a non-interactive environment. Do NOT ask questions or wait for user input. Proceed directly to the `finish` action with the JSON review when your analysis is complete.
+- Do not use any action other than `finish` to output the review.