Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions vulnerabilities/improvers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
from vulnerabilities.pipelines import flag_ghost_packages
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
from vulnerabilities.pipelines import remove_duplicate_advisories
from vulnerabilities.pipelines.v2_improvers import (
collect_commits_project_kb as collect_commits_project_kb_v2,
)
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
from vulnerabilities.pipelines.v2_improvers import (
Expand Down Expand Up @@ -68,5 +71,6 @@
compute_version_rank_v2.ComputeVersionRankPipeline,
compute_advisory_todo_v2.ComputeToDo,
compute_advisory_todo.ComputeToDo,
collect_commits_project_kb_v2.CollectFixCommitsProjectKBPipeline,
]
)
120 changes: 120 additions & 0 deletions vulnerabilities/pipelines/v2_improvers/collect_commits_project_kb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# VulnerableCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import csv
from pathlib import Path

import saneyaml
from fetchcode.vcs import fetch_via_vcs

from vulnerabilities.models import AdvisoryV2
from vulnerabilities.models import CodeFixV2
from vulnerabilities.pipelines import VulnerableCodePipeline


class CollectFixCommitsProjectKBPipeline(VulnerableCodePipeline):
    """
    Pipeline to collect fix commits from Project KB:
    https://github.com/SAP/project-kb/blob/main/MSR2019/dataset/vulas_db_msr2019_release.csv
    https://github.com/SAP/project-kb/blob/vulnerability-data/statements/*/*.yaml

    Both the default branch and the ``vulnerability-data`` branch of the
    Project KB repository are cloned. Every (vulnerability id, repository URL,
    commit id) triple found in either data source is recorded as a
    ``CodeFixV2`` entry for each package affected by the matching
    ``AdvisoryV2``. The clones are deleted once processing is done, or when
    the pipeline fails.
    """

    pipeline_id = "kb_project_fix_commits"
    spdx_license_expression = "Apache-2.0"
    license_url = "https://github.com/SAP/project-kb/blob/main/LICENSE.txt"
    importer_name = "Project KB Importer"
    qualified_name = "kb_project_fix_commits"
    repo_url_vulnerability_data = "git+https://github.com/SAP/project-kb@vulnerability-data"
    repo_url_main = "git+https://github.com/SAP/project-kb"

    # Handles of the cloned repositories. They default to None so that
    # clean_downloads() is safe to call even when a clone never happened
    # (for example when on_failure() runs after a failed fetch).
    vcs_response_main = None
    vcs_response_vuln_data = None

    @classmethod
    def steps(cls):
        """Return the pipeline steps in execution order."""
        return (
            cls.collect_fix_commits,
            # Remove the clones after a successful run too, not only on failure.
            cls.clean_downloads,
        )

    def collect_fix_commits(self):
        """Clone Project KB and create code fix entries from both of its data sources."""
        self.vcs_response_main = fetch_via_vcs(self.repo_url_main)
        self.vcs_response_vuln_data = fetch_via_vcs(self.repo_url_vulnerability_data)

        self.log("Processing ProjectKBP fix commits.")
        csv_database_filepath = (
            Path(self.vcs_response_main.dest_dir) / "MSR2019/dataset/vulas_db_msr2019_release.csv"
        )
        self._collect_from_csv(csv_database_filepath)

        base_path = Path(self.vcs_response_vuln_data.dest_dir) / "statements"
        self._collect_from_statements(base_path)

    def _get_advisory(self, vulnerability_id):
        """Return the AdvisoryV2 with this advisory id, or None (logged) when there is none."""
        try:
            return AdvisoryV2.objects.get(advisory_id=vulnerability_id)
        except AdvisoryV2.DoesNotExist:
            self.log(f"Can't find vulnerability_id: {vulnerability_id}")
            return None

    def _collect_from_csv(self, csv_database_filepath):
        """Create code fix entries from the rows of the MSR2019 CSV dataset file."""
        try:
            csv_file = open(csv_database_filepath, mode="r", newline="", encoding="utf-8")
        except FileNotFoundError:
            self.log(f"CSV file not found: {csv_database_filepath}")
            return

        with csv_file:
            reader = csv.reader(csv_file)
            next(reader, None)  # Skip header row
            for row in reader:
                if len(row) != 4:
                    continue
                # NOTE(review): the fourth column (label) is ignored, so every
                # row is treated as a fix commit - confirm this is intended.
                vulnerability_id, repo_url, commit_hash, _label = row

                # Same guard as for the YAML statements: a row without a
                # repository or a commit cannot yield a usable commit URL.
                if not (vulnerability_id and repo_url and commit_hash):
                    continue

                advisory = self._get_advisory(vulnerability_id)
                if advisory is None:
                    continue

                self.create_codefix_entries(advisory, repo_url, commit_hash, vulnerability_id)

    def _collect_from_statements(self, base_path):
        """Create code fix entries from every ``statement.yaml`` file found under ``base_path``."""
        for file_path in base_path.rglob("statement.yaml"):
            with open(file_path, encoding="utf-8") as f:
                vulnerability_fixes_data = saneyaml.load(f)

            # An empty or non-mapping document has nothing to collect.
            if not isinstance(vulnerability_fixes_data, dict):
                continue

            vulnerability_id = vulnerability_fixes_data.get("vulnerability_id")
            if not vulnerability_id:
                continue

            advisory = self._get_advisory(vulnerability_id)
            if advisory is None:
                continue

            # "or []" also covers keys that are present but set to null.
            for commit_data in vulnerability_fixes_data.get("fixes") or []:
                for commit in commit_data.get("commits") or []:
                    commit_id = commit.get("id")
                    repo_url = commit.get("repository")

                    if not commit_id or not repo_url:
                        continue

                    self.create_codefix_entries(advisory, repo_url, commit_id, vulnerability_id)

    def create_codefix_entries(self, advisory, repo_url, commit_id, vulnerability_id):
        """
        Get or create one CodeFixV2 per package affected by ``advisory``.

        The commit URL is built as ``<repo_url>/commit/<commit_id>`` after
        removing any trailing slashes and a trailing ``.git`` from ``repo_url``.
        ``vulnerability_id`` is only used in the log message.
        """
        repo_url = repo_url.rstrip("/").removesuffix(".git")
        vcs_url = f"{repo_url}/commit/{commit_id}"

        for impact in advisory.impacted_packages.all():
            for package in impact.affecting_packages.all():
                _code_fix, created = CodeFixV2.objects.get_or_create(
                    commits=[vcs_url],
                    advisory=advisory,
                    affected_package=package,
                )
                if created:
                    self.log(
                        f"Created CodeFix entry for vulnerability_id: {vulnerability_id} with VCS URL {vcs_url}"
                    )

    def clean_downloads(self):
        """Delete whichever cloned repositories exist; safe to call more than once."""
        pending = [
            attr
            for attr in ("vcs_response_main", "vcs_response_vuln_data")
            if getattr(self, attr, None)
        ]
        if not pending:
            return

        self.log("Removing cloned repository")
        for attr in pending:
            getattr(self, attr).delete()
            # Forget the handle so a later call does not delete it again.
            setattr(self, attr, None)

    def on_failure(self):
        """Remove the cloned repositories when the pipeline fails."""
        self.clean_downloads()
Loading