22import os
33import sys
44import time
5+ import io
56from dataclasses import asdict
67from glob import glob
8+ from io import BytesIO
79from pathlib import PurePath
8- from typing import BinaryIO , Dict , List , Tuple , Set
10+ from typing import BinaryIO , Dict , List , Tuple , Set , Union
911import re
1012from socketdev import socketdev
1113from socketdev .exceptions import APIFailure
2426 Purl
2527)
2628from socketsecurity .core .exceptions import APIResourceNotFound
27- from socketsecurity .core .licenses import Licenses
2829from .socket_config import SocketConfig
2930from .utils import socket_globs
3031import importlib
@@ -186,6 +187,7 @@ def find_files(self, path: str) -> List[str]:
186187 for ecosystem in patterns :
187188 if ecosystem in self .config .excluded_ecosystems :
188189 continue
190+ log .info (f'Scanning ecosystem: { ecosystem } ' )
189191 ecosystem_patterns = patterns [ecosystem ]
190192 for file_name in ecosystem_patterns :
191193 original_pattern = ecosystem_patterns [file_name ]["pattern" ]
@@ -208,7 +210,7 @@ def find_files(self, path: str) -> List[str]:
208210 glob_end = time .time ()
209211 log .debug (f"Globbing took { glob_end - glob_start :.4f} seconds" )
210212
211- log .debug (f"Total files found: { len (files )} " )
213+ log .info (f"Total files found: { len (files )} " )
212214 return sorted (files )
213215
214216 def get_supported_patterns (self ) -> Dict :
@@ -278,6 +280,14 @@ def to_case_insensitive_regex(input_string: str) -> str:
278280 """
279281 return '' .join (f'[{ char .lower ()} { char .upper ()} ]' if char .isalpha () else char for char in input_string )
280282
283+ @staticmethod
284+ def empty_head_scan_file () -> list [tuple [str , tuple [str , Union [BinaryIO , BytesIO ]]]]:
285+ # Create an empty file for when no head full scan so that the diff endpoint can always be used
286+ empty_file_obj = io .BytesIO (b"" )
287+ empty_filename = "initial_head_scan"
288+ empty_full_scan_file = [(empty_filename , (empty_filename , empty_file_obj ))]
289+ return empty_full_scan_file
290+
281291 @staticmethod
282292 def load_files_for_sending (files : List [str ], workspace : str ) -> List [Tuple [str , Tuple [str , BinaryIO ]]]:
283293 """
@@ -311,7 +321,7 @@ def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str,
311321
312322 return send_files
313323
314- def create_full_scan (self , files : List [ str ], params : FullScanParams , has_head_scan : bool = False ) -> FullScan :
324+ def create_full_scan (self , files : list [ tuple [ str , tuple [ str , BytesIO ]]], params : FullScanParams ) -> FullScan :
315325 """
316326 Creates a new full scan via the Socket API.
317327
@@ -322,7 +332,7 @@ def create_full_scan(self, files: List[str], params: FullScanParams, has_head_sc
322332 Returns:
323333 FullScan object with scan results
324334 """
325- log .debug ("Creating new full scan" )
335+ log .info ("Creating new full scan" )
326336 create_full_start = time .time ()
327337
328338 res = self .sdk .fullscans .post (files , params , use_types = True )
@@ -331,16 +341,60 @@ def create_full_scan(self, files: List[str], params: FullScanParams, has_head_sc
331341 raise Exception (f"Error creating full scan: { res .message } , status: { res .status } " )
332342
333343 full_scan = FullScan (** asdict (res .data ))
334- if not has_head_scan :
335- full_scan .sbom_artifacts = self .get_sbom_data (full_scan .id )
336- full_scan .packages = self .create_packages_dict (full_scan .sbom_artifacts )
337-
338344 create_full_end = time .time ()
339345 total_time = create_full_end - create_full_start
340346 log .debug (f"New Full Scan created in { total_time :.2f} seconds" )
341347
342348 return full_scan
343349
def check_full_scans_status(
    self,
    head_full_scan_id: str,
    new_full_scan_id: str,
    *,
    poll_interval: float = 0.5,
) -> bool:
    """
    Wait until both the head and the new full scan report the "resolve" state.

    Each scan is first requested through the stream endpoint with a very
    short SDK timeout; any exception raised there is only logged at debug
    level. The scans' metadata is then polled until both have
    ``scan_state == "resolve"`` or ``self.config.timeout`` seconds elapsed.

    Args:
        head_full_scan_id: ID of the head full scan.
        new_full_scan_id: ID of the newly created full scan.
        poll_interval: Seconds to wait between metadata polls. Values <= 0
            disable the pause.

    Returns:
        True if both scans reached the "resolve" state before the timeout,
        False otherwise.
    """
    scan_ids = (head_full_scan_id, new_full_scan_id)

    # NOTE(review): presumably the stream request is what queues the scan for
    # processing (per the log message below) — confirm against the SDK.
    current_timeout = self.config.timeout
    self.sdk.set_timeout(0.5)
    try:
        for scan_id in scan_ids:
            try:
                self.sdk.fullscans.stream(self.config.org_slug, scan_id)
            except Exception:
                # Best effort: with a 0.5s timeout a failure here is expected.
                log.debug(f"Queued up full scan for processing ({scan_id})")
    finally:
        # Always restore the configured timeout, even on KeyboardInterrupt etc.
        self.sdk.set_timeout(current_timeout)

    def is_resolved(scan_id: str) -> bool:
        """Return True when the scan's metadata reports the "resolve" state."""
        metadata = self.sdk.fullscans.metadata(self.config.org_slug, scan_id)
        return bool(metadata) and metadata.get("scan_state") == "resolve"

    start_check = time.time()
    # Only scans not yet seen as resolved are polled again (readiness is sticky).
    pending = list(dict.fromkeys(scan_ids))
    is_ready = False
    while True:
        pending = [scan_id for scan_id in pending if not is_resolved(scan_id)]
        if not pending:
            is_ready = True
            break

        remaining = self.config.timeout - (time.time() - start_check)
        if remaining <= 0:
            log.debug(
                f"Timeout reached while waiting for full scans to be ready "
                f"({head_full_scan_id}, {new_full_scan_id})"
            )
            break

        # Throttle polling so the API is not hit in a tight loop, but never
        # sleep past the overall deadline.
        if poll_interval > 0:
            time.sleep(min(poll_interval, remaining))

    total_time = time.time() - start_check
    if is_ready:
        log.info(f"Full scans are ready in {total_time:.2f} seconds")
    else:
        log.warning(f"Full scans are not ready yet ({head_full_scan_id}, {new_full_scan_id})")
    return is_ready
397+
344398 def get_full_scan (self , full_scan_id : str ) -> FullScan :
345399 """
346400 Get a FullScan object for an existing full scan including sbom_artifacts and packages.
@@ -403,14 +457,9 @@ def get_package_license_text(self, package: Package) -> str:
403457 return ""
404458
405459 license_raw = package .license
406- all_licenses = Licenses ()
407- license_str = Licenses .make_python_safe (license_raw )
408-
409- if license_str is not None and hasattr (all_licenses , license_str ):
410- license_obj = getattr (all_licenses , license_str )
411- return license_obj .licenseText
412-
413- return ""
460+ data = self .sdk .licensemetadata .post ([license_raw ], {'includetext' : 'true' })
461+ license_str = data .data [0 ].license if data and len (data ) == 1 else ""
462+ return license_str
414463
415464 def get_repo_info (self , repo_slug : str , default_branch : str = "socket-default-branch" ) -> RepositoryInfo :
416465 """
@@ -485,7 +534,7 @@ def update_package_values(pkg: Package) -> Package:
485534 pkg .url += f"/{ pkg .name } /overview/{ pkg .version } "
486535 return pkg
487536
488- def get_added_and_removed_packages (self , head_full_scan_id : str , new_full_scan : FullScan ) -> Tuple [Dict [str , Package ], Dict [str , Package ]]:
537+ def get_added_and_removed_packages (self , head_full_scan_id : str , new_full_scan_id : str ) -> Tuple [Dict [str , Package ], Dict [str , Package ]]:
489538 """
490539 Get packages that were added and removed between scans.
491540
@@ -496,14 +545,11 @@ def get_added_and_removed_packages(self, head_full_scan_id: str, new_full_scan:
496545 Returns:
497546 Tuple of (added_packages, removed_packages) dictionaries
498547 """
499- if head_full_scan_id is None :
500- log .info (f"No head scan found. New scan ID: { new_full_scan .id } " )
501- return new_full_scan .packages , {}
502548
503- log .info (f"Comparing scans - Head scan ID: { head_full_scan_id } , New scan ID: { new_full_scan . id } " )
549+ log .info (f"Comparing scans - Head scan ID: { head_full_scan_id } , New scan ID: { new_full_scan_id } " )
504550 diff_start = time .time ()
505551 try :
506- diff_report = self .sdk .fullscans .stream_diff (self .config .org_slug , head_full_scan_id , new_full_scan . id , use_types = True ).data
552+ diff_report = self .sdk .fullscans .stream_diff (self .config .org_slug , head_full_scan_id , new_full_scan_id , use_types = True ).data
507553 except APIFailure as e :
508554 log .error (f"API Error: { e } " )
509555 sys .exit (1 )
@@ -572,22 +618,27 @@ def create_new_diff(
572618 # Find manifest files
573619 files = self .find_files (path )
574620 files_for_sending = self .load_files_for_sending (files , path )
575- has_head_scan = False
576621 if not files :
577622 return Diff (id = "no_diff_id" )
578623
579624 try :
580625 # Get head scan ID
581626 head_full_scan_id = self .get_head_scan_for_repo (params .repo )
582- if head_full_scan_id is not None :
583- has_head_scan = True
584627 except APIResourceNotFound :
585628 head_full_scan_id = None
586629
630+ if head_full_scan_id is None :
631+ tmp_params = params
632+ tmp_params .tmp = True
633+ tmp_params .set_as_pending_head = False
634+ tmp_params .make_default_branch = False
635+ head_full_scan = self .create_full_scan (Core .empty_head_scan_file (), params )
636+ head_full_scan_id = head_full_scan .id
637+
587638 # Create new scan
588639 try :
589640 new_scan_start = time .time ()
590- new_full_scan = self .create_full_scan (files_for_sending , params , has_head_scan )
641+ new_full_scan = self .create_full_scan (files_for_sending , params )
591642 new_full_scan .sbom_artifacts = self .get_sbom_data (new_full_scan .id )
592643 new_scan_end = time .time ()
593644 log .info (f"Total time to create new full scan: { new_scan_end - new_scan_start :.2f} " )
@@ -600,7 +651,10 @@ def create_new_diff(
600651 log .error (f"Stack trace:\n { traceback .format_exc ()} " )
601652 raise
602653
603- added_packages , removed_packages = self .get_added_and_removed_packages (head_full_scan_id , new_full_scan )
654+ scans_ready = self .check_full_scans_status (head_full_scan_id , new_full_scan .id )
655+ if scans_ready is False :
656+ log .error (f"Full scans did not complete within { self .config .timeout } seconds" )
657+ added_packages , removed_packages = self .get_added_and_removed_packages (head_full_scan_id , new_full_scan .id )
604658
605659 diff = self .create_diff_report (added_packages , removed_packages )
606660
0 commit comments