88#
99
1010import os
11- import json
12- import attr
1311import fnmatch
12+ import pickle
13+ import multiregex
1414
15- from commoncode . fileutils import create_dir
15+ import attr
1616
17+ from commoncode .fileutils import create_dir
1718from packagedcode import APPLICATION_PACKAGE_DATAFILE_HANDLERS
1819from packagedcode import SYSTEM_PACKAGE_DATAFILE_HANDLERS
1920
2930# global in-memory cache of the PkgManifestPatternsCache
3031_PACKAGE_CACHE = None
3132
33+ # This is the Pickle protocol we use, which was added in Python 3.4.
34+ PICKLE_PROTOCOL = 4
35+
3236PACKAGE_INDEX_LOCK_TIMEOUT = 60 * 6
3337PACKAGE_INDEX_DIR = 'package_patterns_index'
3438PACKAGE_INDEX_FILENAME = 'index_cache'
@@ -45,23 +49,21 @@ class PkgManifestPatternsCache:
4549 """
4650
4751 handler_by_regex = attr .ib (default = attr .Factory (dict ))
48- system_multiregex_patterns = attr .ib (default = attr .Factory (list ))
49- application_multiregex_patterns = attr .ib (default = attr .Factory (list ))
52+ system_package_matcher = attr .ib (default = None )
53+ application_package_matcher = attr .ib (default = None )
54+ all_package_matcher = attr .ib (default = None )
5055
5156 @staticmethod
52- def all_multiregex_patterns (self ):
53- return self . application_multiregex_patterns + [
57+ def all_multiregex_patterns (application_multiregex_patterns , system_multiregex_patterns ):
58+ return application_multiregex_patterns + [
5459 multiregex_pattern
55- for multiregex_pattern in self . system_multiregex_patterns
56- if multiregex_pattern not in self . application_multiregex_patterns
60+ for multiregex_pattern in system_multiregex_patterns
61+ if multiregex_pattern not in application_multiregex_patterns
5762 ]
5863
5964 @classmethod
60- def from_mapping (cls , cache_mapping ):
61- return cls (** cache_mapping )
62-
63- @staticmethod
6465 def load_or_build (
66+ cls ,
6567 packagedcode_cache_dir = packagedcode_cache_dir ,
6668 scancode_cache_dir = scancode_cache_dir ,
6769 force = False ,
@@ -94,7 +96,6 @@ def load_or_build(
9496 print (str (e ))
9597 print (traceback .format_exc ())
9698
97-
9899 from scancode import lockfile
99100 lock_file = os .path .join (scancode_cache_dir , PACKAGE_LOCKFILE_NAME )
100101
@@ -109,29 +110,31 @@ def load_or_build(
109110 application_multiregex_patterns , application_handlers_by_regex = build_mappings_and_multiregex_patterns (
110111 datafile_handlers = application_package_datafile_handlers ,
111112 )
112- package_cache = PkgManifestPatternsCache (
113+ all_multiregex_matcher = PkgManifestPatternsCache .all_multiregex_patterns (
114+ application_multiregex_patterns , system_multiregex_patterns ,
115+ )
116+ system_package_matcher = multiregex .RegexMatcher (system_multiregex_patterns )
117+ application_package_matcher = multiregex .RegexMatcher (application_multiregex_patterns )
118+ all_package_matcher = multiregex .RegexMatcher (all_multiregex_matcher )
119+ package_cache = cls (
113120 handler_by_regex = system_handlers_by_regex | application_handlers_by_regex ,
114- system_multiregex_patterns = system_multiregex_patterns ,
115- application_multiregex_patterns = application_multiregex_patterns ,
121+ system_package_matcher = system_package_matcher ,
122+ application_package_matcher = application_package_matcher ,
123+ all_package_matcher = all_package_matcher ,
116124 )
117125 package_cache .dump (cache_file )
118126 return package_cache
119127
120128 except lockfile .LockTimeout :
121129 # TODO: handle unable to lock in a nicer way
122- raise
130+ raise
123131
124132 def dump (self , cache_file ):
125133 """
126- Dump this package cache on disk at ``cache_file``.
134+ Dump this package cache on disk at ``cache_file``.
127135 """
128- package_cache = {
129- "handler_by_regex" : self .handler_by_regex ,
130- "system_multiregex_patterns" : self .system_multiregex_patterns ,
131- "application_multiregex_patterns" : self .application_multiregex_patterns ,
132- }
133- with open (cache_file , 'w' ) as f :
134- json .dump (package_cache , f )
136+ with open (cache_file , 'wb' ) as fn :
137+ pickle .dump (self , fn , protocol = PICKLE_PROTOCOL )
135138
136139
137140def get_prematchers_from_glob_pattern (pattern ):
@@ -203,20 +206,16 @@ def get_cache(
203206
204207def load_cache_file (cache_file ):
205208 """
206- Return a PkgManifestPatternsCache loaded from JSON ``cache_file``.
209+ Return a PkgManifestPatternsCache loaded from ``cache_file``.
207210 """
208- with open (cache_file ) as f :
209- cache = json .load (f )
210-
211- # convert multiregex patterns from list to tuples while loading
212- cache_transformed = {"handler_by_regex" : cache .get ("handler_by_regex" )}
213- cache_transformed ["system_multiregex_patterns" ] = [
214- tuple (multiregex_pattern )
215- for multiregex_pattern in cache .get ("system_multiregex_patterns" )
216- ]
217- cache_transformed ["application_multiregex_patterns" ] = [
218- tuple (multiregex_pattern )
219- for multiregex_pattern in cache .get ("application_multiregex_patterns" )
220- ]
221-
222- return PkgManifestPatternsCache .from_mapping (cache_transformed )
211+ with open (cache_file , 'rb' ) as lfc :
212+ try :
213+ return pickle .load (lfc )
214+ except Exception as e :
215+ msg = (
216+ 'ERROR: Failed to load package cache (the file may be corrupted ?).\n '
217+ f'Please delete "{ cache_file } " and retry.\n '
218+ 'If the problem persists, copy this error message '
219+ 'and submit a bug report at https://github.com/nexB/scancode-toolkit/issues/'
220+ )
221+ raise Exception (msg ) from e
0 commit comments