Skip to content

Commit a99c4f8

Browse files
Only scan for binary packages optionally
Introduce a new option --binary-package which looks for package/dependency data in binaries. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent e0460ef commit a99c4f8

File tree

7 files changed

+84
-32
lines changed

7 files changed

+84
-32
lines changed

docs/source/rst_snippets/basic_options.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ documenting a program's options. For example:
3333
--system-package Scan ``<input>`` for installed system package
3434
databases.
3535

36+
-b, --binary-package Scan ``<input>`` for package and dependency related
37+
data in binaries. Note that looking for packages
38+
in binaries makes package scan slower.
39+
Currently supported binaries: Go, Rust.
40+
3641
--package-only Scan ``<input>`` for system and application packages,
3742
only for package metadata, without license/
3843
copyright detection and package assembly.

src/packagedcode/__init__.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -251,32 +251,27 @@
251251
# detect these binaries instead of datafile path patterns
252252
# as these are optionally installed, we can skip checking
253253
# for filetype if these are not available
254-
BINARY_HANDLERS_PRESENT = False
255254
BINARY_PACKAGE_DATAFILE_HANDLERS = []
256255

257256
try:
258257
from go_inspector.binary import get_go_binary_handler
259258
handler = get_go_binary_handler()
260-
APPLICATION_PACKAGE_DATAFILE_HANDLERS.append(handler)
261259
BINARY_PACKAGE_DATAFILE_HANDLERS.append(handler)
262-
BINARY_HANDLERS_PRESENT = True
263260
except ImportError:
264261
pass
265262

266263
try:
267264
from rust_inspector.packages import get_rust_binary_handler
268265
handler = get_rust_binary_handler()
269-
APPLICATION_PACKAGE_DATAFILE_HANDLERS.append(handler)
270266
BINARY_PACKAGE_DATAFILE_HANDLERS.append(handler)
271-
BINARY_HANDLERS_PRESENT = True
272267
except ImportError:
273268
pass
274269

275270
ALL_DATAFILE_HANDLERS = (
276271
APPLICATION_PACKAGE_DATAFILE_HANDLERS + [
277272
p for p in SYSTEM_PACKAGE_DATAFILE_HANDLERS
278273
if p not in APPLICATION_PACKAGE_DATAFILE_HANDLERS
279-
]
274+
] + BINARY_PACKAGE_DATAFILE_HANDLERS
280275
)
281276

282277
# registry of all handler classes keyed by datasource_id

src/packagedcode/plugin_package.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,20 @@ class PackageScanner(ScanPlugin):
170170
help_group=SCAN_GROUP,
171171
sort_order=21,
172172
),
173+
PluggableCommandLineOption(
174+
(
175+
'-b',
176+
'--binary-package',
177+
),
178+
is_flag=True,
179+
default=False,
180+
help=(
181+
'Scan <input> for package and dependency related data in binaries. '
182+
'Currently supported binaries: Go, Rust.'
183+
),
184+
help_group=SCAN_GROUP,
185+
sort_order=22,
186+
),
173187
PluggableCommandLineOption(
174188
(
175189
'--package-only',
@@ -182,7 +196,7 @@ class PackageScanner(ScanPlugin):
182196
'license/copyright detection and top-level package creation.'
183197
),
184198
help_group=SCAN_GROUP,
185-
sort_order=22,
199+
sort_order=23,
186200
),
187201
PluggableCommandLineOption(
188202
('--list-packages',),
@@ -195,10 +209,17 @@ class PackageScanner(ScanPlugin):
195209
),
196210
]
197211

198-
def is_enabled(self, package, system_package, package_only, **kwargs):
199-
return package or system_package or package_only
212+
def is_enabled(self, package, system_package, binary_package, package_only, **kwargs):
213+
return package or system_package or binary_package or package_only
200214

201-
def get_scanner(self, package=True, system_package=False, package_only=False, **kwargs):
215+
def get_scanner(
216+
self,
217+
package=True,
218+
system_package=False,
219+
binary_package=False,
220+
package_only=False,
221+
**kwargs
222+
):
202223
"""
203224
Return a scanner callable to scan a file for package data.
204225
"""
@@ -208,6 +229,7 @@ def get_scanner(self, package=True, system_package=False, package_only=False, **
208229
get_package_data,
209230
application=package,
210231
system=system_package,
232+
binary=binary_package,
211233
package_only=package_only,
212234
)
213235

@@ -464,7 +486,7 @@ def get_package_and_deps(codebase, package_adder=add_to_package, strip_root=Fals
464486
resource.scan_errors.append(msg)
465487
resource.save(codebase)
466488

467-
if TRACE:
489+
if TRACE_ASSEMBLY:
468490
raise Exception(msg) from e
469491

470492
return packages, dependencies

src/packagedcode/recognize.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from commoncode.fileutils import as_posixpath
1515

1616
from packagedcode import HANDLER_BY_DATASOURCE_ID
17-
from packagedcode import BINARY_HANDLERS_PRESENT
1817
from packagedcode import BINARY_PACKAGE_DATAFILE_HANDLERS
1918
from packagedcode import models
2019
from packagedcode.cache import get_cache
@@ -47,6 +46,7 @@ def recognize_package_data(
4746
location,
4847
application=True,
4948
system=False,
49+
binary=False,
5050
package_only=False,
5151
):
5252
"""
@@ -61,16 +61,18 @@ def recognize_package_data(
6161

6262
return list(_parse(
6363
location=location,
64-
package_only=package_only,
6564
application=application,
6665
system=system,
66+
binary=binary,
67+
package_only=package_only,
6768
))
6869

6970

7071
def _parse(
7172
location,
7273
application=True,
7374
system=False,
75+
binary=False,
7476
package_only=False,
7577
):
7678
"""
@@ -83,30 +85,39 @@ def _parse(
8385
package_path = as_posixpath(location)
8486
package_patterns = get_cache()
8587

86-
assert application or system or package_only
88+
has_patterns = application or system or package_only
89+
assert has_patterns or binary
8790
if package_only or (application and system):
8891
package_matcher = package_patterns.all_package_matcher
8992
elif application:
9093
package_matcher = package_patterns.application_package_matcher
9194
elif system:
9295
package_matcher = package_patterns.system_package_matcher
9396

94-
matched_patterns = package_matcher.match(package_path)
97+
matched_patterns = []
98+
if has_patterns:
99+
matched_patterns = package_matcher.match(package_path)
95100

96-
datafile_handlers = []
101+
all_handler_ids = []
97102
for matched_pattern in matched_patterns:
98103
regex, _match = matched_pattern
99104
handler_ids = package_patterns.handler_by_regex.get(regex.pattern)
100105
if TRACE:
101106
logger_debug(f'_parse:.handler_ids: {handler_ids}')
102107

103-
datafile_handlers.extend([
104-
HANDLER_BY_DATASOURCE_ID.get(handler_id)
108+
all_handler_ids.extend([
109+
handler_id
105110
for handler_id in handler_ids
111+
if handler_id not in all_handler_ids
106112
])
107113

114+
datafile_handlers = [
115+
HANDLER_BY_DATASOURCE_ID.get(handler_id)
116+
for handler_id in all_handler_ids
117+
]
118+
108119
if not datafile_handlers:
109-
if BINARY_HANDLERS_PRESENT:
120+
if binary:
110121
datafile_handlers.extend(BINARY_PACKAGE_DATAFILE_HANDLERS)
111122
elif TRACE:
112123
logger_debug(f'_parse: no package datafile detected at {package_path}')

src/scancode/api.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -256,20 +256,28 @@ def get_licenses(
256256
SCANCODE_DEBUG_PACKAGE_API = os.environ.get('SCANCODE_DEBUG_PACKAGE_API', False)
257257

258258

259-
def _get_package_data(location, application=True, system=False, package_only=False, **kwargs):
259+
def _get_package_data(
260+
location,
261+
application=True,
262+
system=False,
263+
binary=False,
264+
package_only=False,
265+
**kwargs
266+
):
260267
"""
261268
Return a mapping of package manifest information detected in the file at ``location``.
262269
Include ``application`` packages (such as pypi) and/or ``system`` packages.
263270
Note that all exceptions are caught if there are any errors while parsing a
264271
package manifest.
265272
"""
266-
assert application or system or package_only
273+
assert application or system or binary or package_only
267274
from packagedcode.recognize import recognize_package_data
268275
try:
269276
return recognize_package_data(
270277
location=location,
271278
application=application,
272279
system=system,
280+
binary=binary,
273281
package_only=package_only,
274282
) or []
275283

@@ -300,7 +308,14 @@ def get_package_info(location, **kwargs):
300308
return dict(packages=[p.to_dict() for p in packages])
301309

302310

303-
def get_package_data(location, application=True, system=False, package_only=False, **kwargs):
311+
def get_package_data(
312+
location,
313+
application=True,
314+
system=False,
315+
binary=False,
316+
package_only=False,
317+
**kwargs
318+
):
304319
"""
305320
Return a mapping of package manifest information detected in the file at
306321
`location`.
@@ -313,6 +328,7 @@ def get_package_data(location, application=True, system=False, package_only=Fals
313328
location=location,
314329
application=application,
315330
system=system,
331+
binary=binary,
316332
package_only=package_only,
317333
**kwargs,
318334
) or []

tests/packagedcode/test_cargo.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def test_scan_works_on_rust_binary_with_inspector(self):
159159
test_file = self.get_test_loc('cargo/binary/cargo_dependencies')
160160
expected_file = self.get_test_loc('cargo/binary/cargo-binary.expected.json')
161161
result_file = self.get_temp_file('results.json')
162-
run_scan_click(['--package', test_file, '--json', result_file])
162+
run_scan_click(['--binary-package', test_file, '--json', result_file])
163163
check_json_scan(
164164
expected_file, result_file, remove_uuid=True, regen=REGEN_TEST_FIXTURES
165165
)

tests/scancode/data/help/help_linux.txt

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,18 @@ Usage: scancode [OPTIONS] <OUTPUT FORMAT OPTION(s)> <input>...
88
Options:
99

1010
primary scans:
11-
-l, --license Scan <input> for licenses.
12-
-p, --package Scan <input> for application package and dependency
13-
manifests, lockfiles and related data.
14-
--system-package Scan <input> for installed system package databases.
15-
--package-only Scan for system and application package data and skip
16-
license/copyright detection and top-level package creation.
17-
-c, --copyright Scan <input> for copyrights.
18-
--go-symbol Collect Go symbols.
19-
--rust-symbol Collect Rust symbols from rust binaries.
11+
-l, --license Scan <input> for licenses.
12+
-p, --package Scan <input> for application package and dependency
13+
manifests, lockfiles and related data.
14+
--system-package Scan <input> for installed system package databases.
15+
-b, --binary-package Scan <input> for package and dependency related data in
16+
binaries. Currently supported binaries: Go, Rust.
17+
--package-only Scan for system and application package data and skip
18+
license/copyright detection and top-level package
19+
creation.
20+
-c, --copyright Scan <input> for copyrights.
21+
--go-symbol Collect Go symbols.
22+
--rust-symbol Collect Rust symbols from rust binaries.
2023

2124
other scans:
2225
-i, --info Scan <input> for file information (size, checksums, etc).

0 commit comments

Comments
 (0)