Skip to content

Commit 05942b2

Browse files
committed
Collect and handle the "matched_text" from the --license-text option from the scancode-toolkit
Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 373ab07 commit 05942b2

File tree

6 files changed

+44
-12
lines changed

6 files changed

+44
-12
lines changed

CHANGELOG.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
==============================
22
Changelog
33
==============================
4-
2022-09-21
4+
2022-xx-xx
55
Release 7.1.0
66

77
* Fixed version mismatch (https://github.com/nexB/aboutcode-toolkit/issues/510)
88
* Improve `check` performance (https://github.com/nexB/aboutcode-toolkit/issues/511)
99
* Relax the requirement to have the same format for input and output for `transform`
10+
* Collect and handle the "matched_text" produced by the `--license-text` option of scancode-toolkit
1011

1112

1213
2022-03-21

src/attributecode/attrib.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -114,41 +114,69 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
114114
if scancode:
115115
meet_score_licenses_list = []
116116
for about in abouts:
117+
# See if the input has 'matched_text'
118+
matched_text_exist = False
119+
try:
120+
if about.matched_text:
121+
matched_text_exist = True
122+
except:
123+
pass
117124
# We will use a dictionary to keep the unique license key
118125
# which the dictionary key is the license key and the dictionary value
119-
# is (lic_score, lic_name)
126+
# is (lic_score, lic_name) or (lic_score, lic_name, matched_text)
120127
if about.license_key.value:
121128
updated_dict = {}
122129
lic_key = about.license_key.value
123130
lic_name = about.license_name.value
124131
lic_score = about.license_score.value
132+
if matched_text_exist:
133+
matched_text = about.matched_text.value
134+
assert len(lic_key) == len(matched_text)
125135
assert len(lic_key) == len(lic_name)
126136
assert len(lic_key) == len(lic_score)
127137
if lic_key:
128138
index = 0
129139
for key in lic_key:
130140
if key in updated_dict:
131-
previous_score, _name = updated_dict[key]
141+
if matched_text_exist:
142+
previous_score, _name, _detected_text = updated_dict[key]
143+
else:
144+
previous_score, _name = updated_dict[key]
132145
current_score = lic_score[index]
133146
if current_score > previous_score:
134-
updated_dict[key] = (lic_score[index], lic_name[index])
147+
if matched_text_exist:
148+
updated_dict[key] = (lic_score[index], lic_name[index], matched_text[index])
149+
else:
150+
updated_dict[key] = (lic_score[index], lic_name[index])
135151
else:
136-
updated_dict[key] = (lic_score[index], lic_name[index])
152+
if matched_text_exist:
153+
updated_dict[key] = (lic_score[index], lic_name[index], matched_text[index])
154+
else:
155+
updated_dict[key] = (lic_score[index], lic_name[index])
137156
index = index + 1
138157
updated_lic_key = []
139158
updated_lic_name = []
140159
updated_lic_score = []
160+
if matched_text_exist:
161+
updated_matched_text = []
141162
for lic in updated_dict:
142-
score, name = updated_dict[lic]
163+
if matched_text_exist:
164+
score, name, text = updated_dict[lic]
165+
else:
166+
score, name = updated_dict[lic]
143167
if score >= min_license_score:
144168
updated_lic_key.append(lic)
145169
updated_lic_score.append(score)
146170
updated_lic_name.append(name)
171+
if matched_text_exist:
172+
updated_matched_text.append(text)
147173
if not lic in meet_score_licenses_list:
148174
meet_score_licenses_list.append(lic)
149175
about.license_key.value = updated_lic_key
150176
about.license_name.value = updated_lic_name
151177
about.license_score.value = updated_lic_score
178+
if matched_text_exist:
179+
about.matched_text.value = updated_matched_text
152180

153181
for lic in licenses_list:
154182
if not lic.key in meet_score_licenses_list:
@@ -180,7 +208,6 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
180208

181209
# Sort the license object by key
182210
licenses_list = sorted(licenses_list, key=lambda x: x.key)
183-
184211
rendered = template.render(
185212
abouts=abouts,
186213
common_licenses=COMMON_LICENSES,

src/attributecode/gen.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,6 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
235235
running_inventory=False,
236236
reference_dir=reference_dir,
237237
)
238-
239238
for severity, message in ld_errors:
240239
if 'Custom Field' in message:
241240
field_name = message.replace('Custom Field: ', '').strip()

src/attributecode/model.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,7 +1051,7 @@ def load_dict(self, fields_dict, base_dir, scancode=False, from_attrib=False, ru
10511051
continue
10521052
if key == u'licenses':
10531053
# FIXME: use a license object instead
1054-
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score = ungroup_licenses(value)
1054+
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, lic_matched_text = ungroup_licenses(value)
10551055
if lic_key:
10561056
fields.append(('license_key', lic_key))
10571057
if lic_name:
@@ -1067,6 +1067,8 @@ def load_dict(self, fields_dict, base_dir, scancode=False, from_attrib=False, ru
10671067
# The license score is a key from scancode license scan
10681068
if lic_score:
10691069
fields.append(('license_score', lic_score))
1070+
if lic_matched_text:
1071+
fields.append(('matched_text', lic_matched_text))
10701072
# The licenses field has been ungrouped and can be removed.
10711073
# Otherwise, it will gives the following INFO level error
10721074
# 'Field licenses is a custom field.'
@@ -1660,7 +1662,7 @@ def pre_process_and_fetch_license_dict(abouts, from_check=False, api_url=None, a
16601662
url = 'https://scancode-licensedb.aboutcode.org/'
16611663
if util.have_network_connection():
16621664
if not valid_api_url(url):
1663-
msg = u"URL not reachable. Invalid 'URL'. License generation is skipped."
1665+
msg = u"URL not reachable. Invalid 'URL. License generation is skipped."
16641666
errors.append(Error(ERROR, msg))
16651667
else:
16661668
msg = u'Network problem. Please check your Internet connection. License generation is skipped.'

src/attributecode/util.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,7 @@ def ungroup_licenses(licenses):
472472
lic_url = []
473473
spdx_lic_key = []
474474
lic_score = []
475+
lic_matched_text = []
475476
for lic in licenses:
476477
if 'key' in lic:
477478
lic_key.append(lic['key'])
@@ -485,7 +486,9 @@ def ungroup_licenses(licenses):
485486
spdx_lic_key.append(lic['spdx_license_key'])
486487
if 'score' in lic:
487488
lic_score.append(lic['score'])
488-
return lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score
489+
if 'matched_text' in lic:
490+
lic_matched_text.append(lic['matched_text'])
491+
return lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, lic_matched_text
489492

490493

491494
# FIXME: add docstring

tests/test_util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -572,7 +572,7 @@ def test_ungroup_licenses(self):
572572
u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit',
573573
u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:bsd-new']
574574
expected_spdx = [u'MIT', u'BSD-3-Clause']
575-
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score = util.ungroup_licenses(about)
575+
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, _matched_text = util.ungroup_licenses(about)
576576
assert expected_lic_key == lic_key
577577
assert expected_lic_name == lic_name
578578
assert expected_lic_file == lic_file

0 commit comments

Comments
 (0)