
Commit 8245c43

a-wai authored and JenySadadia committed
config: runtime: coverage-report: differentiate job/kbuild nodes
Processing the coverage results for all test jobs of a single `kbuild` takes a very long time, leading to this script timing out after 6 hours (!). Fix this by creating a post-processing job for each test job, then merging the tracefiles and generating the HTML report only in the post-processing job for the `kbuild` node.

This requires separate code paths in the script for each node kind: we either process the raw data into a tracefile, or retrieve each child job's tracefile and merge them all. The final part of the script (report generation and artifacts upload) is then common to both node kinds.

Signed-off-by: Arnaud Ferraris <[email protected]>
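In outline, the new control flow is roughly the following (a simplified sketch using names from the diff below, not the template code itself; merge_child_tracefiles is a hypothetical placeholder for the merging steps shown in _run):

    # Each coverage-report job now branches on the kind of its parent node.
    parent_node = self._api.node.get(self._node['parent'])

    if parent_node['kind'] == 'job':
        # Parent is a single test job: turn its raw gcov data into a JSON
        # tracefile and upload it as a 'tracefile' artifact.
        tracefile, results = self._process_coverage_data(parent_node)
    else:
        # Parent is the kbuild node: fetch each child job's tracefile and
        # merge them with repeated 'gcovr --add-tracefile' options.
        tracefile = merge_child_tracefiles(parent_node)  # hypothetical helper

    # Report generation and artifact upload are then shared by both paths.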
1 parent f7bab02 commit 8245c43

File tree

1 file changed: +160 −73 lines changed

config/runtime/coverage-report.jinja2

Lines changed: 160 additions & 73 deletions
@@ -18,6 +18,20 @@ import kernelci.api.helper
 
 {%- block python_globals %}
 {{ super() }}
+DEFAULT_FAIL_RESULT = {
+    'node': {
+        'result': 'fail',
+        'artifacts': {},
+    },
+    'child_nodes': [],
+}
+DEFAULT_PASS_RESULT = {
+    'node': {
+        'result': 'pass',
+        'artifacts': {},
+    },
+    'child_nodes': [],
+}
 {% endblock %}
 
 {% block python_job -%}
@@ -89,121 +103,194 @@ class Job(BaseJob):
             'child_nodes': child_nodes,
         }
 
+    def _process_coverage_data(self, node):
+        coverage_dir = os.path.join(self._workspace, f"coverage-{node['id']}")
+
+        try:
+            data_url = self._get_artifact_url(node, 'coverage_data')
+        except:
+            self._logfile.write(f"WARNING: No coverage data available for {node['id']}\n")
+            # Return a "pass" result as this isn't an error, we can carry on
+            return None, DEFAULT_PASS_RESULT
+
+        try:
+            self._get_source(data_url, path=coverage_dir)
+            self._logfile.write(f"Downloaded coverage data from {data_url}\n")
+        except:
+            self._logfile.write(f"ERROR: Unable to download coverage data for {node['id']}\n")
+            return None, DEFAULT_FAIL_RESULT
+
+        # We now have raw coverage data available, process it
+        tracefile = coverage_dir + '.json'
+        json_summary = coverage_dir + '.summary.json'
+        self._logfile.write(f"--- Processing coverage data for {node['id']} ---\n")
+        cmd = subprocess.run(self._gcovr_cmd + [
+            '--object-directory', coverage_dir,
+            '--json', tracefile,
+            '--json-summary', json_summary,
+        ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
+        self._logfile.write(cmd.stdout)
+
+        try:
+            cmd.check_returncode()
+        except:
+            self._logfile.write(f"ERROR: Unable to process coverage data for {node['id']}\n")
+            return None, DEFAULT_FAIL_RESULT
+
+        results = self._extract_coverage(json_summary, node=node)
+        return tracefile, results
+
+
+    def _retrieve_tracefile(self, node):
+        process_nodes = self._api.node.findfast({'parent': node['id'], 'kind': 'process'})
+        for pnode in process_nodes:
+            if not pnode['name'].startswith('coverage-report'):
+                self._logfile.write(f"Ignoring post-process node {pnode['id']} ({pnode['name']})\n")
+                continue
+
+            try:
+                data_url = self._get_artifact_url(pnode, 'tracefile')
+            except:
+                self._logfile.write(f"WARNING: No tracefile artifact in {pnode['id']}\n")
+                continue
+
+            resp = requests.get(data_url, stream=True)
+            resp.raise_for_status()
+            self._logfile.write(f"Downloaded tracefile from {data_url}\n")
+            trace_name = os.path.basename(urllib.parse.urlparse(data_url).path)
+            tracefile = os.path.join(self._workspace, trace_name.removesuffix('.gz'))
+            with gzip.open(resp.raw, mode='rb') as f_in:
+                with open(tracefile, 'wb') as f_out:
+                    shutil.copyfileobj(f_in, f_out)
+            self._logfile.write(f"Uncompressed tracefile into {tracefile}\n")
+
+            return tracefile
+
+        return None
+
     def _run(self, src_path):
         self._artifacts = {}
         api_helper = kernelci.api.helper.APIHelper(self._api)
-        child_nodes = self._api.node.findfast({'parent': self._node['parent']})
+        parent_node = self._api.node.get(self._node['parent'])
 
         log_path = os.path.join(self._workspace, f"log.txt")
-        log_file = open(log_path, mode='w')
+        self._logfile = open(log_path, mode='w')
 
-        log_file.write("Getting coverage source...\n")
+        self._logfile.write("Getting coverage source...\n")
         tarball_url = self._get_artifact_url(self._node, 'coverage_source_tar_xz')
         self._get_source(tarball_url)
         # Not getting src_path from _get_source() as it doesn't work in our case
         # We do know that the top-level folder is named 'linux' however, so let's
         # just use that
-        src_path = os.path.join(self._workspace, 'linux')
-        log_file.write(f"Coverage source downloaded from {tarball_url}\n")
-
-        base_cmd = ['gcovr', '--root', src_path]
-        tracefiles = []
+        self._src_path = os.path.join(self._workspace, 'linux')
+        self._logfile.write(f"Coverage source downloaded from {tarball_url}\n")
 
-        # Download and process coverage data for all child nodes
-        for cnode in child_nodes:
-            if cnode['id'] == self._node['id']:
-                log_file.write(f"Skipping self ({cnode['id']})\n")
-                continue
+        self._gcovr_cmd = [
+            'gcovr',
+            '--gcov-ignore-parse-errors',
+            '--root', self._src_path,
+        ]
 
-            coverage_dir = os.path.join(self._workspace, f"coverage-{cnode['id']}")
-            json_summary = coverage_dir + '.summary.json'
-            try:
-                data_url = self._get_artifact_url(cnode, 'coverage_data')
-                tracefile = coverage_dir + '.json'
-                self._get_source(data_url, path=coverage_dir)
-                log_file.write(f"Downloaded coverage data from {data_url}\n")
-            except:
-                log_file.write(f"WARNING: Unable to download coverage data for {cnode['id']}\n")
-                continue
+        output_base = os.path.join(self._workspace, f"coverage-{self._node['parent']}")
 
-            # We now have raw coverage data available, process it
-            log_file.write(f"--- Processing coverage data for {cnode['id']} ---\n")
-            cmd = subprocess.run(base_cmd + [
-                '--gcov-ignore-parse-errors',
-                '--object-directory', coverage_dir,
+        if parent_node['kind'] == 'job':
+            (tracefile, results) = self._process_coverage_data(parent_node)
+            if tracefile is None:
+                self._logfile.close()
+                self._artifacts = {'log': log_path}
+                return results
+
+        else:
+            tracefiles = []
+            child_nodes = self._api.node.findfast({'parent': parent_node['id'], 'kind': 'job'})
+            # Download and process coverage data for all child nodes
+            for cnode in child_nodes:
+                if cnode['id'] == self._node['id']:
+                    self._logfile.write(f"Skipping self ({cnode['id']})\n")
+                    continue
+
+                try:
+                    job_trace = self._retrieve_tracefile(cnode)
+                except:
+                    self._logfile.write(f"WARNING: No tracefile available for {cnode['id']}\n")
+                    continue
+
+                if job_trace:
+                    tracefiles += [job_trace]
+
+            if len(tracefiles) == 0:
+                self._logfile.write(f"WARNING: No tracefile found in children for {parent_node['id']}\n")
+                self._logfile.close()
+                self._artifacts = {'log': log_path}
+                return DEFAULT_PASS_RESULT
+
+            # Coverage data has been processed for all job nodes, we can now merge the tracefiles
+            tracefile = output_base + '.json'
+            json_summary = output_base + '.summary.json'
+            args = self._gcovr_cmd
+            for trace in tracefiles:
+                args += ['--add-tracefile', trace]
+            args += [
                 '--json', tracefile,
                 '--json-summary', json_summary,
-            ], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
-            log_file.write(cmd.stdout)
-
-            try:
-                cmd.check_returncode()
-            except:
-                log_file.write(f"WARNING: Unable to process coverage data for {cnode['id']}")
-                continue
+            ]
 
-            tracefiles += [tracefile]
-            results = self._extract_coverage(json_summary, node=cnode)
-            # We only want to create child nodes reporting coverage percentages, not actually
-            # update the test node
-            if len(results['child_nodes']) > 0:
-                api_helper.submit_results(results, cnode)
+            self._logfile.write("--- Merging tracefiles ---\n")
+            cmd = subprocess.run(args,
+                                 stdout=subprocess.PIPE,
+                                 stderr=subprocess.STDOUT,
+                                 text=True)
+            self._logfile.write(cmd.stdout)
 
-        # Coverage data has been processed for all child nodes, we can now merge the tracefiles
-        args = base_cmd
-        for trace in tracefiles:
-            args += ['--add-tracefile', trace]
+            results = self._extract_coverage(json_summary, node=parent_node)
 
-        output_base = os.path.join(self._workspace, f"coverage-{self._node['parent']}")
-        json_summary = output_base + '.summary.json'
+        self._logfile.write("--- Generating HTML report ---\n")
         html_report = output_base + '.html'
-        lcov_tracefile = output_base + '.info'
-        args += [
-            '--json-summary', json_summary,
+        args = self._gcovr_cmd + [
+            '--add-tracefile', tracefile,
             '--html', html_report,
-            '--lcov', lcov_tracefile,
         ]
+        if parent_node['kind'] == 'kbuild':
+            lcov_tracefile = output_base + '.info'
+            args += ['--lcov', lcov_tracefile]
 
-        log_file.write("--- Merging tracefiles ---\n")
         cmd = subprocess.run(args,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              text=True)
-        log_file.write(cmd.stdout)
+        self._logfile.write(cmd.stdout)
 
         # Ensure job completed successfully or report failure
         try:
             cmd.check_returncode()
         except:
-            log_file.write(f"ERROR: Unable to generate coverage report\n")
-            log_file.close()
-
+            self._logfile.write(f"ERROR: Unable to generate coverage report\n")
+            self._logfile.close()
             self._artifacts = {'log': log_path}
-            return {
-                'node': {
-                    'result': 'fail',
-                    'artifacts': {},
-                },
-                'child_nodes': [],
-            }
-
-        log_file.write("--- Compressing artifacts ---\n")
-        compressed_lcov = lcov_tracefile + '.gz'
-        with open(lcov_tracefile, 'rb') as f_in:
-            with gzip.open(compressed_lcov, 'wb') as f_out:
+            return DEFAULT_FAIL_RESULT
+
+        self._logfile.write("--- Compressing artifacts ---\n")
+        compressed_trace = tracefile + '.gz'
+        with open(tracefile, 'rb') as f_in:
+            with gzip.open(compressed_trace, 'wb') as f_out:
                 shutil.copyfileobj(f_in, f_out)
 
         # Finish writing the job log and upload it along with other artifacts
-        log_file.write("--- Job successful ---\n")
-        log_file.close()
+        self._logfile.write("--- Job successful ---\n")
+        self._logfile.close()
 
         self._artifacts = {
             'coverage_report': html_report,
-            'tracefile': compressed_lcov,
+            'tracefile': compressed_trace,
             'log': log_path,
         }
 
-        return self._extract_coverage(json_summary)
+        if parent_node['kind'] == 'kbuild':
+            compressed_lcov = lcov_tracefile + '.gz'
+            with open(lcov_tracefile, 'rb') as f_in:
+                with gzip.open(compressed_lcov, 'wb') as f_out:
+                    shutil.copyfileobj(f_in, f_out)
+            self._artifacts['lcov_tracefile'] = compressed_lcov
 
         return results

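For reference, the two gcovr passes driven by this template can be reproduced standalone roughly as follows. This is a sketch: the file names are hypothetical, while the flags are the ones the template passes above.

    import subprocess

    base = ['gcovr', '--gcov-ignore-parse-errors', '--root', 'linux']

    # Pass 1: merge the per-job JSON tracefiles into a single tracefile.
    merge = list(base)
    for trace in ['coverage-job-a.json', 'coverage-job-b.json']:
        merge += ['--add-tracefile', trace]
    merge += ['--json', 'merged.json', '--json-summary', 'merged.summary.json']
    subprocess.run(merge, check=True)

    # Pass 2: render HTML (and, for kbuild nodes, LCOV) from the merged tracefile.
    subprocess.run(base + ['--add-tracefile', 'merged.json',
                           '--html', 'report.html',
                           '--lcov', 'report.info'], check=True)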