1313# limitations under the License.
1414import asyncio
1515import multiprocessing
16+ import random
1617import subprocess
1718import sys
19+ import time
1820from pathlib import Path
1921from typing import List , Optional , Tuple
2022
23+ import attrs
2124import filelock
2225import nbconvert
2326import nbformat
@@ -180,6 +183,13 @@ def execute_and_export_notebook(paths: _NBInOutPaths) -> Optional[Exception]:
180183 return None
181184
182185
186+ @attrs .frozen
187+ class _NotebookRunResult :
188+ nb_in : Path
189+ err : Optional [Exception ]
190+ duration_s : float
191+
192+
183193class _NotebookRunClosure :
184194 """Used to run notebook execution logic in subprocesses."""
185195
@@ -189,7 +199,7 @@ def __init__(self, reporoot: Path, output_nbs: bool, output_md: bool, only_out_o
189199 self .output_md = output_md
190200 self .only_out_of_date = only_out_of_date
191201
192- def __call__ (self , nb_rel_path : Path , sourceroot : Path ) -> Tuple [ Path , Optional [ Exception ]] :
202+ def __call__ (self , nb_rel_path : Path , sourceroot : Path ) -> _NotebookRunResult :
193203 paths = _NBInOutPaths .from_nb_rel_path (
194204 nb_rel_path ,
195205 reporoot = self .reporoot ,
@@ -200,15 +210,21 @@ def __call__(self, nb_rel_path: Path, sourceroot: Path) -> Tuple[Path, Optional[
200210
201211 if self .only_out_of_date and not paths .needs_reexport ():
202212 print (f'{ nb_rel_path } up to date' )
203- return paths .nb_in , None
213+ return _NotebookRunResult ( paths .nb_in , None , 0.0 )
204214
215+ start = time .time ()
205216 err = execute_and_export_notebook (paths )
206- print (f"Exported { nb_rel_path } " )
207- return paths .nb_in , err
217+ end = time .time ()
218+ print (f"Exported { nb_rel_path } in { end - start :.2f} seconds." )
219+ return _NotebookRunResult (paths .nb_in , err , duration_s = end - start )
208220
209221
210222def execute_and_export_notebooks (
211- * , output_nbs : bool , output_md : bool , only_out_of_date : bool = True
223+ * ,
224+ output_nbs : bool ,
225+ output_md : bool ,
226+ only_out_of_date : bool = True ,
227+ n_workers : Optional [int ] = None ,
212228):
213229 """Find, execute, and export all checked-in ipynbs.
214230
@@ -217,23 +233,41 @@ def execute_and_export_notebooks(
217233 output_md: Whether to save the executed notebooks as markdown
218234 only_out_of_date: Only re-execute and re-export notebooks whose output files
219235 are out of date.
236+ n_workers: If set to 1, do not use parallelization. If set to `None` (the default),
237+ `multiprocessing.Pool()` will be used, which uses the number of processors as
238+ a default. Otherwise, this argument is passed to
239+ `multiprocessing.Pool(n_workers)` to execute notebooks in parallel on this many
240+ worker processes.
220241 """
221242 reporoot = get_git_root ()
222243 nb_rel_paths = get_nb_rel_paths (sourceroot = reporoot / 'qualtran' )
223244 nb_rel_paths += get_nb_rel_paths (sourceroot = reporoot / 'tutorials' )
245+ random .shuffle (nb_rel_paths )
246+ print (f"Found { len (nb_rel_paths )} notebooks." )
224247 func = _NotebookRunClosure (
225248 reporoot = reporoot ,
226249 output_nbs = output_nbs ,
227250 output_md = output_md ,
228251 only_out_of_date = only_out_of_date ,
229252 )
230- with multiprocessing .Pool () as pool :
231- results = pool .starmap (func , nb_rel_paths )
232- bad_nbs = [nbname for nbname , err in results if err is not None ]
253+ if n_workers == 1 :
254+ print ("(Not using multiprocessing, n_workers=1)" )
255+ results = [func (nb_rel_path , sourceroot ) for nb_rel_path , sourceroot in nb_rel_paths ]
256+ else :
257+ print (f"Multiprocessing with { n_workers = } " )
258+ with multiprocessing .Pool (n_workers , maxtasksperchild = 1 ) as pool :
259+ results = pool .starmap (func , nb_rel_paths )
260+ assert results
261+ bad_nbs = [result .nb_in for result in results if result .err is not None ]
233262
234263 if len (bad_nbs ) > 0 :
235264 print ()
236265 print ("Errors in notebooks:" )
237266 for nb in bad_nbs :
238267 print (' ' , nb )
239268 sys .exit (1 )
269+
270+ duration_nbs = sorted (results , key = lambda r : r .duration_s , reverse = True )
271+ print ("Slowest 10 notebooks:" )
272+ for result in duration_nbs [:10 ]:
273+ print (f'{ result .duration_s :5.2f} s { result .nb_in } ' )
0 commit comments