diff --git a/.circleci/config.yml b/.circleci/config.yml index 258575ec..701fc350 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -220,7 +220,7 @@ jobs: PYTHON_VERSION: "3_6" CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_6 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_6 - VERSION: 0.6.3 + VERSION: 0.6.4 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -236,7 +236,7 @@ jobs: environment: CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_7 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_7 - VERSION: 0.6.3 + VERSION: 0.6.4 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -250,7 +250,7 @@ jobs: environment: CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_8 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_8 - VERSION: 0.6.3 + VERSION: 0.6.4 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker @@ -264,7 +264,7 @@ jobs: environment: CIRCLE_ARTIFACTS: /tmp/circleci-artifacts/3_11 CIRCLE_TEST_REPORTS: /tmp/circleci-test-results/3_11 - VERSION: 0.6.3 + VERSION: 0.6.4 PANDOC_RELEASES_URL: https://github.com/jgm/pandoc/releases YARN_STATIC_DIR: notebooker/web/static/ IMAGE_NAME: mangroup/notebooker diff --git a/CHANGELOG.md b/CHANGELOG.md index 2d2278c3..fd325321 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,16 @@ +0.6.4 (2024-10-08) +------------------ +* Feature: Categorization of the notebooks, allowing a special category tag to be set on notebooks for easy grouping +* This feature solves issues with huge repos where only a limited number of notebooks are used in the webapp: + * Very deep navigation tree in the UI for deepest notebook paths + * Super long names of the reports in scheduler and results pages + * Hard tile navigation for the reports +* Enabling categorization of the notebooks using a special flag: + * 
Add 'category=..' tag to the relevant notebooks' metadata + * Execute notebooker with --categorization flag +* Important: only categorized notebooks, those having a 'category=..' tag, are shown as options to select in the webapp +* Keeps original navigation by directory structure if the categorization flag is not set + 0.6.3 (2024-07-11) ------------------ * Feature: Flag to preserve original working directory when running notebooks to make local imports and relative paths work. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index c098c5fd..dd958dea 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -20,7 +20,7 @@ Do also make sure to run the webapp and make sure you haven't broken anything. When releasing a new version, please increment the version number in: * `notebooker/version.py` * `.circleci/config.yml` -* `docs/config.yml` +* `docs/conf.py` * `notebooker/web/static/package.json` This build will validate that these numbers match those given in `.circleci/config.yml`. diff --git a/docs/conf.py b/docs/conf.py index 8b04cb8e..d282a5c7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ author = "Man Group Quant Tech" # The full version, including alpha/beta/rc tags -release = "0.6.3" +release = "0.6.4" # -- General configuration --------------------------------------------------- diff --git a/notebooker/_entrypoints.py b/notebooker/_entrypoints.py index 23f0d581..e1b729e6 100644 --- a/notebooker/_entrypoints.py +++ b/notebooker/_entrypoints.py @@ -70,6 +70,12 @@ def filesystem_default_value(dirname): is_flag=True, help="If selected, notebooker set current working directory to absolute path of the notebook to keep it local context available", ) +@click.option( + "--categorization", + default=False, + is_flag=True, + help="If selected, discovers only templates with the 'category=example' tags set to any cell and groups notebooks by their category names", +) @click.option( "--default-mailfrom", default=DEFAULT_MAILFROM_ADDRESS, help="Set a new value for the 
default mailfrom setting." ) @@ -91,6 +97,7 @@ def base_notebooker( py_template_subdir, notebooker_disable_git, execute_at_origin, + categorization, default_mailfrom, running_timeout, serializer_cls, @@ -106,6 +113,7 @@ def base_notebooker( PY_TEMPLATE_SUBDIR=py_template_subdir, NOTEBOOKER_DISABLE_GIT=notebooker_disable_git, EXECUTE_AT_ORIGIN=execute_at_origin, + CATEGORIZATION=categorization, DEFAULT_MAILFROM=default_mailfrom, RUNNING_TIMEOUT=running_timeout, ) @@ -180,6 +188,7 @@ def start_webapp( @base_notebooker.command() @click.option("--report-name", help="The name of the template to execute, relative to the template directory.") +@click.option("--category", default="", help="Category of the template.") @click.option( "--overrides-as-json", default="{}", help="The parameters to inject into the notebook template, in JSON format." ) @@ -230,6 +239,7 @@ def start_webapp( def execute_notebook( config: BaseConfig, report_name, + category, overrides_as_json, iterate_override_values_of, report_title, @@ -250,6 +260,7 @@ def execute_notebook( return execute_notebook_entrypoint( config, report_name, + category, overrides_as_json, iterate_override_values_of, report_title, diff --git a/notebooker/constants.py b/notebooker/constants.py index a957a061..8b3df5c7 100644 --- a/notebooker/constants.py +++ b/notebooker/constants.py @@ -88,6 +88,7 @@ class NotebookResultBase(object): mailfrom = attr.ib(default=None) email_subject = attr.ib(default=None) is_slideshow = attr.ib(default=False) + category = attr.ib(default=None) def saveable_output(self): out = attr.asdict(self) @@ -164,6 +165,7 @@ class NotebookResultComplete(NotebookResultBase): scheduler_job_id = attr.ib(default=None) mailfrom = attr.ib(default=None) is_slideshow = attr.ib(default=False) + category = attr.ib(default=None) def html_resources(self): """We have to save the raw images using Mongo GridFS - figure out where they will go here""" @@ -197,6 +199,7 @@ def saveable_output(self): "raw_html": "", # backwards 
compatibility for versions<0.3.1 "mailfrom": self.mailfrom, "is_slideshow": self.is_slideshow, + "category": self.category, } def __repr__(self): @@ -205,7 +208,7 @@ def __repr__(self): "job_start_time={job_start_time}, job_finish_time={job_finish_time}, update_time={update_time}, " "report_title={report_title}, overrides={overrides}, mailto={mailto}, error_mailto={error_mailto}, " "mailfrom={mailfrom}, email_subject={email_subject}, generate_pdf_output={generate_pdf_output}, " - "hide_code={hide_code}, scheduler_job_id={scheduler_job_id}, is_slideshow={is_slideshow})".format( + "hide_code={hide_code}, scheduler_job_id={scheduler_job_id}, is_slideshow={is_slideshow}, category={category})".format( job_id=self.job_id, status=self.status, report_name=self.report_name, @@ -222,5 +225,6 @@ def __repr__(self): hide_code=self.hide_code, scheduler_job_id=self.scheduler_job_id, is_slideshow=self.is_slideshow, + category=self.category, ) ) diff --git a/notebooker/execute_notebook.py b/notebooker/execute_notebook.py index bc6ba10c..ce1770dc 100644 --- a/notebooker/execute_notebook.py +++ b/notebooker/execute_notebook.py @@ -54,6 +54,7 @@ def _run_checks( scheduler_job_id: Optional[str] = None, mailfrom: Optional[str] = None, is_slideshow: bool = False, + category: Optional[str] = None, ) -> NotebookResultComplete: """ This is the actual method which executes a notebook, whether running in the webapp or via the entrypoint. 
@@ -152,6 +153,7 @@ def _run_checks( generate_pdf_output=generate_pdf_output, report_name=template_name, report_title=report_title, + category=category, overrides=overrides, scheduler_job_id=scheduler_job_id, mailfrom=mailfrom, @@ -164,6 +166,7 @@ def _run_checks( def run_report( job_submit_time, report_name, + category, overrides, result_serializer, report_title="", @@ -222,6 +225,7 @@ def run_report( scheduler_job_id=scheduler_job_id, mailfrom=mailfrom, is_slideshow=is_slideshow, + category=category, ) logger.info("Successfully got result.") result_serializer.save_check_result(result) @@ -234,6 +238,7 @@ def run_report( job_start_time=job_submit_time, report_name=report_name, report_title=report_title, + category=category, error_info=error_info, overrides=overrides, mailto=mailto, @@ -257,6 +262,7 @@ def run_report( return run_report( job_submit_time, report_name, + category, overrides, result_serializer, report_title=report_title, @@ -351,6 +357,7 @@ def _get_overrides(overrides_as_json: AnyStr, iterate_override_values_of: Option def execute_notebook_entrypoint( config: BaseConfig, report_name: str, + category: str, overrides_as_json: str, iterate_override_values_of: Union[List[str], str], report_title: str, @@ -377,6 +384,7 @@ def execute_notebook_entrypoint( start_time = datetime.datetime.now() logger.info("Running a report with these parameters:") logger.info("report_name = %s", report_name) + logger.info("category = %s", category) logger.info("overrides_as_json = %s", overrides_as_json) logger.info("iterate_override_values_of = %s", iterate_override_values_of) logger.info("report_title = %s", report_title) @@ -407,6 +415,7 @@ def execute_notebook_entrypoint( result = run_report( start_time, report_name, + category, overrides, result_serializer, report_title=report_title, @@ -495,6 +504,7 @@ def run_report_in_subprocess( email_subject=None, n_retries=3, is_slideshow=False, + category=None, ) -> str: """ Execute the Notebooker report in a subprocess. 
@@ -513,6 +523,7 @@ def run_report_in_subprocess( :param email_subject: `str` if passed, then this string will be used in the email subject :param n_retries: The number of retries to attempt. :param is_slideshow: Whether the notebook is a reveal.js slideshow or not. + :param category: Category of the notebook :return: The unique job_id. """ if error_mailto is None: @@ -535,6 +546,7 @@ def run_report_in_subprocess( is_slideshow=is_slideshow, email_subject=email_subject, mailfrom=mailfrom, + category=category, ) command = ( @@ -578,6 +590,7 @@ def run_report_in_subprocess( + (["--is-slideshow"] if is_slideshow else []) + ([f"--scheduler-job-id={scheduler_job_id}"] if scheduler_job_id is not None else []) + ([f"--mailfrom={mailfrom}"] if mailfrom is not None else []) + + ([f"--category={category}"] if category is not None else []) + ([f"--email-subject={email_subject}"] if email_subject else []) ) p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/notebooker/serialization/mongo.py b/notebooker/serialization/mongo.py index 4ddf97b2..0ad1d8c6 100644 --- a/notebooker/serialization/mongo.py +++ b/notebooker/serialization/mongo.py @@ -209,6 +209,7 @@ def save_check_stub( is_slideshow: bool = False, email_subject: Optional[str] = None, mailfrom: Optional[str] = None, + category: Optional[str] = None, ) -> None: """Call this when we are just starting a check. 
Saves a "pending" job into storage.""" job_start_time = job_start_time or datetime.datetime.now() @@ -228,6 +229,7 @@ def save_check_stub( scheduler_job_id=scheduler_job_id, is_slideshow=is_slideshow, mailfrom=mailfrom, + category=category, ) self._save_to_db(pending_result) @@ -325,6 +327,7 @@ def _convert_result( scheduler_job_id=result.get("scheduler_job_id", None), is_slideshow=result.get("is_slideshow", False), email_subject=result.get("email_subject", None), + category=result.get("category", None), ) elif cls == NotebookResultPending: return NotebookResultPending( @@ -344,6 +347,7 @@ def _convert_result( stdout=result.get("stdout", []), scheduler_job_id=result.get("scheduler_job_id", None), is_slideshow=result.get("is_slideshow", False), + category=result.get("category", None), ) elif cls == NotebookResultError: @@ -370,6 +374,7 @@ def _convert_result( stdout=result.get("stdout", []), scheduler_job_id=result.get("scheduler_job_id", False), is_slideshow=result.get("is_slideshow", False), + category=result.get("category", None), ) else: raise ValueError("Could not deserialise {} into result object.".format(result)) @@ -397,10 +402,17 @@ def _get_result_count(self, base_filter): def get_count_and_latest_time_per_report(self, subfolder: Optional[str]): base_filer = {} if not subfolder else {"report_name": {"$regex": subfolder + ".*"}} + return self.fetch_reports(base_filer) + + def get_count_and_latest_time_per_report_per_category(self, category: Optional[str]): + base_filer = {} if not category else {"category": category} + return self.fetch_reports(base_filer) + + def fetch_reports(self, base_filer: Dict[str, Any]): reports = list( self._get_raw_results( base_filter=base_filer, - projection={"report_name": 1, "job_start_time": 1, "scheduler_job_id": 1, "_id": 0}, + projection={"report_name": 1, "job_start_time": 1, "scheduler_job_id": 1, "category": 1, "_id": 0}, limit=0, ) ) @@ -411,7 +423,12 @@ def get_count_and_latest_time_per_report(self, subfolder: 
Optional[str]): for report, all_runs in jobs_by_name.items(): latest_start_time = max(r["job_start_time"] for r in all_runs) scheduled_runs = len([x for x in all_runs if x.get("scheduler_job_id")]) - output[report] = {"count": len(all_runs), "latest_run": latest_start_time, "scheduler_runs": scheduled_runs} + output[report] = { + "count": len(all_runs), + "latest_run": latest_start_time, + "scheduler_runs": scheduled_runs, + "category": r["category"], + } return output def get_all_results( diff --git a/notebooker/settings.py b/notebooker/settings.py index daf2d01e..eeea35b9 100644 --- a/notebooker/settings.py +++ b/notebooker/settings.py @@ -28,6 +28,9 @@ class BaseConfig: # A boolean flag to dictate whether we should execute the notebook at the origin or not. EXECUTE_AT_ORIGIN: bool = False + # A boolean flag to dictate whether we should discover and group notebooker by their category tags. + CATEGORIZATION: bool = False + # The serializer class we are using for storage, e.g. PyMongoResultSerializer SERIALIZER_CLS: DEFAULT_SERIALIZER = None # The dictionary of parameters which are used to initialize the serializer class above diff --git a/notebooker/utils/results.py b/notebooker/utils/results.py index 85eb8519..f75ac27e 100644 --- a/notebooker/utils/results.py +++ b/notebooker/utils/results.py @@ -6,7 +6,7 @@ import babel.dates import inflection -from flask import url_for +from flask import url_for, current_app from notebooker import constants from notebooker.exceptions import NotebookRunException @@ -140,12 +140,20 @@ def get_all_available_results_json( def get_count_and_latest_time_per_report(serializer: MongoResultSerializer, subfolder: Optional[str] = None): - reports = serializer.get_count_and_latest_time_per_report(subfolder) + if subfolder and current_app.config["CATEGORIZATION"]: + category = subfolder.rstrip("/") + reports = serializer.get_count_and_latest_time_per_report_per_category(category) + else: + reports = 
serializer.get_count_and_latest_time_per_report(subfolder) output = {} for report_name, metadata in sorted(reports.items(), key=lambda x: x[1]["latest_run"], reverse=True): - metadata["report_name"] = report_name + title_name = report_name + if "PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + title_name = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + "/" + report_name.split("/")[-1] + metadata["report_name"] = title_name + metadata["original_report"] = report_name metadata["time_diff"] = babel.dates.format_timedelta(datetime.datetime.now() - metadata["latest_run"]) - output[inflection.titleize(report_name)] = metadata + output[inflection.titleize(title_name)] = metadata return output diff --git a/notebooker/utils/templates.py b/notebooker/utils/templates.py index 86e86fe6..a73aada2 100644 --- a/notebooker/utils/templates.py +++ b/notebooker/utils/templates.py @@ -22,6 +22,23 @@ def _valid_filename(f): return (f.endswith(".py") or f.endswith(".ipynb")) and "__init__" not in f and "__pycache__" not in f +def _extract_category(path) -> Optional[str]: + if path.endswith(".ipynb"): + nb = nbformat.read(path, as_version=nbformat.v4.nbformat) + return _get_category(nb) + return None + + +def _get_category(notebook: nbformat.NotebookNode) -> Optional[int]: + for idx, cell in enumerate(notebook["cells"]): + tags = cell.get("metadata", {}).get("tags", []) + for tag in tags: + clean_tag = tag.translate({ord(" "): None}) # Remove spaces + if clean_tag.startswith("category="): + return clean_tag.split("=")[1] + return None + + def _get_parameters_cell_idx(notebook: nbformat.NotebookNode) -> Optional[int]: for idx, cell in enumerate(notebook["cells"]): tags = cell.get("metadata", {}).get("tags", []) diff --git a/notebooker/version.py b/notebooker/version.py index 63af8876..364e7bae 100644 --- a/notebooker/version.py +++ b/notebooker/version.py @@ -1 +1 @@ -__version__ = "0.6.3" +__version__ = "0.6.4" 
diff --git a/notebooker/web/routes/index.py b/notebooker/web/routes/index.py index 6d4df3ea..b4825a67 100644 --- a/notebooker/web/routes/index.py +++ b/notebooker/web/routes/index.py @@ -41,6 +41,10 @@ def result_listing(report_name): result_limit = int(request.args.get("limit") or DEFAULT_RESULT_LIMIT) all_reports = get_all_possible_templates() with current_app.app_context(): + if "PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + title_name = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + "/" + report_name.split("/")[-1] + else: + title_name = inflection.titleize(report_name) result = render_template( "result_listing.html", all_reports=all_reports, @@ -49,7 +53,7 @@ def result_listing(report_name): report_name=report_name, result_limit=result_limit, n_results_available=get_serializer().n_all_results_for_report_name(report_name), - titleised_report_name=inflection.titleize(report_name), + titleised_report_name=title_name, readonly_mode=current_app.config["READONLY_MODE"], scheduler_disabled=current_app.config["DISABLE_SCHEDULER"], ) diff --git a/notebooker/web/routes/report_execution.py b/notebooker/web/routes/report_execution.py index d837a17c..6a76e6a0 100644 --- a/notebooker/web/routes/report_execution.py +++ b/notebooker/web/routes/report_execution.py @@ -81,6 +81,10 @@ def run_report_http(report_name): :returns: An HTML template which is the Run Report interface. 
""" + if "PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + category = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + else: + category = "" report_name = convert_report_name_url_to_path(report_name) json_params = request.args.get("json_params") initial_python_parameters = json_to_python(json_params) or "" @@ -95,6 +99,7 @@ def run_report_http(report_name): has_prefix=False, has_suffix=False, report_name=report_name, + category=category, all_reports=get_all_possible_templates(), initialPythonParameters={}, readonly_mode=current_app.config["READONLY_MODE"], @@ -111,6 +116,7 @@ def run_report_http(report_name): has_prefix=has_prefix, has_suffix=has_suffix, report_name=report_name, + category=category, all_reports=get_all_possible_templates(), initialPythonParameters=initial_python_parameters, default_mailfrom=current_app.config["DEFAULT_MAILFROM"], @@ -129,12 +135,18 @@ class RunReportParams(NamedTuple): scheduler_job_id: Optional[str] is_slideshow: bool email_subject: Optional[str] + category: Optional[str] def validate_run_params(report_name, params, issues: List[str]) -> RunReportParams: logger.info(f"Validating input params: {params} for {report_name}") # Find and cleanse the title of the report - report_title = validate_title(params.get("report_title") or report_name, issues) + category = params.get("category", "") + report_title = validate_title( + params.get("report_title") + or (category + "/" + report_name.strip("/").split("/")[-1] if category else report_name), + issues, + ) # Get mailto email address mailto = validate_mailto(params.get("mailto"), issues) error_mailto = validate_mailto(params.get("error_mailto"), issues) @@ -155,6 +167,7 @@ def validate_run_params(report_name, params, issues: List[str]) -> RunReportPara scheduler_job_id=params.get("scheduler_job_id"), is_slideshow=is_slideshow, email_subject=email_subject, + category=category, ) logger.info(f"Validated params: {out}") 
return out @@ -179,6 +192,7 @@ def _handle_run_report( f"mailfrom={params.mailfrom} " f"email_subject={params.email_subject} " f"is_slideshow={params.is_slideshow} " + f"category={params.category} " ) try: with current_app.app_context(): @@ -196,6 +210,7 @@ def _handle_run_report( mailfrom=params.mailfrom, email_subject=params.email_subject, is_slideshow=params.is_slideshow, + category=params.category, ) return ( jsonify({"id": job_id}), diff --git a/notebooker/web/routes/scheduling.py b/notebooker/web/routes/scheduling.py index 9bfc26c1..01e241c1 100644 --- a/notebooker/web/routes/scheduling.py +++ b/notebooker/web/routes/scheduling.py @@ -52,6 +52,8 @@ def remove_schedule(job_id): def get_job_id(report_name: str, report_title: str) -> str: + if "PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + report_name = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + "/" + report_name.split("/")[-1] return f"{report_name}_{report_title}" @@ -68,7 +70,10 @@ def update_schedule(report_name): overrides_dict = handle_overrides(request.values.get("overrides", ""), issues) if issues: return jsonify({"status": "Failed", "content": ("\n".join(issues))}) - + if "PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + category = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + else: + category = "" params = { "report_name": report_name, "overrides": overrides_dict, @@ -81,6 +86,7 @@ def update_schedule(report_name): "hide_code": params.hide_code, "is_slideshow": params.is_slideshow, "scheduler_job_id": job_id, + "category": category, } job.modify(trigger=trigger, kwargs=params) current_app.apscheduler.reschedule_job(job_id, jobstore="mongo", trigger=trigger) @@ -103,6 +109,10 @@ def create_schedule(report_name): if issues: return jsonify({"status": "Failed", "content": ("\n".join(issues))}) job_id = get_job_id(report_name, params.report_title) + if 
"PATH_TO_CATEGORY_DICT" in current_app.config and report_name in current_app.config["PATH_TO_CATEGORY_DICT"]: + category = current_app.config["PATH_TO_CATEGORY_DICT"][report_name] + else: + category = "" dict_params = { "report_name": report_name, "overrides": overrides_dict, @@ -115,6 +125,7 @@ def create_schedule(report_name): "hide_code": params.hide_code, "scheduler_job_id": job_id, "is_slideshow": params.is_slideshow, + "category": category, } logger.info(f"Creating job with params: {dict_params}") try: diff --git a/notebooker/web/routes/templates.py b/notebooker/web/routes/templates.py index 597dddf7..d13b951e 100644 --- a/notebooker/web/routes/templates.py +++ b/notebooker/web/routes/templates.py @@ -5,7 +5,7 @@ from notebooker.utils.results import get_count_and_latest_time_per_report from notebooker.utils.web import convert_report_name_url_to_path from notebooker.web.routes.report_execution import get_report_parameters_html -from notebooker.web.utils import all_templates_flattened, get_all_possible_templates, get_serializer +from notebooker.web.utils import get_all_possible_templates, get_serializer, get_all_templates templates_bp = Blueprint("templates_bp", __name__) @@ -41,7 +41,7 @@ def all_possible_templates_flattened(): :returns: A JSON which is a list of all possible templates with their full names. 
""" - return jsonify({"result": all_templates_flattened()}) + return jsonify({"result": get_all_templates()}) @templates_bp.route("/core/get_template_parameters/", methods=["GET"]) diff --git a/notebooker/web/scheduler.py b/notebooker/web/scheduler.py index 2a31125f..7c5f4e52 100644 --- a/notebooker/web/scheduler.py +++ b/notebooker/web/scheduler.py @@ -24,6 +24,7 @@ def run_report( is_slideshow: bool = False, error_mailto: Optional[str] = None, email_subject: Optional[str] = None, + category: Optional[str] = None, ): """ This is the entrypoint of the scheduler; APScheduler has to @@ -46,6 +47,7 @@ def run_report( n_retries=0, is_slideshow=is_slideshow, email_subject=email_subject, + category=category, ) else: # Fall back to using API. This will not work in readonly mode. diff --git a/notebooker/web/static/notebooker/index.js b/notebooker/web/static/notebooker/index.js index 095e9bc4..e5e3e9dd 100644 --- a/notebooker/web/static/notebooker/index.js +++ b/notebooker/web/static/notebooker/index.js @@ -66,7 +66,7 @@ const updateContents = (currentFolder, entries) => { const displayName = entryAfterLevel(report, level); const stats = entries[report]; reportParts.push( - `` + + `` + '
' + `

${displayName}

\n` + '
\n' + @@ -81,7 +81,7 @@ const updateContents = (currentFolder, entries) => { " " + "
" + '
' + - ` Original report name: ${stats.report_name}\n` + + ` Original report name: ${stats.original_report}\n` + "
" + "
" ); diff --git a/notebooker/web/static/notebooker/scheduler.js b/notebooker/web/static/notebooker/scheduler.js index fba014d3..1bce25e9 100644 --- a/notebooker/web/static/notebooker/scheduler.js +++ b/notebooker/web/static/notebooker/scheduler.js @@ -80,9 +80,25 @@ load_all_templates = (callback) => { dataType: "json", success: (result) => { let templates = Array(); - for (let i = 0; i < result.result.length; i++) { - let value = result.result[i]; - templates = templates.concat({ name: value, value: value }); + if (Array.isArray(result.result)) { + // This logic handles basic templates + for (let i = 0; i < result.result.length; i++) { + let value = result.result[i]; + templates = templates.concat({ name: value, value: value }); + } + } else if (typeof result.result === "object") { + // This logic handles categorized templates + for (let key in result.result) { + if (result.result.hasOwnProperty(key)) { + let value = result.result[key]; + for (let subKey in value) { + if (value.hasOwnProperty(subKey)) { + let lastPart = subKey.split("/").pop(); + templates = templates.concat({ name: key + "/" + lastPart, value: subKey }); + } + } + } + } } $(".selection.dropdown").dropdown({ values: templates, @@ -295,21 +311,21 @@ $(document).ready(() => { const schedulerTable = $("#schedulerTable"); const schedulerDataTable = schedulerTable.DataTable({ columns: [ - { - title: "Report Unique ID", - name: "id", - data: "id", - }, { title: "Report Title", name: "report_title", data: "params.report_title", }, { - title: "Report Name", - name: "report_name", - data: "params.report_name", + title: "Report Unique ID", + name: "id", + data: "id", }, + // { + // title: "Report Name", + // name: "report_name", + // data: "params.report_name", + // }, { title: "Cron Schedule", name: "cron_schedule", diff --git a/notebooker/web/static/package.json b/notebooker/web/static/package.json index 00facf89..859c4512 100644 --- a/notebooker/web/static/package.json +++ 
b/notebooker/web/static/package.json @@ -1,6 +1,6 @@ { "name": "notebooker", - "version": "0.6.3", + "version": "0.6.4", "description": "Notebooker - Turn notebooks into reports", "dependencies": { "bootstrap-table": "1.20.2", diff --git a/notebooker/web/templates/header.html b/notebooker/web/templates/header.html index 19097fe5..fdfc7677 100644 --- a/notebooker/web/templates/header.html +++ b/notebooker/web/templates/header.html @@ -82,7 +82,7 @@

Execute a notebook:

{% for report, subreports in all_reports|dictsort() recursive %} {% if subreports is none %} - {{ report }} + {{ report.split('/')[-1] }} {% else %} diff --git a/notebooker/web/templates/run_report.html b/notebooker/web/templates/run_report.html index 9ddc5dcb..a47542e6 100644 --- a/notebooker/web/templates/run_report.html +++ b/notebooker/web/templates/run_report.html @@ -16,7 +16,7 @@

Customise your report

Report Title:

- +

Existing parameters:

@@ -34,6 +34,10 @@

Override parameters:

+
+ + +

Email result to:

Dict[str, U """ Creates a nested dictionary that represents the folder structure of rootdir """ + categorization = current_app.config.get("CATEGORIZATION", False) starting_point = starting_point or _get_python_template_dir() all_dirs = {} rootdir = starting_point.rstrip(os.sep) start = rootdir.rfind(os.sep) + 1 - for path, dirs, files in os.walk(rootdir): + + for path, _, files in os.walk(rootdir): if not _valid_dirname(path): continue folders = path[start:].split(os.sep) - subdir = { - os.sep.join(folders[1:] + [f.replace(".ipynb", "").replace(".py", "")]): None - for f in files - if _valid_filename(f) + subdir = {} + parent = all_dirs + + for f in files: + full_path = os.path.join(starting_point, *folders[1:], f) + if _valid_filename(f): + if categorization: + category = _extract_category(full_path) + if category: + parent.setdefault(rootdir.split(os.sep)[-1], {}).setdefault(category, {})[ + os.path.join(*folders[1:], f) + ] = None + else: + subdir[os.path.join(*folders[1:], f)] = None + + for folder in folders[:-1]: + if folder not in parent: + parent[folder] = {} + parent = parent[folder] + if not categorization: + parent[folders[-1]] = subdir + + if categorization: + all_dirs = filter_for_code_files(all_dirs) + path_to_category_name = { + name: original_key + for original_key, sub_dict in all_dirs.get(rootdir.split(os.sep)[-1], {}).items() + for name, value in sub_dict.items() + if value is None } - parent = reduce(dict.get, folders[:-1], all_dirs) - parent[folders[-1]] = subdir - return all_dirs[rootdir[start:]] + current_app.config["PATH_TO_CATEGORY_DICT"] = path_to_category_name + + stripped = strip_extensions(all_dirs) + logger.info("Stripped directory structure %s", stripped) + return stripped.get(rootdir[start:], {}) + + +def strip_extensions(d): + def strip_extension(item): + """Strips .py or .ipynb extension from a given item, if present.""" + for ext in (".py", ".ipynb"): + if item.endswith(ext): + return item[: -len(ext)] + return item + + """ + 
Recursively removes .ipynb and .py extensions from all keys and values in the dictionary. + """ + + def process_dict(sub_d): + """ + Recursively processes each item in the dictionary to strip extensions from keys and values. + """ + new_dict = {} + for k, v in sub_d.items(): + new_key = strip_extension(k) + if isinstance(v, dict): + new_dict[new_key] = process_dict(v) # Recursive call for sub-dictionaries + elif isinstance(v, str): + new_dict[new_key] = strip_extension(v) # Strip extension from values if string + else: + new_dict[new_key] = v # Copy other values directly + return new_dict + + return process_dict(d) + + +def filter_for_code_files(d): + """ + Recursively filters a dictionary to retain only items that are either + .py or .ipynb files or directories leading to such files. + """ + + def has_code_files(sub_d): + """ + Determines whether a dictionary or its nested dictionaries contain + any .py or .ipynb files. + """ + if not isinstance(sub_d, dict): + return False + for k, v in sub_d.items(): + if isinstance(v, dict) and has_code_files(v): + return True + if k.endswith((".py", ".ipynb")): + return True + return False + + def filter_dict(sub_d): + """ + Recursively filters the dictionary to retain only keys leading to .py or .ipynb files, + or to dictionaries that do, direct or indirectly. 
+ """ + new_dict = {} + for k, v in sub_d.items(): + if isinstance(v, dict): + filtered_sub_d = filter_dict(v) + if has_code_files(filtered_sub_d): # Retain if leads to code files + new_dict[k] = filtered_sub_d + elif k.endswith((".py", ".ipynb")): + new_dict[k] = v + return new_dict + + filtered_dict = filter_dict(d) + return strip_extensions(filtered_dict) def all_templates_flattened(): - templates = list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) - return templates + return list(_gen_all_templates(get_all_possible_templates(warn_on_local=False))) + + +def get_all_templates(): + if current_app.config["CATEGORIZATION"]: + return get_all_possible_templates(warn_on_local=False) + else: + return all_templates_flattened() diff --git a/tests/integration/test_execute_notebook.py b/tests/integration/test_execute_notebook.py index 4af44623..1ed991c2 100644 --- a/tests/integration/test_execute_notebook.py +++ b/tests/integration/test_execute_notebook.py @@ -97,6 +97,20 @@ def test_main(mongo_host, setup_and_cleanup_notebooker_filesystem, webapp_config "sad@email", "notebooker@example.com", ), + ( + [ + "--report-name", + "crashyreport", + "--category", + "cat1", + "--error-mailto", + "sad@email", + "--mailfrom", + "notebooker@example.com", + ], + "sad@email", + "notebooker@example.com", + ), (["--report-name", "crashyreport", "--mailfrom", "notebooker@example.com"], None, "notebooker@example.com"), ], ) diff --git a/tests/integration/test_templates.py b/tests/integration/test_templates.py index 284f70ba..881546e1 100644 --- a/tests/integration/test_templates.py +++ b/tests/integration/test_templates.py @@ -1,18 +1,94 @@ -from notebooker.web.utils import get_all_possible_templates - - -def test_get_all_possible_templates(flask_app): - flask_app.config["PY_TEMPLATE_BASE_DIR"] = None - with flask_app.app_context(): - assert get_all_possible_templates() == { - "sample": { - "sample/plot_random": None, - "sample/test_plotly": None, - 
"sample/plot_random_raw": None, - "sample/slideshow_test": None, - }, - "other_folder": { - "other_folder/other_slideshow_test": None, - "this one has spaces": {"other_folder/this one has spaces/plot_random2": None}, +import os +import pytest +import shutil +import tempfile + +from flask import Flask + +from notebooker.utils.filesystem import mkdir_p +from notebooker.utils.templates import _valid_dirname +from notebooker.web.utils import get_directory_structure + + +@pytest.fixture +def app_context(): + app = Flask(__name__) + # Configure your app for testing here + ctx = app.app_context() + ctx.push() # Pushes the application context + yield app # This makes the app available to the test functions + ctx.pop() # Removes the application context after test completion + + +@pytest.mark.parametrize( + "input_dirname, expected_result", + [ + ("./my_directory", True), + ("../hello_world/a/b/c/", True), + (".git/blah", False), + ("../.git/hello/world", False), + ("normal/path/to/something", True), + ("/absolute/path/.git", False), + ("/absolute/path/git", True), + ], +) +def test_valid_dirnames(input_dirname, expected_result): + assert _valid_dirname(input_dirname) is expected_result + + +def test_get_directory_structure(app_context): + temp_dir = tempfile.mkdtemp() + try: + paths = [ + "hello.py", + "goodbye.py", + "depth/1.py", + "this/is/very/deep.py", + "depth/2.py", + "this/is/deep.py", + "this/report.py", + "hello_again.ipynb", + "depth/3.ipynb", + ".hidden/4.ipynb", + ".hidden/visible/5.ipynb", + ".hidden/.more-hidden/6.ipynb", + "./visible/7.ipynb", + "this/is/../is/8.ipynb", + ] + for path in paths: + abspath = os.path.join(temp_dir, path) + if "/" in path: + mkdir_p(os.path.dirname(abspath)) + with open(abspath, "w") as f: + f.write("#hello") + expected_structure = { + "hello": None, + "goodbye": None, + "depth": {"depth/1": None, "depth/2": None, "depth/3": None}, + "this": { + "this/report": None, + "is": {"this/is/8": None, "this/is/deep": None, "very": 
{"this/is/very/deep": None}}, }, + "hello_again": None, + "visible": {"visible/7": None}, } + + assert get_directory_structure(temp_dir) == expected_structure + finally: + shutil.rmtree(temp_dir) + + +def test_get_directory_structure_categorized(app_context): + app_context.config["CATEGORIZATION"] = True + temp_dir = tempfile.mkdtemp() + try: + expected_structure = { + "cat1": {"cat1_nb": None, "subdir/cat1_nb_subdir": None}, + "cat2": {"cat2_nb": None, "subdir/cat2_nb_subdir": None}, + } + + templates_path = os.path.join(os.path.dirname(__file__), "templates") + actual_structure = get_directory_structure(templates_path) + assert actual_structure == expected_structure + finally: + shutil.rmtree(temp_dir) diff --git a/tests/integration/web/routes/test_core.py b/tests/integration/web/routes/test_core.py index a92349cb..6af22b3f 100644 --- a/tests/integration/web/routes/test_core.py +++ b/tests/integration/web/routes/test_core.py @@ -14,7 +14,7 @@ def test_create_schedule(flask_app, setup_workspace): rv = client.get("/core/all_possible_templates_flattened") assert rv.status_code == 200 data = json.loads(rv.data) - assert data == {"result": ["fake/py_report", "fake/ipynb_report", "fake/report_failing"]} + assert sorted(data["result"]) == sorted(["fake/py_report", "fake/ipynb_report", "fake/report_failing"]) def test_version_number(flask_app, setup_workspace): @@ -67,6 +67,7 @@ def test_get_all_templates_with_results(flask_app, setup_workspace): status=JobStatus.DONE, overrides={"param1": "big"}, scheduler_job_id="ohboy_it's_a_schedule", + category=None, ), NotebookResultError( job_id="job2", @@ -85,10 +86,12 @@ def test_get_all_templates_with_results(flask_app, setup_workspace): data = json.loads(rv.data) assert data == { "Report Name": { + "category": None, "count": 2, "scheduler_runs": 1, "report_name": "report_name", "latest_run": "Sat, 02 Jan 2021 00:00:00 GMT", + "original_report": "report_name", "time_diff": "1 month", } } @@ -160,6 +163,7 @@ def 
test_get_all_templates_with_results_then_delete(flask_app, setup_workspace): status=JobStatus.DONE, overrides={"param1": "big"}, scheduler_job_id="ohboy_it's_a_schedule", + category=None, ), NotebookResultError( job_id="job2", @@ -185,17 +189,21 @@ def test_get_all_templates_with_results_then_delete(flask_app, setup_workspace): data = json.loads(rv.data) assert data == { "Bad Report": { + "category": None, "count": 1, "scheduler_runs": 0, "report_name": "BadReport", "latest_run": "Thu, 02 Jan 2014 00:00:00 GMT", + "original_report": "BadReport", "time_diff": "7 years", }, "Report Name": { + "category": None, "count": 2, "scheduler_runs": 1, "report_name": "report_name", "latest_run": "Sat, 02 Jan 2021 00:00:00 GMT", + "original_report": "report_name", "time_diff": "1 month", }, } @@ -208,10 +216,12 @@ def test_get_all_templates_with_results_then_delete(flask_app, setup_workspace): data = json.loads(rv.data) assert data == { "Report Name": { + "category": None, "count": 1, "scheduler_runs": 0, "report_name": "report_name", "latest_run": "Sat, 02 Jan 2021 00:00:00 GMT", + "original_report": "report_name", "time_diff": "1 month", } } diff --git a/tests/integration/web/test_run_report.py b/tests/integration/web/test_run_report.py index 0dcfa202..3be6ddbc 100644 --- a/tests/integration/web/test_run_report.py +++ b/tests/integration/web/test_run_report.py @@ -19,6 +19,7 @@ def test_run_report_json_parameters(flask_app, setup_workspace): is_slideshow = True scheduler_job_id = "abc/123" email_subject = "Subject" + category = "" payload = { "overrides": json.dumps(overrides), "report_title": report_title, @@ -50,6 +51,7 @@ def test_run_report_json_parameters(flask_app, setup_workspace): mailfrom=mailfrom, is_slideshow=is_slideshow, email_subject=email_subject, + category=category, ) diff --git a/tests/integration/web/test_scheduling.py b/tests/integration/web/test_scheduling.py index 2bb2e06a..b7506d5a 100644 --- a/tests/integration/web/test_scheduling.py +++ 
b/tests/integration/web/test_scheduling.py @@ -41,6 +41,7 @@ def test_create_schedule(flask_app, setup_workspace, report_name): "scheduler_job_id": f"{report_name}_test2", "mailfrom": "test@example.com", "email_subject": "Subject", + "category": "", }, "trigger": { "fields": { diff --git a/tests/unit/test_run_report.py b/tests/unit/test_run_report.py index c0c054c8..a692e6c9 100644 --- a/tests/unit/test_run_report.py +++ b/tests/unit/test_run_report.py @@ -66,6 +66,7 @@ def test_validate_run_params(): mailfrom="test@example.com", is_slideshow=True, email_subject="Subject of the email", + category="", ) actual_output = validate_run_params("lovely_report_name", input_params, issues) assert issues == [] diff --git a/tests/unit/utils/templates/cat1_nb.ipynb b/tests/unit/utils/templates/cat1_nb.ipynb new file mode 100644 index 00000000..082a65fd --- /dev/null +++ b/tests/unit/utils/templates/cat1_nb.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": ["category=cat1"] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/utils/templates/cat2_nb.ipynb b/tests/unit/utils/templates/cat2_nb.ipynb new file mode 100644 index 00000000..bc37ab03 --- /dev/null +++ b/tests/unit/utils/templates/cat2_nb.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": 
"aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": ["category=cat2","parameters"] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/utils/templates/no_cat.ipynb b/tests/unit/utils/templates/no_cat.ipynb new file mode 100644 index 00000000..dacdb67c --- /dev/null +++ b/tests/unit/utils/templates/no_cat.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/utils/templates/subdir/cat1_nb_subdir.ipynb b/tests/unit/utils/templates/subdir/cat1_nb_subdir.ipynb new file mode 100644 index 00000000..81f5f8f1 --- /dev/null +++ b/tests/unit/utils/templates/subdir/cat1_nb_subdir.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + 
"metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": ["parameters","category=cat1"] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/utils/templates/subdir/cat2_nb_subdir.ipynb b/tests/unit/utils/templates/subdir/cat2_nb_subdir.ipynb new file mode 100644 index 00000000..d6a4ff0c --- /dev/null +++ b/tests/unit/utils/templates/subdir/cat2_nb_subdir.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": ["category = cat2"] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tests/unit/utils/templates/subdir/no_cat_nb_subdir.ipynb b/tests/unit/utils/templates/subdir/no_cat_nb_subdir.ipynb new file mode 100644 index 00000000..dacdb67c --- /dev/null +++ b/tests/unit/utils/templates/subdir/no_cat_nb_subdir.ipynb @@ -0,0 +1,41 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": 
"aab4d036-a4e4-4ab0-bb49-6aceaff8525a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "Test notebook without notebooker tag" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8 (py38)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.19" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}