From 76e6b1c0d495e3f5526dfaa1c7a200569b94295b Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Fri, 28 Feb 2025 10:41:15 +0100
Subject: [PATCH 1/2] initial work

---
 scripts/slowest_tests/extract-slow-tests.py   |  80 ++++++++++
 .../update-slowest-times-issue.sh             | 139 ++++++++++++++++++
 2 files changed, 219 insertions(+)
 create mode 100644 scripts/slowest_tests/extract-slow-tests.py
 create mode 100644 scripts/slowest_tests/update-slowest-times-issue.sh

diff --git a/scripts/slowest_tests/extract-slow-tests.py b/scripts/slowest_tests/extract-slow-tests.py
new file mode 100644
index 0000000000..793b0f9b20
--- /dev/null
+++ b/scripts/slowest_tests/extract-slow-tests.py
@@ -0,0 +1,80 @@
+"""Parse the GitHub action log for test times.
+
+Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/scripts/slowest_tests/extract-slow-tests.py
+
+"""
+
+import re
+import sys
+
+from pathlib import Path
+
+start_pattern = re.compile(r"==== slow")  # header rule that opens the slow-tests section
+separator_pattern = re.compile(r"====")  # any "====" rule; the next one closes the section
+time_pattern = re.compile(r"(\d+\.\d+)s ")  # a duration such as "12.34s " on a result row
+
+
+def extract_lines(lines: list[str]) -> list[str]:
+    """Return the slow-tests section of a test log.
+
+    The section runs from the first line matching ``start_pattern`` through
+    the next ``====`` separator line, both inclusive. If the closing
+    separator never appears, everything from the header onward is returned;
+    if the header never appears, the result is empty.
+    """
+    section: list[str] = []
+    for line in lines:
+        is_header = start_pattern.search(line) is not None
+        if not section and not is_header:
+            continue  # still before the section
+        section.append(line)
+        # The header itself contains "====", so it must not close the section.
+        if not is_header and separator_pattern.search(line):
+            break
+    return section
+
+
+def trim_up_to_match(pattern, string: str) -> str:
+    """Return ``string`` from its first ``pattern`` match onward (empty if no match)."""
+    found = pattern.search(string)
+    # A match object is always truthy, so a falsy result can only mean "no match".
+    return string[found.start() :] if found else ""
+
+
+def trim(pattern, lines: list[str]) -> list[str]:
+    """Apply :func:`trim_up_to_match` with ``pattern`` to every line, keeping order."""
+    return [trim_up_to_match(pattern, line) for line in lines]
+
+
+def strip_ansi(text: str) -> str:
+    """Return ``text`` with ANSI escape sequences (e.g. colour codes) removed."""
+    return re.sub(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])", "", text)
+
+
+def format_times(times: list[str]) -> list[str]:
+    """Cut header/footer to start at ``====`` and each row between them at its time."""
+    if len(times) < 2:  # header only (log cut short): don't emit it twice as footer
+        return trim(separator_pattern, times)
+    footer = [strip_ansi(line) for line in trim(separator_pattern, times[-1:])]
+    return trim(separator_pattern, times[:1]) + trim(time_pattern, times[1:-1]) + footer
+
+
+def read_lines_from_stdin() -> list[str]:
+    return sys.stdin.read().splitlines()  # reads stdin to EOF, then splits into lines
+
+
+def read_from_file(file: Path) -> list[str]:
+    """Return the lines of ``file``; a stand-in for stdin when testing."""
+    return file.read_text().splitlines()
+
+
+def main(read_lines):
+    """Print the slow-tests section of the log lines returned by ``read_lines()``."""
+    section = extract_lines(read_lines())
+    report = "\n".join(format_times(section))
+    print(report)  # noqa: T201
+
+
+if __name__ == "__main__":
+    # Run as a script: the log to parse is read from standard input.
+    main(read_lines_from_stdin)
diff --git a/scripts/slowest_tests/update-slowest-times-issue.sh b/scripts/slowest_tests/update-slowest-times-issue.sh
new file mode 100644
index 0000000000..6d63d3b8a6
--- /dev/null
+++ b/scripts/slowest_tests/update-slowest-times-issue.sh
@@ -0,0 +1,139 @@
+#!/bin/zsh
+
+DRY_RUN=false  # when set to true, the issue body is printed and the issue is left untouched
+
+owner=pymc-devs
+repo=pymc
+issue_number=7686  # issue whose title and body are overwritten
+title="Speed up test times :rocket:"
+workflow=tests  # workflow whose successful runs are searched for logs
+contributing_url="https://www.pymc.io/projects/docs/en/stable/contributing/index.html"
+action_url="https://github.com/$owner/$repo/blob/main/.github/workflows/slow-tests-issue.yml"
+latest_id=$(gh run list --limit 30 --workflow $workflow --status success --json databaseId,startedAt,updatedAt --jq '
+. | map({
+  databaseId: .databaseId,
+  startedAt: .startedAt,
+  updatedAt: .updatedAt,
+  minutes:  (((.updatedAt | fromdate) - (.startedAt | fromdate)) / 60)
+} | select(.minutes > 10))
+| .[0].databaseId
+')
+jobs=$(gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq '
+.jobs
+')
+# | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at})
+
+echo $jobs
+
+# # Skip 3.10, float32, and Benchmark tests
+# function skip_job() {
+#     name=$1
+#     # if [[ $name == *"py3.10"* ]]; then
+#     #     return 0
+#     # fi
+#     #
+#     # if [[ $name == *"float32 1"* ]]; then
+#     #     return 0
+#     # fi
+#     #
+#     # if [[ $name == *"Benchmark"* ]]; then
+#     #     return 0
+#     # fi
+#
+#     return 1
+# }
+#
+# # Remove common prefix from the name
+# function remove_prefix() {
+#     name=$1
+#     echo $name
+#     # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //'
+# }
+#
+# function human_readable_time() {
+#     started_at=$1
+#     completed_at=$2
+#
+#     start_seconds=$(date -d "$started_at" +%s)
+#     end_seconds=$(date -d "$completed_at" +%s)
+#
+#     seconds=$(($end_seconds - $start_seconds))
+#
+#     if [ $seconds -lt 60 ]; then
+#         echo "$seconds seconds"
+#     else
+#         echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')"
+#     fi
+# }
+#
+# all_times=""
+# echo "$jobs" | jq -c '.[]' | while read -r job; do
+#     id=$(echo $job | jq -r '.id')
+#     name=$(echo $job | jq -r '.name')
+#     run_id=$(echo $job | jq -r '.run_id')
+#     started_at=$(echo $job | jq -r '.started_at')
+#     completed_at=$(echo $job | jq -r '.completed_at')
+#
+#     if skip_job $name; then
+#         echo "Skipping $name"
+#         continue
+#     fi
+#
+#     echo "Processing job: $name (ID: $id, Run ID: $run_id)"
+#
+#     # Seeing a bit more stabilty with the API rather than the CLI
+#     # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run
+#     times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py)
+#     # times=$(gh run view --job $id --log | python extract-slow-tests.py)
+#
+#     if [ -z "$times" ]; then
+#         # Some of the jobs are non-test jobs, so we skip them
+#         echo "No tests found for '$name', skipping"
+#         continue
+#     fi
+#
+#     echo $times
+#
+#     human_readable=$(human_readable_time $started_at $completed_at)
+#     name=$(remove_prefix $name)
+#
+#     top="<details><summary>($human_readable) $name</summary>\n\n\n\`\`\`"
+#     bottom="\`\`\`\n\n</details>"
+#
+#     formatted_times="$top\n$times\n$bottom"
+#
+#     if [ -n "$all_times" ]; then
+#         all_times="$all_times\n$formatted_times"
+#     else
+#         all_times="$formatted_times"
+#     fi
+# done
+#
+# # if [ -z "$all_times" ]; then
+# #     echo "No slow tests found, exiting"
+# #     exit 1
+# # fi
+#
+# run_date=$(date +"%Y-%m-%d")
+# body=$(cat << EOF
+# If you are motivated to help speed up some tests, we would appreciate it!
+#
+# Here are some of the slowest test times:
+#
+# $all_times
+#
+# You can find more information on how to contribute [here]($contributing_url)
+#
+# Automatically generated by [GitHub Action]($action_url)
+# Latest run date: $run_date
+# Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id)
+# EOF
+# )
+#
+# if [ "$DRY_RUN" = true ]; then
+#     echo "Dry run, not updating issue"
+#     echo $body
+#     exit
+# fi
+# echo $body | gh issue edit $issue_number --body-file - --title "$title"
+# echo "Updated issue $issue_number with all times"

From 10a5d3ce404a6040d9e0cc386703b43221da90fb Mon Sep 17 00:00:00 2001
From: Will Dean <wd60622@gmail.com>
Date: Mon, 24 Mar 2025 09:56:55 +0100
Subject: [PATCH 2/2] handle character

---
 .../update-slowest-times-issue.sh             | 238 +++++++++---------
 1 file changed, 119 insertions(+), 119 deletions(-)

diff --git a/scripts/slowest_tests/update-slowest-times-issue.sh b/scripts/slowest_tests/update-slowest-times-issue.sh
index 6d63d3b8a6..be9d18bb42 100644
--- a/scripts/slowest_tests/update-slowest-times-issue.sh
+++ b/scripts/slowest_tests/update-slowest-times-issue.sh
@@ -18,122 +18,122 @@ latest_id=$(gh run list --limit 30 --workflow $workflow --status success --json
 } | select(.minutes > 10))
 | .[0].databaseId
 ')
-jobs=$(gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq '
-.jobs
-')
-# | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at})
-
-echo $jobs
-
-# # Skip 3.10, float32, and Benchmark tests
-# function skip_job() {
-#     name=$1
-#     # if [[ $name == *"py3.10"* ]]; then
-#     #     return 0
-#     # fi
-#     #
-#     # if [[ $name == *"float32 1"* ]]; then
-#     #     return 0
-#     # fi
-#     #
-#     # if [[ $name == *"Benchmark"* ]]; then
-#     #     return 0
-#     # fi
-#
-#     return 1
-# }
-#
-# # Remove common prefix from the name
-# function remove_prefix() {
-#     name=$1
-#     echo $name
-#     # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //'
-# }
-#
-# function human_readable_time() {
-#     started_at=$1
-#     completed_at=$2
-#
-#     start_seconds=$(date -d "$started_at" +%s)
-#     end_seconds=$(date -d "$completed_at" +%s)
-#
-#     seconds=$(($end_seconds - $start_seconds))
-#
-#     if [ $seconds -lt 60 ]; then
-#         echo "$seconds seconds"
-#     else
-#         echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')"
-#     fi
-# }
-#
-# all_times=""
-# echo "$jobs" | jq -c '.[]' | while read -r job; do
-#     id=$(echo $job | jq -r '.id')
-#     name=$(echo $job | jq -r '.name')
-#     run_id=$(echo $job | jq -r '.run_id')
-#     started_at=$(echo $job | jq -r '.started_at')
-#     completed_at=$(echo $job | jq -r '.completed_at')
-#
-#     if skip_job $name; then
-#         echo "Skipping $name"
-#         continue
-#     fi
-#
-#     echo "Processing job: $name (ID: $id, Run ID: $run_id)"
-#
-#     # Seeing a bit more stabilty with the API rather than the CLI
-#     # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run
-#     times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py)
-#     # times=$(gh run view --job $id --log | python extract-slow-tests.py)
-#
-#     if [ -z "$times" ]; then
-#         # Some of the jobs are non-test jobs, so we skip them
-#         echo "No tests found for '$name', skipping"
-#         continue
-#     fi
-#
-#     echo $times
-#
-#     human_readable=$(human_readable_time $started_at $completed_at)
-#     name=$(remove_prefix $name)
-#
-#     top="<details><summary>($human_readable) $name</summary>\n\n\n\`\`\`"
-#     bottom="\`\`\`\n\n</details>"
-#
-#     formatted_times="$top\n$times\n$bottom"
-#
-#     if [ -n "$all_times" ]; then
-#         all_times="$all_times\n$formatted_times"
-#     else
-#         all_times="$formatted_times"
-#     fi
-# done
-#
-# # if [ -z "$all_times" ]; then
-# #     echo "No slow tests found, exiting"
-# #     exit 1
-# # fi
-#
-# run_date=$(date +"%Y-%m-%d")
-# body=$(cat << EOF
-# If you are motivated to help speed up some tests, we would appreciate it!
-#
-# Here are some of the slowest test times:
-#
-# $all_times
-#
-# You can find more information on how to contribute [here]($contributing_url)
-#
-# Automatically generated by [GitHub Action]($action_url)
-# Latest run date: $run_date
-# Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id)
-# EOF
-# )
-#
-# if [ "$DRY_RUN" = true ]; then
-#     echo "Dry run, not updating issue"
-#     echo $body
-#     exit
-# fi
-# echo $body | gh issue edit $issue_number --body-file - --title "$title"
-# echo "Updated issue $issue_number with all times"
+gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq '
+ .jobs
+ | map({name, id, run_id, node_id, started_at, completed_at})
+' > tmp.json  # array with one trimmed-down object per job of the selected run
+
+# Return 0 to skip job $1; all filters (3.10, float32, Benchmark) are disabled, so nothing is skipped
+function skip_job() {
+    name=$1
+    # if [[ $name == *"py3.10"* ]]; then
+    #     return 0
+    # fi
+    #
+    # if [[ $name == *"float32 1"* ]]; then
+    #     return 0
+    # fi
+    #
+    # if [[ $name == *"Benchmark"* ]]; then
+    #     return 0
+    # fi
+
+    return 1
+}
+
+# Echo job name $1; the sed that strips the common prefix is disabled, so the name is unchanged
+function remove_prefix() {
+    name=$1
+    echo $name
+    # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //'
+}
+
+# Print the time from $1 to $2 (GNU `date -d` inputs): "<s> seconds" or "<m> minutes <s> seconds".
+function human_readable_time() {
+    started_at=$1
+    completed_at=$2
+    start_seconds=$(date -d "$started_at" +%s)
+    end_seconds=$(date -d "$completed_at" +%s)
+    seconds=$((end_seconds - start_seconds))
+
+    if [ "$seconds" -lt 60 ]; then
+        echo "$seconds seconds"
+    else
+        # Plain arithmetic: strftime's %M wraps at 60, so jobs over an hour lost their hours.
+        echo "$((seconds / 60)) minutes $((seconds % 60)) seconds"
+    fi
+}
+
+
+all_times=""  # Markdown for the issue body, one <details> block per job
+cat tmp.json | jq -c '.[]' | while IFS= read -r job; do  # one compact JSON object per line
+    id=$(printf '%s' "$job" | jq -r '.id')  # printf: zsh echo would expand backslashes in the JSON
+    name=$(printf '%s' "$job" | jq -r '.name')
+    run_id=$(printf '%s' "$job" | jq -r '.run_id')
+    started_at=$(printf '%s' "$job" | jq -r '.started_at')
+    completed_at=$(printf '%s' "$job" | jq -r '.completed_at')
+
+    if skip_job $name; then
+        echo "Skipping $name"
+        continue
+    fi
+
+    echo "Processing job: $name (ID: $id, Run ID: $run_id)"
+
+    # Seeing a bit more stability with the API rather than the CLI
+    # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run
+    times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py)
+    # times=$(gh run view --job $id --log | python extract-slow-tests.py)
+
+    if [ -z "$times" ]; then
+        # Some of the jobs are non-test jobs, so we skip them
+        echo "No tests found for '$name', skipping"
+        continue
+    fi
+
+    echo $times
+
+    human_readable=$(human_readable_time $started_at $completed_at)
+    name=$(remove_prefix $name)
+
+    top="<details><summary>($human_readable) $name</summary>\n\n\n\`\`\`"
+    bottom="\`\`\`\n\n</details>"
+
+    formatted_times="$top\n$times\n$bottom"  # \n stays literal here; the final echo expands it
+
+    if [ -n "$all_times" ]; then
+        all_times="$all_times\n$formatted_times"
+    else
+        all_times="$formatted_times"
+    fi
+done  # zsh runs the loop in the current shell, so all_times is still set afterwards
+
+if [ -z "$all_times" ]; then
+    echo "No slow tests found, exiting"
+    exit 1  # nothing to publish
+fi
+
+run_date=$(date +"%Y-%m-%d")  # today's date in the local timezone
+body=$(cat << EOF
+If you are motivated to help speed up some tests, we would appreciate it!
+
+Here are some of the slowest test times:
+
+$all_times
+
+You can find more information on how to contribute [here]($contributing_url)
+
+Automatically generated by [GitHub Action]($action_url)
+Latest run date: $run_date
+Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id)
+EOF
+)
+
+if [ "$DRY_RUN" = true ]; then
+    echo "Dry run, not updating issue"
+    echo $body  # zsh echo turns the literal \n sequences into newlines
+    exit
+fi
+
+echo $body | gh issue edit $issue_number --body-file - --title "$title"  # body is read from stdin
+echo "Updated issue $issue_number with all times"