From 76e6b1c0d495e3f5526dfaa1c7a200569b94295b Mon Sep 17 00:00:00 2001 From: Will Dean Date: Fri, 28 Feb 2025 10:41:15 +0100 Subject: [PATCH 1/2] initial work --- scripts/slowest_tests/extract-slow-tests.py | 80 ++++++++++ .../update-slowest-times-issue.sh | 139 ++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 scripts/slowest_tests/extract-slow-tests.py create mode 100644 scripts/slowest_tests/update-slowest-times-issue.sh diff --git a/scripts/slowest_tests/extract-slow-tests.py b/scripts/slowest_tests/extract-slow-tests.py new file mode 100644 index 0000000000..793b0f9b20 --- /dev/null +++ b/scripts/slowest_tests/extract-slow-tests.py @@ -0,0 +1,80 @@ +"""Parse the GitHub action log for test times. + +Taken from https://github.com/pymc-labs/pymc-marketing/tree/main/scripts/slowest_tests/extract-slow-tests.py + +""" + +import re +import sys + +from pathlib import Path + +start_pattern = re.compile(r"==== slow") +separator_pattern = re.compile(r"====") +time_pattern = re.compile(r"(\d+\.\d+)s ") + + +def extract_lines(lines: list[str]) -> list[str]: + times = [] + + in_section = False + for line in lines: + detect_start = start_pattern.search(line) + detect_end = separator_pattern.search(line) + + if detect_start: + in_section = True + + if in_section: + times.append(line) + + if not detect_start and in_section and detect_end: + break + + return times + + +def trim_up_to_match(pattern, string: str) -> str: + match = pattern.search(string) + if not match: + return "" + + return string[match.start() :] + + +def trim(pattern, lines: list[str]) -> list[str]: + return [trim_up_to_match(pattern, line) for line in lines] + + +def strip_ansi(text: str) -> str: + ansi_escape = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") + return ansi_escape.sub("", text) + + +def format_times(times: list[str]) -> list[str]: + return ( + trim(separator_pattern, times[:1]) + + trim(time_pattern, times[1:-1]) + + [strip_ansi(line) for line in 
trim(separator_pattern, times[-1:])] + ) + + +def read_lines_from_stdin(): + return sys.stdin.read().splitlines() + + +def read_from_file(file: Path): + """For testing purposes.""" + return file.read_text().splitlines() + + +def main(read_lines): + lines = read_lines() + times = extract_lines(lines) + parsed_times = format_times(times) + print("\n".join(parsed_times)) # noqa: T201 + + +if __name__ == "__main__": + read_lines = read_lines_from_stdin + main(read_lines) diff --git a/scripts/slowest_tests/update-slowest-times-issue.sh b/scripts/slowest_tests/update-slowest-times-issue.sh new file mode 100644 index 0000000000..6d63d3b8a6 --- /dev/null +++ b/scripts/slowest_tests/update-slowest-times-issue.sh @@ -0,0 +1,139 @@ +#!/bin/zsh + +DRY_RUN=false + +owner=pymc-devs +repo=pymc +issue_number=7686 +title="Speed up test times :rocket:" +workflow=tests +contributing_url="https://www.pymc.io/projects/docs/en/stable/contributing/index.html" +action_url="https://github.com/$owner/$repo/blob/main/.github/workflows/slow-tests-issue.yml" +latest_id=$(gh run list --limit 30 --workflow $workflow --status success --json databaseId,startedAt,updatedAt --jq ' +. 
| map({ + databaseId: .databaseId, + startedAt: .startedAt, + updatedAt: .updatedAt, + minutes: (((.updatedAt | fromdate) - (.startedAt | fromdate)) / 60) +} | select(.minutes > 10)) +| .[0].databaseId +') +jobs=$(gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq ' +.jobs +') +# | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at}) + +echo $jobs + +# # Skip 3.10, float32, and Benchmark tests +# function skip_job() { +# name=$1 +# # if [[ $name == *"py3.10"* ]]; then +# # return 0 +# # fi +# # +# # if [[ $name == *"float32 1"* ]]; then +# # return 0 +# # fi +# # +# # if [[ $name == *"Benchmark"* ]]; then +# # return 0 +# # fi +# +# return 1 +# } +# +# # Remove common prefix from the name +# function remove_prefix() { +# name=$1 +# echo $name +# # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //' +# } +# +# function human_readable_time() { +# started_at=$1 +# completed_at=$2 +# +# start_seconds=$(date -d "$started_at" +%s) +# end_seconds=$(date -d "$completed_at" +%s) +# +# seconds=$(($end_seconds - $start_seconds)) +# +# if [ $seconds -lt 60 ]; then +# echo "$seconds seconds" +# else +# echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')" +# fi +# } +# +# all_times="" +# echo "$jobs" | jq -c '.[]' | while read -r job; do +# id=$(echo $job | jq -r '.id') +# name=$(echo $job | jq -r '.name') +# run_id=$(echo $job | jq -r '.run_id') +# started_at=$(echo $job | jq -r '.started_at') +# completed_at=$(echo $job | jq -r '.completed_at') +# +# if skip_job $name; then +# echo "Skipping $name" +# continue +# fi +# +# echo "Processing job: $name (ID: $id, Run ID: $run_id)" +# +# # Seeing a bit more stabilty with the API rather than the CLI +# # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run +# times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py) +# # times=$(gh run view 
--job $id --log | python extract-slow-tests.py) +# +# if [ -z "$times" ]; then +# # Some of the jobs are non-test jobs, so we skip them +# echo "No tests found for '$name', skipping" +# continue +# fi +# +# echo $times +# +# human_readable=$(human_readable_time $started_at $completed_at) +# name=$(remove_prefix $name) +# +# top="
($human_readable) $name\n\n\n\`\`\`" +# bottom="\`\`\`\n\n
" +# +# formatted_times="$top\n$times\n$bottom" +# +# if [ -n "$all_times" ]; then +# all_times="$all_times\n$formatted_times" +# else +# all_times="$formatted_times" +# fi +# done +# +# # if [ -z "$all_times" ]; then +# # echo "No slow tests found, exiting" +# # exit 1 +# # fi +# +# run_date=$(date +"%Y-%m-%d") +# body=$(cat << EOF +# If you are motivated to help speed up some tests, we would appreciate it! +# +# Here are some of the slowest test times: +# +# $all_times +# +# You can find more information on how to contribute [here]($contributing_url) +# +# Automatically generated by [GitHub Action]($action_url) +# Latest run date: $run_date +# Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id) +# EOF +# ) +# +# if [ "$DRY_RUN" = true ]; then +# echo "Dry run, not updating issue" +# echo $body +# exit +# fi +# echo $body | gh issue edit $issue_number --body-file - --title "$title" +# echo "Updated issue $issue_number with all times" From 10a5d3ce404a6040d9e0cc386703b43221da90fb Mon Sep 17 00:00:00 2001 From: Will Dean Date: Mon, 24 Mar 2025 09:56:55 +0100 Subject: [PATCH 2/2] handle character --- .../update-slowest-times-issue.sh | 238 +++++++++--------- 1 file changed, 119 insertions(+), 119 deletions(-) diff --git a/scripts/slowest_tests/update-slowest-times-issue.sh b/scripts/slowest_tests/update-slowest-times-issue.sh index 6d63d3b8a6..be9d18bb42 100644 --- a/scripts/slowest_tests/update-slowest-times-issue.sh +++ b/scripts/slowest_tests/update-slowest-times-issue.sh @@ -18,122 +18,122 @@ latest_id=$(gh run list --limit 30 --workflow $workflow --status success --json } | select(.minutes > 10)) | .[0].databaseId ') -jobs=$(gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq ' -.jobs -') -# | map({name: .name, run_id: .run_id, id: .id, started_at: .started_at, completed_at: .completed_at}) - -echo $jobs - -# # Skip 3.10, float32, and Benchmark tests -# function skip_job() { -# name=$1 -# # if [[ $name == *"py3.10"* ]]; then 
-# # return 0 -# # fi -# # -# # if [[ $name == *"float32 1"* ]]; then -# # return 0 -# # fi -# # -# # if [[ $name == *"Benchmark"* ]]; then -# # return 0 -# # fi -# -# return 1 -# } -# -# # Remove common prefix from the name -# function remove_prefix() { -# name=$1 -# echo $name -# # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //' -# } -# -# function human_readable_time() { -# started_at=$1 -# completed_at=$2 -# -# start_seconds=$(date -d "$started_at" +%s) -# end_seconds=$(date -d "$completed_at" +%s) -# -# seconds=$(($end_seconds - $start_seconds)) -# -# if [ $seconds -lt 60 ]; then -# echo "$seconds seconds" -# else -# echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')" -# fi -# } -# -# all_times="" -# echo "$jobs" | jq -c '.[]' | while read -r job; do -# id=$(echo $job | jq -r '.id') -# name=$(echo $job | jq -r '.name') -# run_id=$(echo $job | jq -r '.run_id') -# started_at=$(echo $job | jq -r '.started_at') -# completed_at=$(echo $job | jq -r '.completed_at') -# -# if skip_job $name; then -# echo "Skipping $name" -# continue -# fi -# -# echo "Processing job: $name (ID: $id, Run ID: $run_id)" -# -# # Seeing a bit more stabilty with the API rather than the CLI -# # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run -# times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py) -# # times=$(gh run view --job $id --log | python extract-slow-tests.py) -# -# if [ -z "$times" ]; then -# # Some of the jobs are non-test jobs, so we skip them -# echo "No tests found for '$name', skipping" -# continue -# fi -# -# echo $times -# -# human_readable=$(human_readable_time $started_at $completed_at) -# name=$(remove_prefix $name) -# -# top="
($human_readable) $name\n\n\n\`\`\`" -# bottom="\`\`\`\n\n
" -# -# formatted_times="$top\n$times\n$bottom" -# -# if [ -n "$all_times" ]; then -# all_times="$all_times\n$formatted_times" -# else -# all_times="$formatted_times" -# fi -# done -# -# # if [ -z "$all_times" ]; then -# # echo "No slow tests found, exiting" -# # exit 1 -# # fi -# -# run_date=$(date +"%Y-%m-%d") -# body=$(cat << EOF -# If you are motivated to help speed up some tests, we would appreciate it! -# -# Here are some of the slowest test times: -# -# $all_times -# -# You can find more information on how to contribute [here]($contributing_url) -# -# Automatically generated by [GitHub Action]($action_url) -# Latest run date: $run_date -# Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id) -# EOF -# ) -# -# if [ "$DRY_RUN" = true ]; then -# echo "Dry run, not updating issue" -# echo $body -# exit -# fi -# echo $body | gh issue edit $issue_number --body-file - --title "$title" -# echo "Updated issue $issue_number with all times" +gh api /repos/$owner/$repo/actions/runs/$latest_id/jobs --jq ' + .jobs + | map({name, id, run_id, node_id, started_at, completed_at}) +' > tmp.json + +# Skip 3.10, float32, and Benchmark tests +function skip_job() { + name=$1 + # if [[ $name == *"py3.10"* ]]; then + # return 0 + # fi + # + # if [[ $name == *"float32 1"* ]]; then + # return 0 + # fi + # + # if [[ $name == *"Benchmark"* ]]; then + # return 0 + # fi + + return 1 +} + +# Remove common prefix from the name +function remove_prefix() { + name=$1 + echo $name + # echo $name | sed -e 's/^ubuntu-latest test py3.12 numpy>=2.0 : fast-compile 0 : float32 0 : //' +} + +function human_readable_time() { + started_at=$1 + completed_at=$2 + + start_seconds=$(date -d "$started_at" +%s) + end_seconds=$(date -d "$completed_at" +%s) + + seconds=$(($end_seconds - $start_seconds)) + + if [ $seconds -lt 60 ]; then + echo "$seconds seconds" + else + echo "$(date -u -d @$seconds +'%-M minutes %-S seconds')" + fi +} + + +all_times="" +cat tmp.json | jq -c '.[]' | 
while IFS= read -r job; do + id=$(printf '%s' "$job" | jq -r '.id') + name=$(printf '%s' "$job" | jq -r '.name') + run_id=$(printf '%s' "$job" | jq -r '.run_id') + started_at=$(printf '%s' "$job" | jq -r '.started_at') + completed_at=$(printf '%s' "$job" | jq -r '.completed_at') + + if skip_job $name; then + echo "Skipping $name" + continue + fi + + echo "Processing job: $name (ID: $id, Run ID: $run_id)" + + # Seeing a bit more stability with the API rather than the CLI + # https://docs.github.com/en/rest/actions/workflow-jobs?apiVersion=2022-11-28#download-job-logs-for-a-workflow-run + times=$(gh api /repos/$owner/$repo/actions/jobs/$id/logs | python extract-slow-tests.py) + # times=$(gh run view --job $id --log | python extract-slow-tests.py) + + if [ -z "$times" ]; then + # Some of the jobs are non-test jobs, so we skip them + echo "No tests found for '$name', skipping" + continue + fi + + echo $times + + human_readable=$(human_readable_time $started_at $completed_at) + name=$(remove_prefix $name) + + top="
($human_readable) $name\n\n\n\`\`\`" + bottom="\`\`\`\n\n
" + + formatted_times="$top\n$times\n$bottom" + + if [ -n "$all_times" ]; then + all_times="$all_times\n$formatted_times" + else + all_times="$formatted_times" + fi +done + +if [ -z "$all_times" ]; then + echo "No slow tests found, exiting" + exit 1 +fi + +run_date=$(date +"%Y-%m-%d") +body=$(cat << EOF +If you are motivated to help speed up some tests, we would appreciate it! + +Here are some of the slowest test times: + +$all_times + +You can find more information on how to contribute [here]($contributing_url) + +Automatically generated by [GitHub Action]($action_url) +Latest run date: $run_date +Run logs: [$latest_id](https://github.com/$owner/$repo/actions/runs/$latest_id) +EOF +) + +if [ "$DRY_RUN" = true ]; then + echo "Dry run, not updating issue" + echo $body + exit +fi + +echo $body | gh issue edit $issue_number --body-file - --title "$title" +echo "Updated issue $issue_number with all times"