Skip to content

Commit 3e06ade

Browse files
authored
Merge pull request #392 from Kobzol/build-cancel
Handle build cancellation errors properly
2 parents 25d721f + ff54f7c commit 3e06ade

File tree

13 files changed

+270
-310
lines changed

13 files changed

+270
-310
lines changed

src/bors/comment.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ pub fn no_try_build_in_progress_comment() -> Comment {
8686
Comment::new(":exclamation: There is currently no try build in progress.".to_string())
8787
}
8888

89-
pub fn unclean_try_build_cancelled_comment() -> Comment {
89+
pub fn try_build_cancelled_with_failed_workflow_cancel_comment() -> Comment {
9090
Comment::new(
9191
"Try build was cancelled. It was not possible to cancel some workflows.".to_string(),
9292
)

src/bors/handlers/info.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ mod tests {
182182
- Priority: 10
183183
- Mergeable: yes
184184
- Try build is in progress
185-
- Workflow URL: https://github.com/workflows/Workflow1/1
185+
- Workflow URL: https://github.com/rust-lang/borstest/actions/runs/1
186186
"
187187
);
188188
Ok(())

src/bors/handlers/pr_events.rs

Lines changed: 12 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,13 @@ use crate::bors::event::{
44
PullRequestMerged, PullRequestOpened, PullRequestPushed, PullRequestReadyForReview,
55
PullRequestReopened, PullRequestUnassigned, PushToBranch,
66
};
7-
use crate::bors::handlers::trybuild::cancel_build_workflows;
87
use crate::bors::handlers::unapprove_pr;
8+
use crate::bors::handlers::workflow::{AutoBuildCancelReason, maybe_cancel_auto_build};
99
use crate::bors::mergeable_queue::MergeableQueueSender;
1010
use crate::bors::{Comment, PullRequestStatus, RepositoryState};
11-
use crate::database::{BuildStatus, MergeableState};
11+
use crate::database::MergeableState;
1212
use crate::github::{CommitSha, PullRequestNumber};
1313
use crate::utils::text::pluralize;
14-
use octocrab::params::checks::CheckRunConclusion;
1514
use std::sync::Arc;
1615

1716
pub(super) async fn handle_pull_request_edited(
@@ -55,44 +54,13 @@ pub(super) async fn handle_push_to_pull_request(
5554

5655
mergeable_queue.enqueue(repo_state.repository().clone(), pr_number);
5756

58-
let mut auto_build_cancel_message: Option<String> = None;
59-
if let Some(auto_build) = &pr_model.auto_build {
60-
if auto_build.status == BuildStatus::Pending {
61-
tracing::info!("Cancelling auto build for PR {pr_number} due to push");
62-
63-
match cancel_build_workflows(
64-
&repo_state.client,
65-
db.as_ref(),
66-
auto_build,
67-
CheckRunConclusion::Cancelled,
68-
)
69-
.await
70-
{
71-
Ok(workflow_ids) => {
72-
tracing::info!("Auto build cancelled");
73-
74-
let workflow_urls = repo_state
75-
.client
76-
.get_workflow_urls(workflow_ids.into_iter());
77-
auto_build_cancel_message = Some(cancelled_workflows_message(workflow_urls));
78-
}
79-
Err(error) => {
80-
tracing::error!(
81-
"Could not cancel workflows for SHA {}: {error:?}",
82-
auto_build.commit_sha
83-
);
84-
85-
auto_build_cancel_message = Some(unclean_auto_build_cancelled_message());
86-
}
87-
};
88-
}
89-
90-
tracing::info!(
91-
"Deleting auto build {} for PR {pr_number} due to push",
92-
auto_build.id
93-
);
94-
db.delete_auto_build(&pr_model).await?;
95-
}
57+
let auto_build_cancel_message = maybe_cancel_auto_build(
58+
&repo_state.client,
59+
&db,
60+
&pr_model,
61+
AutoBuildCancelReason::PushToPR,
62+
)
63+
.await?;
9664

9765
if !pr_model.is_approved() {
9866
return Ok(());
@@ -314,21 +282,6 @@ PR will need to be re-approved."#,
314282
.await
315283
}
316284

317-
fn cancelled_workflows_message(workflow_urls: impl Iterator<Item = String>) -> String {
318-
let mut comment =
319-
r#"Auto build cancelled due to push to branch. Cancelled workflows:"#.to_string();
320-
for url in workflow_urls {
321-
comment += format!("\n- {}", url).as_str();
322-
}
323-
324-
comment
325-
}
326-
327-
fn unclean_auto_build_cancelled_message() -> String {
328-
"Auto build was cancelled due to push to branch. It was not possible to cancel some workflows."
329-
.to_string()
330-
}
331-
332285
#[cfg(test)]
333286
mod tests {
334287
use octocrab::params::checks::{CheckRunConclusion, CheckRunStatus};
@@ -758,32 +711,6 @@ mod tests {
758711
.await;
759712
}
760713

761-
#[sqlx::test]
762-
async fn delete_completed_auto_build_on_push(pool: sqlx::PgPool) {
763-
BorsBuilder::new(pool)
764-
.github(gh_state_with_merge_queue())
765-
.run_test(async |tester| {
766-
tester.post_comment("@bors r+").await?;
767-
tester.expect_comments(1).await;
768-
tester.process_merge_queue().await;
769-
tester.expect_comments(1).await;
770-
tester
771-
.wait_for_default_pr(|pr| pr.auto_build.is_some())
772-
.await?;
773-
tester.workflow_full_success(tester.auto_branch()).await?;
774-
tester.expect_comments(1).await;
775-
tester
776-
.push_to_pr(default_repo_name(), default_pr_number())
777-
.await?;
778-
tester.expect_comments(1).await;
779-
tester
780-
.wait_for_default_pr(|pr| pr.auto_build.is_none())
781-
.await?;
782-
Ok(())
783-
})
784-
.await;
785-
}
786-
787714
#[sqlx::test]
788715
async fn cancel_pending_auto_build_on_push_comment(pool: sqlx::PgPool) {
789716
BorsBuilder::new(pool)
@@ -804,7 +731,8 @@ mod tests {
804731
:warning: A new commit `pr-1-commit-1` was pushed to the branch, the
805732
PR will need to be re-approved.
806733
807-
Auto build cancelled due to push to branch. Cancelled workflows:
734+
Auto build cancelled due to push. Cancelled workflows:
735+
808736
- https://github.com/rust-lang/borstest/actions/runs/1
809737
");
810738
Ok(())
@@ -834,7 +762,7 @@ mod tests {
834762
:warning: A new commit `pr-1-commit-1` was pushed to the branch, the
835763
PR will need to be re-approved.
836764
837-
Auto build was cancelled due to push to branch. It was not possible to cancel some workflows.
765+
Auto build cancelled due to push. It was not possible to cancel some workflows.
838766
");
839767
Ok(())
840768
})

src/bors/handlers/refresh.rs

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ use std::collections::BTreeMap;
99
use crate::bors::PullRequestStatus;
1010
use crate::bors::RepositoryState;
1111
use crate::bors::comment::build_timed_out_comment;
12-
use crate::bors::handlers::trybuild::cancel_build_workflows;
12+
use crate::bors::handlers::workflow::{CancelBuildError, cancel_build};
1313
use crate::bors::mergeable_queue::MergeableQueueSender;
14+
use crate::database::{BuildModel, BuildStatus};
1415
use crate::{PgDbClient, TeamApiClient};
1516

1617
/// Go through pending builds and figure out if we need to do something about them:
@@ -20,33 +21,61 @@ pub async fn refresh_pending_builds(
2021
db: &PgDbClient,
2122
) -> anyhow::Result<()> {
2223
let running_builds = db.get_pending_builds(repo.repository()).await?;
23-
tracing::info!("Found {} running build(s)", running_builds.len());
24+
tracing::info!("Found {} pending build(s)", running_builds.len());
2425

2526
let timeout = repo.config.load().timeout;
2627
for build in running_builds {
27-
if elapsed_time(build.created_at) >= timeout {
28-
if let Some(pr) = db.find_pr_by_build(&build).await? {
29-
tracing::info!("Cancelling build {build:?}");
30-
if let Err(error) =
31-
cancel_build_workflows(&repo.client, db, &build, CheckRunConclusion::TimedOut)
32-
.await
33-
{
28+
if let Err(error) = refresh_build(&repo, db, &build, timeout).await {
29+
tracing::error!("Could not refresh pending build {build:?}: {error:?}");
30+
}
31+
}
32+
Ok(())
33+
}
34+
35+
async fn refresh_build(
36+
repo: &RepositoryState,
37+
db: &PgDbClient,
38+
build: &BuildModel,
39+
timeout: Duration,
40+
) -> anyhow::Result<()> {
41+
if elapsed_time(build.created_at) >= timeout {
42+
if let Some(pr) = db.find_pr_by_build(build).await? {
43+
tracing::info!("Cancelling build {build:?}");
44+
match cancel_build(&repo.client, db, build, CheckRunConclusion::TimedOut).await {
45+
Ok(_) => {}
46+
Err(
47+
CancelBuildError::FailedToMarkBuildAsCancelled(error)
48+
| CancelBuildError::FailedToCancelWorkflows(error),
49+
) => {
3450
tracing::error!(
3551
"Could not cancel workflows for SHA {}: {error:?}",
3652
build.commit_sha
3753
);
3854
}
55+
}
3956

40-
if let Err(error) = repo
41-
.client
42-
.post_comment(pr.number, build_timed_out_comment(timeout))
43-
.await
44-
{
45-
tracing::error!("Could not send comment to PR {}: {error:?}", pr.number);
46-
}
47-
} else {
48-
tracing::warn!("No PR found for build {build:?}");
57+
if let Err(error) = repo
58+
.client
59+
.post_comment(pr.number, build_timed_out_comment(timeout))
60+
.await
61+
{
62+
tracing::error!("Could not send comment to PR {}: {error:?}", pr.number);
4963
}
64+
} else {
65+
// This is an orphaned build. It should never be created, unless we have some bug or
66+
// unexpected race condition in bors.
67+
// When we do encounter such a build, we can mark it as timeouted, as it is no longer
68+
// relevant.
69+
// Note that we could write an explicit query for finding these orphaned builds,
70+
// but that could be quite expensive. Instead we piggyback on the existing logic
71+
// for timed out builds; if a build is still pending and has no PR attached, then
72+
// there likely won't be any additional event that could mark it as finished.
73+
// So eventually all such builds will arrive here
74+
tracing::warn!(
75+
"Detected orphaned pending without a PR, marking it as time outed: {build:?}"
76+
);
77+
db.update_build_status(build, BuildStatus::Timeouted)
78+
.await?;
5079
}
5180
}
5281
Ok(())

src/bors/handlers/review.rs

Lines changed: 11 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
use std::sync::Arc;
22

3-
use octocrab::params::checks::CheckRunConclusion;
4-
53
use crate::bors::RepositoryState;
64
use crate::bors::command::RollupMode;
75
use crate::bors::command::{Approver, CommandPrefix};
@@ -10,14 +8,14 @@ use crate::bors::comment::{
108
approved_comment, delegate_comment, delegate_try_builds_comment, unapprove_non_open_pr_comment,
119
};
1210
use crate::bors::handlers::labels::handle_label_trigger;
13-
use crate::bors::handlers::trybuild::cancel_build_workflows;
11+
use crate::bors::handlers::workflow::{AutoBuildCancelReason, maybe_cancel_auto_build};
1412
use crate::bors::handlers::{PullRequestData, deny_request};
1513
use crate::bors::handlers::{has_permission, unapprove_pr};
1614
use crate::bors::merge_queue::MergeQueueSender;
1715
use crate::bors::{Comment, PullRequestStatus};
16+
use crate::database::ApprovalInfo;
1817
use crate::database::DelegatedPermission;
1918
use crate::database::TreeState;
20-
use crate::database::{ApprovalInfo, BuildStatus};
2119
use crate::github::LabelTrigger;
2220
use crate::github::{GithubUser, PullRequestNumber};
2321
use crate::permissions::PermissionType;
@@ -137,41 +135,13 @@ pub(super) async fn command_unapprove(
137135
return Ok(());
138136
}
139137

140-
let mut auto_build_cancel_message: Option<String> = None;
141-
if let Some(auto_build) = &pr.db.auto_build {
142-
if auto_build.status != BuildStatus::Pending {
143-
return Ok(());
144-
}
145-
146-
tracing::info!("Cancelling auto build for PR {pr_num} due to push");
147-
148-
match cancel_build_workflows(
149-
&repo_state.client,
150-
db.as_ref(),
151-
auto_build,
152-
CheckRunConclusion::Cancelled,
153-
)
154-
.await
155-
{
156-
Ok(workflow_ids) => {
157-
tracing::info!("Auto build cancelled");
158-
159-
let workflow_urls = repo_state
160-
.client
161-
.get_workflow_urls(workflow_ids.into_iter());
162-
auto_build_cancel_message = Some(auto_build_cancelled_message(workflow_urls).await);
163-
}
164-
Err(error) => {
165-
tracing::error!(
166-
"Could not cancel workflows for SHA {}: {error:?}",
167-
auto_build.commit_sha
168-
);
169-
170-
auto_build_cancel_message = Some(unclean_auto_build_cancelled_message().await);
171-
}
172-
};
173-
}
174-
138+
let auto_build_cancel_message = maybe_cancel_auto_build(
139+
&repo_state.client,
140+
&db,
141+
&pr.db,
142+
AutoBuildCancelReason::Unapproval,
143+
)
144+
.await?;
175145
unapprove_pr(&repo_state, &db, &pr.db).await?;
176146
notify_of_unapproval(&repo_state, pr, auto_build_cancel_message).await?;
177147

@@ -392,20 +362,6 @@ async fn notify_of_delegation(
392362
repo.client.post_comment(pr_number, comment).await
393363
}
394364

395-
async fn auto_build_cancelled_message(workflow_urls: impl Iterator<Item = String>) -> String {
396-
let mut comment = r#"Auto build cancelled due to unapproval. Cancelled workflows:"#.to_string();
397-
for url in workflow_urls {
398-
comment += format!("\n- {}", url).as_str();
399-
}
400-
401-
comment
402-
}
403-
404-
async fn unclean_auto_build_cancelled_message() -> String {
405-
"Auto build was cancelled due to unapproval. It was not possible to cancel some workflows."
406-
.to_string()
407-
}
408-
409365
#[cfg(test)]
410366
mod tests {
411367
use octocrab::params::checks::{CheckRunConclusion, CheckRunStatus};
@@ -1386,6 +1342,7 @@ merge_queue_enabled = true
13861342
Commit pr-1-sha has been unapproved.
13871343
13881344
Auto build cancelled due to unapproval. Cancelled workflows:
1345+
13891346
- https://github.com/rust-lang/borstest/actions/runs/1
13901347
");
13911348
Ok(())
@@ -1415,7 +1372,7 @@ merge_queue_enabled = true
14151372
insta::assert_snapshot!(tester.get_comment().await?, @r"
14161373
Commit pr-1-sha has been unapproved.
14171374
1418-
Auto build was cancelled due to unapproval. It was not possible to cancel some workflows.
1375+
Auto build cancelled due to unapproval. It was not possible to cancel some workflows.
14191376
");
14201377
Ok(())
14211378
})

0 commit comments

Comments
 (0)