Skip to content

Commit 106137a

Browse files
authored
Merge pull request #5504 from gchq/5425-add-new-shut-down-ecs-cluster-lambda-into-the-cdk
Issue 5425 - Add new shut down ECS cluster Lambda into the CDK
2 parents 1fef1e6 + 14e19ff commit 106137a

File tree

5 files changed

+124
-14
lines changed

5 files changed

+124
-14
lines changed

java/cdk-custom-resources/src/main/java/sleeper/cdk/custom/AutoStopEcsClusterTasksLambda.java

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import sleeper.core.util.ThreadSleep;
2525

2626
import java.util.Map;
27+
import java.util.function.Consumer;
2728

2829
import static sleeper.core.util.RateLimitUtils.sleepForSustainedRatePerSecond;
2930

@@ -62,24 +63,31 @@ public void handleEvent(
6263
case "Update":
6364
break;
6465
case "Delete":
65-
stopTasks(clusterName);
66+
stopTasks(ecsClient, clusterName, maxResults, sleep);
6667
break;
6768
default:
6869
throw new IllegalArgumentException("Invalid request type: " + event.getRequestType());
6970
}
7071
}
7172

72-
private void stopTasks(String clusterName) {
73-
ecsClient.listTasksPaginator(builder -> builder.cluster(clusterName).maxResults(maxResults))
74-
.taskArns()
73+
private static void stopTasks(EcsClient ecs, String clusterName, int maxResults, ThreadSleep sleep) {
74+
LOGGER.info("Stopping tasks for ECS cluster {}", clusterName);
75+
forEachTaskArn(ecs, clusterName, maxResults, taskArn -> {
76+
// Rate limit for ECS StopTask is 100 burst, 40 sustained:
77+
// https://docs.aws.amazon.com/AmazonECS/latest/APIReference/request-throttling.html
78+
sleepForSustainedRatePerSecond(30, sleep);
79+
ecs.stopTask(builder -> builder.cluster(clusterName).task(taskArn)
80+
.reason("Cleaning up before cdk destroy"));
81+
LOGGER.info("Stopped task {} in ECS cluster {}", taskArn, clusterName);
82+
});
83+
}
84+
85+
private static void forEachTaskArn(EcsClient ecs, String clusterName, int maxResults, Consumer<String> consumer) {
86+
ecs.listTasksPaginator(builder -> builder.cluster(clusterName).maxResults(maxResults))
7587
.stream()
76-
.forEach(
77-
task -> {
78-
LOGGER.info("Stopping task {} in cluster {} ", task, clusterName);
79-
// Rate limit for ECS StopTask is 100 burst, 40 sustained:
80-
// https://docs.aws.amazon.com/AmazonECS/latest/APIReference/request-throttling.html
81-
sleepForSustainedRatePerSecond(30, sleep);
82-
ecsClient.stopTask(builder -> builder.cluster(clusterName).task(task));
83-
});
88+
.peek(response -> LOGGER.info("Found {} tasks", response.taskArns().size()))
89+
.flatMap(response -> response.taskArns().stream())
90+
.forEach(consumer);
8491
}
92+
8593
}

java/cdk/src/main/java/sleeper/cdk/stack/core/LoggingStack.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ public enum LogGroupRef {
9191
BULK_IMPORT_AUTODELETE("bulk-import-autodelete"),
9292
BULK_IMPORT_AUTODELETE_PROVIDER("bulk-import-autodelete-provider"),
9393
INGEST_TASKS("IngestTasks"),
94+
INGEST_TASKS_AUTOSTOP("ingest-tasks-autostop"),
95+
INGEST_TASKS_AUTOSTOP_PROVIDER("ingest-tasks-autostop-provider"),
9496
INGEST_CREATE_TASKS("ingest-create-tasks"),
9597
INGEST_BATCHER_SUBMIT_FILES("ingest-batcher-submit-files"),
9698
INGEST_BATCHER_CREATE_JOBS("ingest-batcher-create-jobs"),

java/cdk/src/main/java/sleeper/cdk/stack/ingest/IngestStack.java

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949
import sleeper.cdk.jars.LambdaCode;
5050
import sleeper.cdk.stack.core.CoreStacks;
5151
import sleeper.cdk.stack.core.LoggingStack.LogGroupRef;
52+
import sleeper.cdk.util.AutoStopEcsClusterTasks;
5253
import sleeper.cdk.util.Utils;
5354
import sleeper.core.deploy.DockerDeployment;
5455
import sleeper.core.deploy.LambdaHandler;
@@ -109,6 +110,7 @@ public IngestStack(
109110
// - A lambda that periodically checks the number of running ingest tasks
110111
// and if there are not enough (i.e. there is a backlog on the queue
111112
// then it creates more tasks).
113+
// - A lambda that stops task when a delete cluster event is triggered.
112114

113115
IBucket jarsBucket = Bucket.fromBucketName(this, "JarsBucket", jars.bucketName());
114116
LambdaCode lambdaCode = jars.lambdaCode(jarsBucket);
@@ -117,7 +119,7 @@ public IngestStack(
117119
sqsQueueForIngestJobs(coreStacks, topic, errorMetrics);
118120

119121
// ECS cluster for ingest tasks
120-
ecsClusterForIngestTasks(jarsBucket, coreStacks, ingestJobQueue);
122+
ecsClusterForIngestTasks(jarsBucket, coreStacks, ingestJobQueue, lambdaCode);
121123

122124
// Lambda to create ingest tasks
123125
lambdaToCreateIngestTasks(coreStacks, ingestJobQueue, lambdaCode);
@@ -181,7 +183,9 @@ private Queue sqsQueueForIngestJobs(CoreStacks coreStacks, Topic topic, List<IMe
181183
private Cluster ecsClusterForIngestTasks(
182184
IBucket jarsBucket,
183185
CoreStacks coreStacks,
184-
Queue ingestJobQueue) {
186+
Queue ingestJobQueue,
187+
LambdaCode lambdaCode) {
188+
185189
VpcLookupOptions vpcLookupOptions = VpcLookupOptions.builder()
186190
.vpcId(instanceProperties.get(VPC_ID))
187191
.build();
@@ -236,6 +240,11 @@ private Cluster ecsClusterForIngestTasks(
236240
.build();
237241
new CfnOutput(this, INGEST_CONTAINER_ROLE_ARN, ingestRoleARNProps);
238242

243+
AutoStopEcsClusterTasks.autoStopTasksOnEcsCluster(this, instanceProperties, lambdaCode,
244+
cluster, clusterName,
245+
coreStacks.getLogGroup(LogGroupRef.INGEST_TASKS_AUTOSTOP),
246+
coreStacks.getLogGroup(LogGroupRef.INGEST_TASKS_AUTOSTOP_PROVIDER));
247+
239248
return cluster;
240249
}
241250

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
/*
2+
* Copyright 2022-2025 Crown Copyright
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package sleeper.cdk.util;
17+
18+
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
19+
import software.amazon.awscdk.CustomResource;
20+
import software.amazon.awscdk.Duration;
21+
import software.amazon.awscdk.customresources.Provider;
22+
import software.amazon.awscdk.services.ecs.ICluster;
23+
import software.amazon.awscdk.services.iam.IRole;
24+
import software.amazon.awscdk.services.iam.ManagedPolicy;
25+
import software.amazon.awscdk.services.iam.PolicyStatement;
26+
import software.amazon.awscdk.services.lambda.IFunction;
27+
import software.amazon.awscdk.services.logs.ILogGroup;
28+
import software.constructs.Construct;
29+
30+
import sleeper.cdk.jars.LambdaCode;
31+
import sleeper.core.deploy.LambdaHandler;
32+
import sleeper.core.properties.instance.InstanceProperties;
33+
import sleeper.core.util.EnvironmentUtils;
34+
35+
import java.util.List;
36+
import java.util.Map;
37+
import java.util.Objects;
38+
39+
@SuppressFBWarnings("NP_NULL_ON_SOME_PATH_FROM_RETURN_VALUE")
40+
public class AutoStopEcsClusterTasks {
41+
42+
private AutoStopEcsClusterTasks() {
43+
}
44+
45+
public static void autoStopTasksOnEcsCluster(
46+
Construct scope, InstanceProperties instanceProperties, LambdaCode lambdaCode,
47+
ICluster cluster, String clusterName,
48+
ILogGroup logGroup,
49+
ILogGroup providerLogGroup) {
50+
51+
String id = cluster.getNode().getId() + "-AutoStop";
52+
String functionName = clusterName + "-autostop";
53+
54+
IFunction lambda = lambdaCode.buildFunction(scope, LambdaHandler.AUTO_STOP_ECS_CLUSTER_TASKS, id + "Lambda", builder -> builder
55+
.functionName(functionName)
56+
.memorySize(2048)
57+
.environment(EnvironmentUtils.createDefaultEnvironmentNoConfigBucket(instanceProperties))
58+
.description("Lambda for auto-stopping ECS tasks")
59+
.logGroup(logGroup)
60+
.timeout(Duration.minutes(10)));
61+
62+
// Grant this function permission to list tasks and stop tasks
63+
PolicyStatement policyStatement = PolicyStatement.Builder
64+
.create()
65+
.resources(List.of("*"))
66+
.actions(List.of("ecs:ListTasks", "ecs:StopTask", "iam:PassRole"))
67+
.build();
68+
IRole role = Objects.requireNonNull(lambda.getRole());
69+
role.addToPrincipalPolicy(policyStatement);
70+
role.addManagedPolicy(ManagedPolicy.fromAwsManagedPolicyName("service-role/AmazonECSTaskExecutionRolePolicy"));
71+
72+
Provider propertiesWriterProvider = Provider.Builder.create(scope, id + "Provider")
73+
.onEventHandler(lambda)
74+
.logGroup(providerLogGroup)
75+
.build();
76+
77+
CustomResource customResource = CustomResource.Builder.create(scope, id)
78+
.resourceType("Custom::AutoStopEcsClusterTasks")
79+
.properties(Map.of("cluster", clusterName))
80+
.serviceToken(propertiesWriterProvider.getServiceToken())
81+
.build();
82+
83+
customResource.getNode().addDependency(cluster);
84+
85+
}
86+
87+
}

java/core/src/main/java/sleeper/core/deploy/LambdaHandler.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,10 @@ public class LambdaHandler {
126126
.jar(LambdaJar.CUSTOM_RESOURCES)
127127
.handler("sleeper.cdk.custom.AutoDeleteS3ObjectsLambda::handleEvent")
128128
.core().add();
129+
public static final LambdaHandler AUTO_STOP_ECS_CLUSTER_TASKS = builder()
130+
.jar(LambdaJar.CUSTOM_RESOURCES)
131+
.handler("sleeper.cdk.custom.AutoStopEcsClusterTasksLambda::handleEvent")
132+
.core().add();
129133
public static final LambdaHandler PROPERTIES_WRITER = builder()
130134
.jar(LambdaJar.CUSTOM_RESOURCES)
131135
.handler("sleeper.cdk.custom.PropertiesWriterLambda::handleEvent")

0 commit comments

Comments
 (0)