-
Notifications
You must be signed in to change notification settings - Fork 189
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix bug attaching multiple CapacityProviders in multiple EC2 services. (
#359) * Fix bug attaching multiple CapacityProviders in multiple EC2 services. When launching multiple EC2 services the current tenant-onboarding-app CloudFormation stack tries to create a ClusterCapacityAssociations CloudFormation resource for each service. However because attaching CapacityProviders to ECS Clusters is non-atomic only the first ClusterCapacityAssociations will actually succeed. This commit adds a CustomResource which on CapacityProvider create and delete will attach or detach that CapacityProvider to the existing ECSCluster. Because this attachment is a non-atomic operation (i.e. it involves first getting the existing capacityProviders for the ECS cluster before then overwriting with a new set of capacityProviders) this commit also includes using the existing Onboarding DynamoDB table as a locking mechanism to prevent against illogical overwrites between multiple stacks operating on the same ECS Cluster. Fixes #356. * Fix null pointer in cloudformation-utils Co-authored-by: PoeppingT <[email protected]> Co-authored-by: brtrvn <[email protected]>
- Loading branch information
1 parent
f0276f3
commit a1609e3
Showing
21 changed files
with
1,158 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
134 changes: 134 additions & 0 deletions
134
resources/custom-resources/attach-ecs-capacity-provider/pom.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<!-- | ||
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
Licensed under the Apache License, Version 2.0 (the "License"). | ||
You may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
--> | ||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> | ||
<modelVersion>4.0.0</modelVersion> | ||
<parent> | ||
<groupId>com.amazon.aws.partners.saasfactory.saasboost</groupId> | ||
<artifactId>saasboost-custom-resources</artifactId> | ||
<version>1.0.0</version> | ||
</parent> | ||
<artifactId>AttachEcsCapacityProvider</artifactId> | ||
<version>1.0.0</version> | ||
<packaging>jar</packaging> | ||
<licenses> | ||
<license> | ||
<name>Apache-2.0</name> | ||
<url>http://www.apache.org/licenses/LICENSE-2.0</url> | ||
</license> | ||
</licenses> | ||
<properties> | ||
<checkstyle.maxAllowedViolations>0</checkstyle.maxAllowedViolations> | ||
</properties> | ||
|
||
<build> | ||
<finalName>${project.artifactId}</finalName> | ||
<plugins> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-compiler-plugin</artifactId> | ||
</plugin> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-surefire-plugin</artifactId> | ||
</plugin> | ||
<plugin> | ||
<groupId>org.apache.maven.plugins</groupId> | ||
<artifactId>maven-assembly-plugin</artifactId> | ||
</plugin> | ||
<plugin> | ||
<groupId>pl.project13.maven</groupId> | ||
<artifactId>git-commit-id-plugin</artifactId> | ||
<version>4.0.0</version> | ||
<executions> | ||
<execution> | ||
<id>get-the-git-infos</id> | ||
<goals> | ||
<goal>revision</goal> | ||
</goals> | ||
<phase>initialize</phase> | ||
</execution> | ||
</executions> | ||
<configuration> | ||
<generateGitPropertiesFile>true</generateGitPropertiesFile> | ||
<generateGitPropertiesFilename>${project.build.outputDirectory}/git.properties</generateGitPropertiesFilename> | ||
<includeOnlyProperties> | ||
<includeOnlyProperty>^git.commit.id.describe</includeOnlyProperty> | ||
<includeOnlyProperty>^git.commit.id.describe-short</includeOnlyProperty> | ||
<includeOnlyProperty>^git.commit.time</includeOnlyProperty> | ||
<includeOnlyProperty>^git.closest.tag.name</includeOnlyProperty> | ||
</includeOnlyProperties> | ||
<commitIdGenerationMode>full</commitIdGenerationMode> | ||
<dotGitDirectory>../../.git</dotGitDirectory> | ||
<failOnNoGitDirectory>false</failOnNoGitDirectory> | ||
</configuration> | ||
</plugin> | ||
</plugins> | ||
</build> | ||
|
||
<dependencies> | ||
<dependency> | ||
<groupId>com.amazon.aws.partners.saasfactory.saasboost</groupId> | ||
<artifactId>Utils</artifactId> | ||
<version>1.0.0</version> | ||
<!-- Don't bundle our layer so we get the shared one at runtime --> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>com.amazon.aws.partners.saasfactory.saasboost</groupId> | ||
<artifactId>CloudFormationUtils</artifactId> | ||
<version>1.0.0</version> | ||
<!-- Don't bundle our layer so we get the shared one at runtime --> | ||
<scope>provided</scope> | ||
</dependency> | ||
<dependency> | ||
<groupId>org.mockito</groupId> | ||
<artifactId>mockito-core</artifactId> | ||
</dependency> | ||
<dependency> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>ecs</artifactId> | ||
<version>${aws.java.sdk.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>netty-nio-client</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>apache-client</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
<dependency> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>dynamodb</artifactId> | ||
<version>${aws.java.sdk.version}</version> | ||
<exclusions> | ||
<exclusion> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>netty-nio-client</artifactId> | ||
</exclusion> | ||
<exclusion> | ||
<groupId>software.amazon.awssdk</groupId> | ||
<artifactId>apache-client</artifactId> | ||
</exclusion> | ||
</exclusions> | ||
</dependency> | ||
</dependencies> | ||
|
||
</project> |
154 changes: 154 additions & 0 deletions
154
...a/com/amazon/aws/partners/saasfactory/saasboost/AttachCapacityProviderRequestHandler.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
/* | ||
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.amazon.aws.partners.saasfactory.saasboost; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import software.amazon.awssdk.services.ecs.EcsClient; | ||
import software.amazon.awssdk.services.ecs.model.Cluster; | ||
import software.amazon.awssdk.services.ecs.model.ClusterNotFoundException; | ||
import software.amazon.awssdk.services.ecs.model.DescribeClustersRequest; | ||
import software.amazon.awssdk.services.ecs.model.PutClusterCapacityProvidersRequest; | ||
import software.amazon.awssdk.services.ecs.model.UpdateInProgressException; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.List; | ||
import java.util.concurrent.Callable; | ||
import java.util.function.Function; | ||
import java.util.stream.Collectors; | ||
|
||
public final class AttachCapacityProviderRequestHandler implements Callable<HandleResult> { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(AttachCapacityProviderRequestHandler.class); | ||
|
||
private final RequestContext requestContext; | ||
private final CapacityProviderLock lock; | ||
private final EcsClient ecs; | ||
|
||
public AttachCapacityProviderRequestHandler( | ||
RequestContext requestContext, | ||
CapacityProviderLock lock, | ||
EcsClient ecs) { | ||
this.ecs = ecs; | ||
this.requestContext = requestContext; | ||
this.lock = lock; | ||
} | ||
|
||
@Override | ||
public HandleResult call() { | ||
HandleResult result = new HandleResult(); | ||
LOGGER.info(requestContext.requestType.toUpperCase()); | ||
if ("Create".equalsIgnoreCase(requestContext.requestType) | ||
|| "Update".equalsIgnoreCase(requestContext.requestType)) { | ||
LOGGER.info("Attaching capacity provider {} to ecs cluster {} for tenant {}", | ||
requestContext.capacityProvider, requestContext.ecsCluster, requestContext.tenantId); | ||
result = atomicallyUpdateCapacityProviders((capacityProviders) -> { | ||
if (!capacityProviders.contains(requestContext.capacityProvider)) { | ||
List<String> modifiedCapacityProviders = new ArrayList<String>(capacityProviders); | ||
modifiedCapacityProviders.add(requestContext.capacityProvider); | ||
return modifiedCapacityProviders; | ||
} | ||
return capacityProviders; | ||
}); | ||
} else if ("Delete".equalsIgnoreCase(requestContext.requestType)) { | ||
// unclear whether we need this.. commenting it out for testing. | ||
LOGGER.info("Detaching capacity provider {} from ecs cluster {} for tenant {}", | ||
requestContext.capacityProvider, requestContext.ecsCluster, requestContext.tenantId); | ||
result = atomicallyUpdateCapacityProviders((capacityProviders) -> { | ||
return capacityProviders.stream() | ||
.filter((capacityProvider) -> !capacityProvider.equals(requestContext.capacityProvider)) | ||
.collect(Collectors.toList()); | ||
}); | ||
result.setSucceeded(); | ||
} else { | ||
LOGGER.error("FAILED unknown requestType {}", requestContext.requestType); | ||
result.putFailureReason("Unknown RequestType " + requestContext.requestType); | ||
result.setFailed(); | ||
} | ||
|
||
return result; | ||
} | ||
|
||
private HandleResult atomicallyUpdateCapacityProviders( | ||
Function<List<String>, List<String>> capacityProvidersMutationFunction) { | ||
HandleResult result = new HandleResult(); | ||
// lock ddb | ||
lock.lock(requestContext); | ||
try { | ||
// read capacity providers into list | ||
List<String> existingCapacityProviders = getExistingCapacityProviders(); | ||
|
||
List<String> mutatedCapacityProviders = capacityProvidersMutationFunction.apply(existingCapacityProviders); | ||
LOGGER.debug("existingCapacityProviders {} mutated to {}", | ||
existingCapacityProviders, mutatedCapacityProviders); | ||
|
||
boolean successful = false; | ||
// if the mutate did nothing, no point in slowing us down to make an ECS call | ||
if (existingCapacityProviders.equals(mutatedCapacityProviders)) { | ||
successful = true; | ||
result.setSucceeded(); | ||
} | ||
while (!successful) { | ||
try { | ||
// set capacity providers. response doesn't really give us anything but a | ||
// description of the new cluster. exceptions are thrown on failure | ||
ecs.putClusterCapacityProviders(PutClusterCapacityProvidersRequest.builder() | ||
.cluster(requestContext.ecsCluster) | ||
.capacityProviders(mutatedCapacityProviders) | ||
.build()); | ||
successful = true; | ||
result.setSucceeded(); | ||
} catch (UpdateInProgressException uipe) { | ||
// There's a Amazon ECS container agent update in progress on this container instance. | ||
// ECS errors indicate this can be retried. Wait 10 seconds and try again. | ||
LOGGER.error("Received error calling putClusterCapacityProviders", uipe); | ||
LOGGER.error(Utils.getFullStackTrace(uipe)); | ||
LOGGER.error("Waiting 10 seconds before retrying.."); | ||
Thread.sleep(10 * 1000); // 10 seconds | ||
} | ||
} | ||
} catch (ClusterNotFoundException cnfe) { | ||
LOGGER.error("Could not find ecs cluster: {}", requestContext.ecsCluster); | ||
LOGGER.error(Utils.getFullStackTrace(cnfe)); | ||
result.putFailureReason(cnfe.getMessage()); | ||
result.setFailed(); | ||
} catch (InterruptedException ie) { | ||
LOGGER.error("Error while waiting between putClusterCapacityProvider calls", ie.getMessage()); | ||
LOGGER.error(Utils.getFullStackTrace(ie)); | ||
result.putFailureReason(ie.getMessage()); | ||
result.setFailed(); | ||
} finally { | ||
// unlock ddb | ||
lock.unlock(requestContext); | ||
} | ||
return result; | ||
} | ||
|
||
private List<String> getExistingCapacityProviders() { | ||
List<Cluster> returnedClusters = ecs.describeClusters( | ||
DescribeClustersRequest.builder().clusters(requestContext.ecsCluster).build()).clusters(); | ||
if (returnedClusters.size() != 1) { | ||
// we only passed one cluster ARN but we received 0 or 2 | ||
LOGGER.error("Expected 1 cluster with name {} but found {}", | ||
requestContext.ecsCluster, returnedClusters.size()); | ||
} | ||
List<String> existingCapacityProviders = returnedClusters.get(0).capacityProviders(); | ||
return existingCapacityProviders == null ? List.of() : existingCapacityProviders; | ||
} | ||
|
||
} |
76 changes: 76 additions & 0 deletions
76
...rc/main/java/com/amazon/aws/partners/saasfactory/saasboost/AttachEcsCapacityProvider.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
/* | ||
* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"). | ||
* You may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.amazon.aws.partners.saasfactory.saasboost; | ||
|
||
import com.amazonaws.services.lambda.runtime.Context; | ||
import com.amazonaws.services.lambda.runtime.RequestHandler; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import software.amazon.awssdk.services.dynamodb.DynamoDbClient; | ||
import software.amazon.awssdk.services.ecs.EcsClient; | ||
|
||
import java.util.*; | ||
import java.util.concurrent.*; | ||
|
||
public class AttachEcsCapacityProvider implements RequestHandler<Map<String, Object>, Object> { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(AttachEcsCapacityProvider.class); | ||
|
||
private final EcsClient ecs; | ||
private final CapacityProviderLock lock; | ||
|
||
public AttachEcsCapacityProvider() { | ||
LOGGER.info("Version Info: {}", Utils.version(this.getClass())); | ||
ecs = Utils.sdkClient(EcsClient.builder(), EcsClient.SERVICE_NAME); | ||
lock = new CapacityProviderLock(Utils.sdkClient(DynamoDbClient.builder(), DynamoDbClient.SERVICE_NAME)); | ||
} | ||
|
||
@Override | ||
public Object handleRequest(Map<String, Object> event, Context context) { | ||
Utils.logRequestEvent(event); | ||
|
||
Map<String, Object> resourceProperties = (Map<String, Object>) event.get("ResourceProperties"); | ||
RequestContext requestContext = RequestContext.builder() | ||
.requestType((String) event.get("RequestType")) | ||
.capacityProvider((String) resourceProperties.get("CapacityProvider")) | ||
.ecsCluster((String) resourceProperties.get("ECSCluster")) | ||
.onboardingDdbTable((String) resourceProperties.get("OnboardingDdbTable")) | ||
.tenantId((String) resourceProperties.get("TenantId")) | ||
.build(); | ||
HandleResult handleRequestResult = new HandleResult(); | ||
ExecutorService service = Executors.newSingleThreadExecutor(); | ||
try { | ||
Callable<HandleResult> c = new AttachCapacityProviderRequestHandler(requestContext, lock, ecs); | ||
Future<?> f = service.submit(c); | ||
handleRequestResult = (HandleResult) f.get(context.getRemainingTimeInMillis() - 1000, | ||
TimeUnit.MILLISECONDS); | ||
} catch (final TimeoutException | InterruptedException | ExecutionException e) { | ||
// Timed out | ||
LOGGER.error("FAILED unexpected error or request timed out " + e.getMessage()); | ||
String stackTrace = Utils.getFullStackTrace(e); | ||
LOGGER.error(stackTrace); | ||
handleRequestResult.setFailed(); | ||
handleRequestResult.putResponseData("Reason", stackTrace); | ||
} finally { | ||
service.shutdown(); | ||
} | ||
CloudFormationResponse.send(event, context, | ||
handleRequestResult.succeeded() ? "SUCCESS" : "FAILED", | ||
handleRequestResult.getResponseData()); | ||
return null; | ||
} | ||
} |
Oops, something went wrong.