Skip to content

Commit

Permalink
HADOOP-19045. S3A: Validate CreateSession Timeout Propagation (#6470)
Browse files Browse the repository at this point in the history

New test ITestCreateSessionTimeout to verify that the duration set
in fs.s3a.connection.request.timeout is passed all the way down.

This is done by adding a sleep() in a custom signer and verifying
that it is interrupted and that an AWSApiCallTimeoutException is
raised.

+ Fix testRequestTimeout()
* doesn't skip if considered cross-region
* sets a minimum duration of 0 before invocation
* resets the minimum afterwards

Contributed by Steve Loughran
  • Loading branch information
steveloughran committed Jan 30, 2024
1 parent 5657c36 commit f2cc365
Show file tree
Hide file tree
Showing 6 changed files with 262 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -337,16 +337,33 @@ private Constants() {
public static final int DEFAULT_SOCKET_TIMEOUT = (int)DEFAULT_SOCKET_TIMEOUT_DURATION.toMillis();

/**
* Time until a request is timed-out: {@value}.
* If zero, there is no timeout.
* How long should the SDK retry/wait on a response from an S3 store: {@value}
* <i>including the time needed to sign the request</i>.
* <p>
* This is time to response, so for a GET request it is "time to 200 response"
* not the time limit to download the requested data.
* This makes it different from {@link #REQUEST_TIMEOUT}, which is for total
* HTTP request.
* <p>
* Default unit is milliseconds.
* <p>
* There is a minimum duration set in {@link #MINIMUM_NETWORK_OPERATION_DURATION};
* it is impossible to set a delay less than this, even for testing.
* Why so? Too many deployments where the configuration assumed the timeout was in seconds
* and that "120" was a reasonable value rather than "too short to work reliably"
* <p>
* Note for anyone writing tests which need to set a low value for this:
* to avoid the minimum duration overrides, call
* {@code AWSClientConfig.setMinimumOperationDuration()} and set a low value
* before creating the filesystem.
*/
public static final String REQUEST_TIMEOUT =
"fs.s3a.connection.request.timeout";

/**
* Default duration of a request before it is timed out: Zero.
* Default duration of a request before it is timed out: 60s.
*/
public static final Duration DEFAULT_REQUEST_TIMEOUT_DURATION = Duration.ZERO;
public static final Duration DEFAULT_REQUEST_TIMEOUT_DURATION = Duration.ofSeconds(60);

/**
* Default duration of a request before it is timed out: Zero.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
* fs.s3a.http.signer.class = org.apache.hadoop.fs.s3a.auth.CustomHttpSigner
* </pre>
*/
public final class CustomHttpSigner implements HttpSigner<AwsCredentialsIdentity> {
public class CustomHttpSigner implements HttpSigner<AwsCredentialsIdentity> {
private static final Logger LOG = LoggerFactory
.getLogger(CustomHttpSigner.class);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -577,7 +577,7 @@ static ClientSettings createApiConnectionSettings(Configuration conf) {

/**
* Build the HTTP connection settings object from the configuration.
* All settings are calculated, including the api call timeout.
* All settings are calculated.
* @param conf configuration to evaluate
* @return connection settings.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.net.ConnectException;
import java.net.URI;
import java.security.PrivilegedExceptionAction;
import java.time.Duration;

import org.assertj.core.api.Assertions;
import org.junit.Rule;
Expand All @@ -49,6 +50,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.auth.STSClientFactory;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
import org.apache.hadoop.security.ProviderUtils;
import org.apache.hadoop.security.UserGroupInformation;
Expand Down Expand Up @@ -435,16 +437,22 @@ public void testCustomUserAgent() throws Exception {
@Test
public void testRequestTimeout() throws Exception {
conf = new Configuration();
skipIfCrossRegionClient(conf);
conf.set(REQUEST_TIMEOUT, "120");
fs = S3ATestUtils.createTestFileSystem(conf);
S3Client s3 = getS3Client("Request timeout (ms)");
SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class,
"clientConfiguration");
assertEquals("Configured " + REQUEST_TIMEOUT +
" is different than what AWS sdk configuration uses internally",
120000,
clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT).toMillis());
// remove the safety check on minimum durations.
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO);
try {
Duration timeout = Duration.ofSeconds(120);
conf.set(REQUEST_TIMEOUT, timeout.getSeconds() + "s");
fs = S3ATestUtils.createTestFileSystem(conf);
S3Client s3 = getS3Client("Request timeout (ms)");
SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class,
"clientConfiguration");
Assertions.assertThat(clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT))
.describedAs("Configured " + REQUEST_TIMEOUT +
" is different than what AWS sdk configuration uses internally")
.isEqualTo(timeout);
} finally {
AWSClientConfig.resetMinimumOperationDuration();
}
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,16 @@ public static void skipIfS3ExpressBucket(
!isS3ExpressTestBucket(configuration));
}

/**
* Skip a test if the test bucket is not an S3Express bucket.
* @param configuration configuration to probe
*/
public static void skipIfNotS3ExpressBucket(
Configuration configuration) {
assume("Skipping test as bucket is not an S3Express bucket",
isS3ExpressTestBucket(configuration));
}

/**
* Is the test bucket an S3Express bucket?
* @param conf configuration
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.s3a.performance;

import java.time.Duration;
import java.util.Arrays;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import org.assertj.core.api.Assertions;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.http.SdkHttpRequest;
import software.amazon.awssdk.http.auth.spi.signer.AsyncSignRequest;
import software.amazon.awssdk.http.auth.spi.signer.AsyncSignedRequest;
import software.amazon.awssdk.http.auth.spi.signer.HttpSigner;
import software.amazon.awssdk.http.auth.spi.signer.SignRequest;
import software.amazon.awssdk.http.auth.spi.signer.SignedRequest;
import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.AWSApiCallTimeoutException;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.auth.CustomHttpSigner;
import org.apache.hadoop.fs.s3a.impl.AWSClientConfig;
import org.apache.hadoop.util.DurationInfo;

import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_SIGNERS;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME;
import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED;
import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT;
import static org.apache.hadoop.fs.s3a.Constants.RETRY_LIMIT;
import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE;
import static org.apache.hadoop.fs.s3a.Constants.S3EXPRESS_CREATE_SESSION;
import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfNotS3ExpressBucket;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;

/**
* Test timeout of S3 Client CreateSession call, which was originally
* hard coded to 10 seconds.
* Only executed against an S3Express store.
*/
public class ITestCreateSessionTimeout extends AbstractS3ACostTest {

private static final Logger LOG =
LoggerFactory.getLogger(ITestCreateSessionTimeout.class);

/**
* What is the duration for the operation after which the test is considered
* to have failed because timeouts didn't get passed down?
*/
private static final long TIMEOUT_EXCEPTION_THRESHOLD = Duration.ofSeconds(5).toMillis();

/**
* How long to sleep in requests?
*/
private static final AtomicLong SLEEP_DURATION = new AtomicLong(
Duration.ofSeconds(20).toMillis());

/**
* Flag set if the sleep was interrupted during signing.
*/
private static final AtomicBoolean SLEEP_INTERRUPTED = new AtomicBoolean(false);

/**
* Create a configuration with a 10 millisecond timeout on API calls
* and a custom signer which sleeps much longer than that.
* @return the configuration.
*/
@Override
public Configuration createConfiguration() {
final Configuration conf = super.createConfiguration();
skipIfNotS3ExpressBucket(conf);
disableFilesystemCaching(conf);
removeBaseAndBucketOverrides(conf,
CUSTOM_SIGNERS,
HTTP_SIGNER_ENABLED,
REQUEST_TIMEOUT,
RETRY_LIMIT,
S3A_BUCKET_PROBE,
S3EXPRESS_CREATE_SESSION,
SIGNING_ALGORITHM_S3
);

conf.setBoolean(HTTP_SIGNER_ENABLED, true);
conf.setClass(HTTP_SIGNER_CLASS_NAME, SlowSigner.class, HttpSigner.class);
Duration duration = Duration.ofMillis(10);

conf.setLong(REQUEST_TIMEOUT, duration.toMillis());
conf.setInt(RETRY_LIMIT, 1);

return conf;
}

@Override
public void setup() throws Exception {
// remove the safety check on minimum durations.
AWSClientConfig.setMinimumOperationDuration(Duration.ZERO);
try {
super.setup();
} finally {
// restore the safety check on minimum durations.
AWSClientConfig.resetMinimumOperationDuration();
}
}

@Override
protected void deleteTestDirInTeardown() {
// no-op
}

/**
* Make this a no-op to avoid IO.
* @param path path path
*/
@Override
protected void mkdirs(Path path) {

}

@Test
public void testSlowSigningTriggersTimeout() throws Throwable {

final S3AFileSystem fs = getFileSystem();
DurationInfo call = new DurationInfo(LOG, true, "Create session");
final AWSApiCallTimeoutException thrown = intercept(AWSApiCallTimeoutException.class,
() -> fs.getFileStatus(path("testShortTimeout")));
call.finished();
LOG.info("Exception raised after {}", call, thrown);
// if the timeout took too long, fail with details and include the original
// exception
if (call.value() > TIMEOUT_EXCEPTION_THRESHOLD) {
throw new AssertionError("Duration of create session " + call.getDurationString()
+ " exceeds threshold " + TIMEOUT_EXCEPTION_THRESHOLD + " ms: " + thrown, thrown);
}
Assertions.assertThat(SLEEP_INTERRUPTED.get())
.describedAs("Sleep interrupted during signing")
.isTrue();

// now scan the inner exception stack for "createSession"
Arrays.stream(thrown.getCause().getStackTrace())
.filter(e -> e.getMethodName().equals("createSession"))
.findFirst()
.orElseThrow(() ->
new AssertionError("No createSession() in inner stack trace of", thrown));
}

/**
* Sleep for as long as {@link #SLEEP_DURATION} requires.
*/
private static void sleep() {
long sleep = SLEEP_DURATION.get();
if (sleep > 0) {
LOG.info("Sleeping for {} ms", sleep, new Exception());
try (DurationInfo d = new DurationInfo(LOG, true, "Sleep for %d ms", sleep)) {
Thread.sleep(sleep);
} catch (InterruptedException e) {
LOG.info("Interrupted", e);
SLEEP_INTERRUPTED.set(true);
Thread.currentThread().interrupt();
}
}
}

/**
* A signer which calls {@link #sleep()} before signing.
* As this signing takes place within the CreateSession Pipeline,
*/
public static class SlowSigner extends CustomHttpSigner {

@Override
public SignedRequest sign(
final SignRequest<? extends AwsCredentialsIdentity> request) {

final SdkHttpRequest httpRequest = request.request();
LOG.info("Signing request {}", httpRequest);
sleep();
return super.sign(request);
}

@Override
public CompletableFuture<AsyncSignedRequest> signAsync(
final AsyncSignRequest<? extends AwsCredentialsIdentity> request) {
sleep();
return super.signAsync(request);
}

}
}

0 comments on commit f2cc365

Please sign in to comment.