Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HADOOP-18656: [ABFS] Adding Support for Paginated Delete for Large Directories in HNS Account #6409

Merged
merged 16 commits into from
Apr 4, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,10 @@ public class AbfsConfiguration{
FS_AZURE_ABFS_RENAME_RESILIENCE, DefaultValue = DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE)
private boolean renameResilience;

@BooleanConfigurationValidatorAnnotation(ConfigurationKey =
FS_AZURE_ENABLE_PAGINATED_DELETE, DefaultValue = DEFAULT_ENABLE_PAGINATED_DELETE)
private boolean isPaginatedDeleteEnabled;

private String clientProvidedEncryptionKey;

private String clientProvidedEncryptionKeySHA;
Expand Down Expand Up @@ -1205,7 +1209,7 @@ public boolean getRenameResilience() {
return renameResilience;
}

void setRenameResilience(boolean actualResilience) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

any reason to cut this? I presume it means no tests are using it...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes this code was not used any where so removed it

renameResilience = actualResilience;
public boolean isPaginatedDeleteEnabled() {
return isPaginatedDeleteEnabled;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,34 @@ public final class AbfsHttpConstants {
public static final char CHAR_EQUALS = '=';
public static final char CHAR_STAR = '*';
public static final char CHAR_PLUS = '+';
public static final String DECEMBER_2019_API_VERSION = "2019-12-12";
steveloughran marked this conversation as resolved.
Show resolved Hide resolved
public static final String APRIL_2021_API_VERSION = "2021-04-10";

/**
* Specifies the version of the REST protocol used for processing the request.
* Versions should be added in enum list in ascending chronological order.
* Latest one should be added last in the list.
* When upgrading the version for whole driver, update the getCurrentVersion;
*/
public enum ApiVersion {

DEC_12_2019("2019-12-12"),
APR_10_2021("2021-04-10"),
AUG_03_2023("2023-08-03");

private final String xMsApiVersion;

ApiVersion(String xMsApiVersion) {
this.xMsApiVersion = xMsApiVersion;
}

@Override
public String toString() {
return xMsApiVersion;
}

public static ApiVersion getCurrentVersion() {
return DEC_12_2019;
steveloughran marked this conversation as resolved.
Show resolved Hide resolved
}
}

/**
* Value that differentiates categories of the http_status.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,11 @@ public final class ConfigurationKeys {
/** Add extra resilience to rename failures, at the expense of performance. */
public static final String FS_AZURE_ABFS_RENAME_RESILIENCE = "fs.azure.enable.rename.resilience";

/**
* Specify whether paginated behavior is to be expected or not in delete path.
*/
public static final String FS_AZURE_ENABLE_PAGINATED_DELETE = "fs.azure.enable.paginated.delete";

public static String accountProperty(String property, String account) {
return property + "." + account;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ public final class FileSystemConfigurations {
public static final int STREAM_ID_LEN = 12;
public static final boolean DEFAULT_ENABLE_ABFS_LIST_ITERATOR = true;
public static final boolean DEFAULT_ENABLE_ABFS_RENAME_RESILIENCE = true;
public static final boolean DEFAULT_ENABLE_PAGINATED_DELETE = false;

/**
* Limit of queued block upload operations before writes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ public final class HttpQueryParams {
public static final String QUERY_PARAM_CLOSE = "close";
public static final String QUERY_PARAM_UPN = "upn";
public static final String QUERY_PARAM_BLOBTYPE = "blobtype";
public static final String QUERY_PARAM_PAGINATED = "paginated";

//query params for SAS
public static final String QUERY_PARAM_SAOID = "saoid";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ public class AbfsClient implements Closeable {

private final URL baseUrl;
private final SharedKeyCredentials sharedKeyCredentials;
private String xMsVersion = DECEMBER_2019_API_VERSION;
private ApiVersion xMsVersion = ApiVersion.getCurrentVersion();
private final ExponentialRetryPolicy retryPolicy;
private final String filesystem;
private final AbfsConfiguration abfsConfiguration;
Expand Down Expand Up @@ -139,7 +139,7 @@ private AbfsClient(final URL baseUrl,

if (encryptionContextProvider != null) {
this.encryptionContextProvider = encryptionContextProvider;
xMsVersion = APRIL_2021_API_VERSION; // will be default once server change deployed
xMsVersion = ApiVersion.APR_10_2021; // will be default once server change deployed
encryptionType = EncryptionType.ENCRYPTION_CONTEXT;
} else if (abfsConfiguration.getEncodedClientProvidedEncryptionKey() != null) {
clientProvidedEncryptionKey =
Expand Down Expand Up @@ -233,9 +233,9 @@ AbfsThrottlingIntercept getIntercept() {
return intercept;
}

List<AbfsHttpHeader> createDefaultHeaders() {
List<AbfsHttpHeader> createDefaultHeaders(ApiVersion xMsVersion) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Can you add a Javadoc here?
  2. as most invocations use the xMsVersion value, why not retain the original signature as an overloaded invocation -avoids having to change so much of the code.
  3. is this package private just for testing? if so, let's mark it as @VisibleForTesting

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taken all

final List<AbfsHttpHeader> requestHeaders = new ArrayList<AbfsHttpHeader>();
requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion));
requestHeaders.add(new AbfsHttpHeader(X_MS_VERSION, xMsVersion.toString()));
requestHeaders.add(new AbfsHttpHeader(ACCEPT, APPLICATION_JSON
+ COMMA + SINGLE_WHITE_SPACE + APPLICATION_OCTET_STREAM));
requestHeaders.add(new AbfsHttpHeader(ACCEPT_CHARSET,
Expand Down Expand Up @@ -305,7 +305,7 @@ AbfsUriQueryBuilder createDefaultUriQueryBuilder() {

public AbfsRestOperation createFilesystem(TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = new AbfsUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM);
Expand All @@ -320,7 +320,7 @@ public AbfsRestOperation createFilesystem(TracingContext tracingContext)

public AbfsRestOperation setFilesystemProperties(final String properties,
TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
// JDK7 does not support PATCH, so to work around the issue we will use
// PUT and specify the real method in the X-Http-Method-Override header.
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE,
Expand All @@ -345,7 +345,7 @@ public AbfsRestOperation setFilesystemProperties(final String properties,
public AbfsRestOperation listPath(final String relativePath, final boolean recursive, final int listMaxResults,
final String continuation, TracingContext tracingContext)
throws IOException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM);
Expand All @@ -367,7 +367,7 @@ public AbfsRestOperation listPath(final String relativePath, final boolean recur
}

public AbfsRestOperation getFilesystemProperties(TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM);
Expand All @@ -383,7 +383,7 @@ public AbfsRestOperation getFilesystemProperties(TracingContext tracingContext)
}

public AbfsRestOperation deleteFilesystem(TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(QUERY_PARAM_RESOURCE, FILESYSTEM);
Expand Down Expand Up @@ -434,7 +434,7 @@ public AbfsRestOperation createPath(final String path,
final ContextEncryptionAdapter contextEncryptionAdapter,
final TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
if (isFile) {
addEncryptionKeyRequestHeaders(path, requestHeaders, true,
contextEncryptionAdapter, tracingContext);
Expand Down Expand Up @@ -495,7 +495,7 @@ public AbfsRestOperation createPath(final String path,
}

public AbfsRestOperation acquireLease(final String path, int duration, TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, ACQUIRE_LEASE_ACTION));
requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_DURATION, Integer.toString(duration)));
Expand All @@ -515,7 +515,7 @@ public AbfsRestOperation acquireLease(final String path, int duration, TracingCo

public AbfsRestOperation renewLease(final String path, final String leaseId,
TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RENEW_LEASE_ACTION));
requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId));
Expand All @@ -534,7 +534,7 @@ public AbfsRestOperation renewLease(final String path, final String leaseId,

public AbfsRestOperation releaseLease(final String path,
final String leaseId, TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, RELEASE_LEASE_ACTION));
requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ID, leaseId));
Expand All @@ -553,7 +553,7 @@ public AbfsRestOperation releaseLease(final String path,

public AbfsRestOperation breakLease(final String path,
TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_ACTION, BREAK_LEASE_ACTION));
requestHeaders.add(new AbfsHttpHeader(X_MS_LEASE_BREAK_PERIOD, DEFAULT_LEASE_BREAK_PERIOD));
Expand Down Expand Up @@ -601,7 +601,7 @@ public AbfsClientRenameResult renamePath(
boolean isMetadataIncompleteState,
boolean isNamespaceEnabled)
throws IOException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final boolean hasEtag = !isEmpty(sourceEtag);

Expand Down Expand Up @@ -797,7 +797,7 @@ public AbfsRestOperation append(final String path, final byte[] buffer,
AppendRequestParameters reqParams, final String cachedSasToken,
ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
addEncryptionKeyRequestHeaders(path, requestHeaders, false,
contextEncryptionAdapter, tracingContext);
if (reqParams.isExpectHeaderEnabled()) {
Expand Down Expand Up @@ -929,7 +929,7 @@ public AbfsRestOperation flush(final String path, final long position,
final String cachedSasToken, final String leaseId,
ContextEncryptionAdapter contextEncryptionAdapter, TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
addEncryptionKeyRequestHeaders(path, requestHeaders, false,
contextEncryptionAdapter, tracingContext);
// JDK7 does not support PATCH, so to workaround the issue we will use
Expand Down Expand Up @@ -962,7 +962,7 @@ public AbfsRestOperation flush(final String path, final long position,
public AbfsRestOperation setPathProperties(final String path, final String properties,
final TracingContext tracingContext, final ContextEncryptionAdapter contextEncryptionAdapter)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
addEncryptionKeyRequestHeaders(path, requestHeaders, false,
contextEncryptionAdapter, tracingContext);
// JDK7 does not support PATCH, so to workaround the issue we will use
Expand Down Expand Up @@ -990,7 +990,7 @@ public AbfsRestOperation getPathStatus(final String path,
final boolean includeProperties, final TracingContext tracingContext,
final ContextEncryptionAdapter contextEncryptionAdapter)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
String operation = SASTokenProvider.GET_PROPERTIES_OPERATION;
Expand Down Expand Up @@ -1027,7 +1027,7 @@ public AbfsRestOperation read(final String path,
String cachedSasToken,
ContextEncryptionAdapter contextEncryptionAdapter,
TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
addEncryptionKeyRequestHeaders(path, requestHeaders, false,
contextEncryptionAdapter, tracingContext);
requestHeaders.add(new AbfsHttpHeader(RANGE,
Expand All @@ -1053,12 +1053,22 @@ public AbfsRestOperation read(final String path,
return op;
}

public AbfsRestOperation deletePath(final String path, final boolean recursive, final String continuation,
public AbfsRestOperation deletePath(final String path, final boolean recursive,
final String continuation,
TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();

final List<AbfsHttpHeader> requestHeaders
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a comment explaining what is happening, and move the = sign to this line, leaving

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment

= (isPaginatedDelete(tracingContext, recursive)
&& xMsVersion.compareTo(ApiVersion.AUG_03_2023) < 0)
? createDefaultHeaders(ApiVersion.AUG_03_2023)
: createDefaultHeaders(xMsVersion);
final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();

if (isPaginatedDelete(tracingContext, recursive)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what if this is a paginated delete but the L1126 api version test doesn't hold? Is that ok?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is a paginated delete, then API version change condition will only fail if current version is greater than AUG_03_2023.

In that case we can go ahead with Current API Version only as Azure APIs are backward compatible,

// Add paginated query parameter
abfsUriQueryBuilder.addQuery(QUERY_PARAM_PAGINATED, TRUE);
}

abfsUriQueryBuilder.addQuery(QUERY_PARAM_RECURSIVE, String.valueOf(recursive));
abfsUriQueryBuilder.addQuery(QUERY_PARAM_CONTINUATION, continuation);
String operation = recursive ? SASTokenProvider.DELETE_RECURSIVE_OPERATION : SASTokenProvider.DELETE_OPERATION;
Expand Down Expand Up @@ -1130,7 +1140,7 @@ public AbfsRestOperation deleteIdempotencyCheckOp(final AbfsRestOperation op) {
public AbfsRestOperation setOwner(final String path, final String owner, final String group,
TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
// JDK7 does not support PATCH, so to workaround the issue we will use
// PUT and specify the real method in the X-Http-Method-Override header.
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE,
Expand Down Expand Up @@ -1160,7 +1170,7 @@ public AbfsRestOperation setOwner(final String path, final String owner, final S
public AbfsRestOperation setPermission(final String path, final String permission,
TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
// JDK7 does not support PATCH, so to workaround the issue we will use
// PUT and specify the real method in the X-Http-Method-Override header.
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE,
Expand Down Expand Up @@ -1190,7 +1200,7 @@ public AbfsRestOperation setAcl(final String path, final String aclSpecString,
public AbfsRestOperation setAcl(final String path, final String aclSpecString, final String eTag,
TracingContext tracingContext)
throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);
// JDK7 does not support PATCH, so to workaround the issue we will use
// PUT and specify the real method in the X-Http-Method-Override header.
requestHeaders.add(new AbfsHttpHeader(X_HTTP_METHOD_OVERRIDE,
Expand Down Expand Up @@ -1223,7 +1233,7 @@ public AbfsRestOperation getAclStatus(final String path, TracingContext tracingC

public AbfsRestOperation getAclStatus(final String path, final boolean useUPN,
TracingContext tracingContext) throws AzureBlobFileSystemException {
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders();
final List<AbfsHttpHeader> requestHeaders = createDefaultHeaders(xMsVersion);

final AbfsUriQueryBuilder abfsUriQueryBuilder = createDefaultUriQueryBuilder();
abfsUriQueryBuilder.addQuery(HttpQueryParams.QUERY_PARAM_ACTION, AbfsHttpConstants.GET_ACCESS_CONTROL);
Expand Down Expand Up @@ -1261,7 +1271,7 @@ public AbfsRestOperation checkAccess(String path, String rwx, TracingContext tra
AbfsRestOperationType.CheckAccess,
AbfsHttpConstants.HTTP_METHOD_HEAD,
url,
createDefaultHeaders());
createDefaultHeaders(xMsVersion));
op.execute(tracingContext);
return op;
}
Expand Down Expand Up @@ -1401,6 +1411,16 @@ private synchronized Boolean getIsNamespaceEnabled(TracingContext tracingContext
return isNamespaceEnabled;
}

protected Boolean getIsPaginatedDeleteEnabled() {
return abfsConfiguration.isPaginatedDeleteEnabled();
}

private Boolean isPaginatedDelete(TracingContext tracingContext,
boolean isRecursiveDelete) throws AzureBlobFileSystemException {
return getIsPaginatedDeleteEnabled()
&& getIsNamespaceEnabled(tracingContext) && isRecursiveDelete;
}

public AuthType getAuthType() {
return authType;
}
Expand Down Expand Up @@ -1501,7 +1521,7 @@ protected AbfsCounters getAbfsCounters() {
return abfsCounters;
}

public String getxMsVersion() {
public ApiVersion getxMsVersion() {
return xMsVersion;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

package org.apache.hadoop.fs.azurebfs.services;

import java.util.List;

import org.apache.hadoop.fs.azurebfs.extensions.EncryptionContextProvider;

public final class AbfsClientUtils {
Expand All @@ -31,4 +33,13 @@ public static void setIsNamespaceEnabled(final AbfsClient abfsClient, final Bool
public static void setEncryptionContextProvider(final AbfsClient abfsClient, final EncryptionContextProvider provider) {
abfsClient.setEncryptionContextProvider(provider);
}

public static String getHeaderValue(List<AbfsHttpHeader> reqHeaders, String headerName) {
for (AbfsHttpHeader header : reqHeaders) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you could probably do something involving java 8 streaming/filtering here if you wanted to...

if (header.getName().equals(headerName)) {
return header.getValue();
}
}
return "";
}
}
Loading