Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HADOOP-19072. S3A: expand optimisations on stores with "fs.s3a.performance.flags" for mkdir #6543

Merged
merged 11 commits into from
Aug 8, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,8 @@ Prioritize file creation performance over safety checks for filesystem consisten
This:
1. Skips the `LIST` call which makes sure a file is being created over a directory.
Risk: a file is created over a directory.
1. Ignores the overwrite flag.
1. Never issues a `DELETE` call to delete parent directory markers.
2. Ignores the overwrite flag.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need to add numbering in md files.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't make any difference; IDEs often add them automatically. I personally prefer just 1 because it's easier to reorder things, but don't care what others do

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, this was done by IDE so i thought of keeping it. earlier i updated this description but now that we have new config, i removed the description here and moved it to fs.s3a.performance.flags.

3. Never issues a `DELETE` call to delete parent directory markers.

It is possible to probe an S3A Filesystem instance for this capability through
the `hasPathCapability(path, "fs.s3a.create.performance")` check.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import static org.apache.hadoop.fs.FileContextTestHelper.*;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory;
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsFile;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;

import org.apache.hadoop.test.GenericTestUtils;
import org.slf4j.event.Level;
Expand Down Expand Up @@ -55,7 +56,10 @@ public abstract class FileContextCreateMkdirBaseTest {

protected final FileContextTestHelper fileContextTestHelper;
protected static FileContext fc;


public static final String MKDIR_FILE_PRESENT_ERROR =
" should have failed as a file was present";

static {
GenericTestUtils.setLogLevel(FileSystem.LOG, Level.DEBUG);
}
Expand Down Expand Up @@ -128,7 +132,7 @@ public void testMkdirsRecursiveWithExistingDir() throws IOException {
}

@Test
public void testMkdirRecursiveWithExistingFile() throws IOException {
public void testMkdirRecursiveWithExistingFile() throws Exception {
Path f = getTestRootPath(fc, "NonExistant3/aDir");
fc.mkdir(f, FileContext.DEFAULT_PERM, true);
assertIsDirectory(fc.getFileStatus(f));
Expand All @@ -141,13 +145,12 @@ public void testMkdirRecursiveWithExistingFile() throws IOException {

// try creating another folder which conflicts with filePath
Path dirPath = new Path(filePath, "bDir/cDir");
try {
fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true);
Assert.fail("Mkdir for " + dirPath
+ " should have failed as a file was present");
} catch(IOException e) {
// failed as expected
}
intercept(
IOException.class,
null,
"Mkdir for " + dirPath + MKDIR_FILE_PRESENT_ERROR,
() -> fc.mkdir(dirPath, FileContext.DEFAULT_PERM, true)
);
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
*/
public abstract class AbstractContractMkdirTest extends AbstractFSContractTestBase {

public static final String MKDIRS_NOT_FAILED_OVER_FILE =
"mkdirs did not fail over a file but returned ";

@Test
public void testMkDirRmDir() throws Throwable {
FileSystem fs = getFileSystem();
Expand Down Expand Up @@ -66,7 +69,7 @@ public void testNoMkdirOverFile() throws Throwable {
createFile(getFileSystem(), path, false, dataset);
try {
boolean made = fs.mkdirs(path);
fail("mkdirs did not fail over a file but returned " + made
fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory
Expand All @@ -93,7 +96,7 @@ public void testMkdirOverParentFile() throws Throwable {
Path child = new Path(path,"child-to-mkdir");
try {
boolean made = fs.mkdirs(child);
fail("mkdirs did not fail over a file but returned " + made
fail(MKDIRS_NOT_FAILED_OVER_FILE + made
+ "; " + ls(path));
} catch (ParentNotDirectoryException | FileAlreadyExistsException e) {
//parent is a directory
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3828,7 +3828,8 @@ public boolean mkdirs(Path p, FsPermission permission) throws IOException,
createStoreContext(),
path,
createMkdirOperationCallbacks(),
isMagicCommitPath(path)));
isMagicCommitPath(path),
performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}

/**
Expand Down Expand Up @@ -4281,7 +4282,9 @@ public boolean createEmptyDir(Path path, StoreContext storeContext)
new MkdirOperation(
storeContext,
path,
createMkdirOperationCallbacks(), false));
createMkdirOperationCallbacks(),
false,
performanceFlags.enabled(PerformanceFlagEnum.Mkdir)));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
Expand Down Expand Up @@ -54,30 +56,54 @@
* <li>If needed, one PUT</li>
* </ol>
*/
@InterfaceAudience.Private
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not needed given the .impl package is tagged private/unstable

@InterfaceStability.Evolving
public class MkdirOperation extends ExecutingStoreOperation<Boolean> {

private static final Logger LOG = LoggerFactory.getLogger(
MkdirOperation.class);

/**
* Path of the directory to be created.
*/
private final Path dir;

/**
* Mkdir Callbacks object to be used by the Mkdir operation.
*/
private final MkdirCallbacks callbacks;

/**
* Should checks for ancestors existing be skipped?
* This flag is set when working with magic directories.
* Whether to skip the validation of the parent directory.
*/
private final boolean performanceMkdir;

/**
* Whether the path is magic commit path.
*/
private final boolean isMagicPath;

/**
* Initialize Mkdir Operation context for S3A.
*
* @param storeContext Store context.
* @param dir Dir path of the directory.
* @param callbacks MkdirCallbacks object used by the Mkdir operation.
* @param isMagicPath True if the path is magic commit path.
* @param performanceMkdir If true, skip validation of the parent directory
* structure.
*/
public MkdirOperation(
final StoreContext storeContext,
final Path dir,
final MkdirCallbacks callbacks,
final boolean isMagicPath) {
final boolean isMagicPath,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually, these should be the same flag. so rename it performanceCreation and in s3aFS set to true if the path is magic or performanceCreation is true.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is from previous patch version.

final boolean performanceMkdir) {
super(storeContext);
this.dir = dir;
this.callbacks = callbacks;
this.isMagicPath = isMagicPath;
this.performanceMkdir = performanceMkdir;
}

/**
Expand Down Expand Up @@ -124,7 +150,32 @@ public Boolean execute() throws IOException {
return true;
}

// Walk path to root, ensuring closest ancestor is a directory, not file
// if performance creation mode is set, no need to check
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how about on L116 we only do a HEAD check for the path without /, (maybe need new callback), so no LIST probe for a dir via HEAD/LIST

S3AFileStatus fileStatus = performanceCreation
    ? probePathStatusOrNull(dir, StatusProbeEnum.Head) 
    : getPathStatusExpectingDir(dir);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds reasonable, i was curious about whether we need full probe for magic, i think yes we can make it much performant.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i was wrong; now the patch is in I see where I was mistaken. It's the versioned buckets where problems surface. sorry!

// whether the closest ancestor is dir.
if (!performanceMkdir) {
verifyFileStatusOfClosestAncestor();
}

// if we get here there is no directory at the destination.
// so create one.

// Create the marker file, delete the parent entries
// if the filesystem isn't configured to retain them
callbacks.createFakeDirectory(dir, false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pass down performanceCreation here; so always keep parent dirs. I know the marker retention default has changed, but we are in performance mode here...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is from previous patch version.

return true;
}

/**
* Verify the file status of the closest ancestor, if it is
* dir, the mkdir operation should proceed. If it is file,
* the mkdir operation should throw error.
*
* @throws IOException If either file status could not be retrieved,
* or if the closest ancestor is a file.
*/
private void verifyFileStatusOfClosestAncestor() throws IOException {
FileStatus fileStatus;
// Walk path to root, ensuring the closest ancestor is a directory, not file
Path fPart = dir.getParent();
try {
while (fPart != null && !fPart.isRoot()) {
mukund-thakur marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -140,24 +191,18 @@ public Boolean execute() throws IOException {
}

// there's a file at the parent entry
throw new FileAlreadyExistsException(String.format(
"Can't make directory for path '%s' since it is a file.",
fPart));
throw new FileAlreadyExistsException(
String.format(
"Can't make directory for path '%s' since it is a file.",
fPart));
}
} catch (AccessDeniedException e) {
LOG.info("mkdirs({}}: Access denied when looking"
+ " for parent directory {}; skipping checks",
dir, fPart);
dir,
fPart);
LOG.debug("{}", e, e);
}

// if we get here there is no directory at the destination.
// so create one.

// Create the marker file, delete the parent entries
// if the filesystem isn't configured to retain them
callbacks.createFakeDirectory(dir, false);
return true;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,11 @@ understands the risks.
| *Option* | *Meaning* | Since |
|----------|--------------------|:------|
| `create` | Create Performance | 3.4.1 |
| `mkdir` | Mkdir Performance | 3.4.1 |

The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)

* The `create` flag has the same semantics as [`fs.s3a.create.performance`](#create-performance)
* The `mkdir` flag semantics are explained in [Mkdir Performance](#mkdir-performance)


### <a name="create-performance"></a> Create Performance `fs.s3a.create.performance`
Expand All @@ -321,6 +324,22 @@ It may however result in

Use with care, and, ideally, enable versioning on the S3 store.


### <a name="mkdir-performance"></a> Mkdir Performance

`fs.s3a.performance.flags` flag option `mkdir`:

* Mkdir does not check whether the parent is a directory or a file.

This avoids the verification of the file status of the parent file
or the closest ancestor. Unlike the default mkdir operation, if the
parent is not a directory, the mkdir operation does not throw any
error.

This option can help with mkdir performance improvement but must be used
only if the person setting it understands the above-mentioned risk.


### <a name="threads"></a> Thread and connection pool settings.

Each S3A client interacting with a single bucket, as a single user, has its
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,22 @@
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;

import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;

/**
* Test dir operations on S3A.
*/
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {

/**
* Create the configuration for this test suite.
* Starts from the superclass configuration and passes it to
* {@code removeBaseAndBucketOverrides} for the
* {@code FS_S3A_CREATE_PERFORMANCE} option.
* NOTE(review): presumably this is so the mkdir contract tests always run
* without create-performance mode, whatever the local test settings say;
* confirm against S3ATestUtils.
*
* @return the configuration to use for the tests.
*/
@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
removeBaseAndBucketOverrides(conf,
FS_S3A_CREATE_PERFORMANCE);
return conf;
}

@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.contract.s3a;

import org.junit.Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;

import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE;
import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_PERFORMANCE_FLAGS;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides;

/**
* Test mkdir operations on S3A with create performance mode.
*/
public class ITestS3AContractMkdirWithCreatePerf extends AbstractContractMkdirTest {

@Override
protected Configuration createConfiguration() {
Configuration conf = super.createConfiguration();
removeBaseAndBucketOverrides(
conf,
FS_S3A_CREATE_PERFORMANCE,
FS_S3A_PERFORMANCE_FLAGS);
conf.setStrings(FS_S3A_PERFORMANCE_FLAGS,
"create,mkdir");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use just "set" unless you want to provide a list of the enum elements with .toString() after each

return conf;
}

/**
* Create the filesystem contract for this test suite.
*
* @param conf configuration handed to the contract.
* @return a new {@code S3AContract} built from {@code conf}.
*/
@Override
protected AbstractFSContract createContract(Configuration conf) {
return new S3AContract(conf);
}

/**
 * Replaces the base contract test of the same name.
 * This suite sets the {@code create} and {@code mkdir} performance flags,
 * so {@code mkdirs()} on a path whose parent is a file is expected to
 * succeed rather than raise an exception.
 * The test also checks that the parent is still a file with its original
 * contents, then deletes the created child directory.
 *
 * @throws Throwable on any failure.
 */
@Test
@Override
public void testMkdirOverParentFile() throws Throwable {
  describe("try to mkdir where a parent is a file, should pass");
  FileSystem fs = getFileSystem();
  Path path = methodPath();
  // named "data" so the local does not shadow the static dataset() helper
  byte[] data = dataset(1024, ' ', 'z');
  createFile(fs, path, false, data);
  Path child = new Path(path, "child-to-mkdir");
  boolean childCreated = fs.mkdirs(child);
  assertTrue("Child dir is created", childCreated);
  // the parent entry must still be a file holding the bytes written above
  assertIsFile(path);
  byte[] bytes = ContractTestUtils.readDataset(fs, path, data.length);
  ContractTestUtils.compareByteArrays(data, bytes, data.length);
  assertPathExists("mkdir failed", child);
  assertDeleted(child, true);
}

}
Loading
Loading