Skip to content

Commit e16cce2

Browse files
committed
fix(file_scanner): fix hardlink processing in presence of errors
1 parent 9ce4605 commit e16cce2

File tree

2 files changed

+58
-18
lines changed

2 files changed

+58
-18
lines changed

src/dwarfs/file_scanner.cpp

+16-13
Original file line numberDiff line numberDiff line change
@@ -234,20 +234,23 @@ void file_scanner_<LoggerPolicy>::scan_dedupe(file* p) {
234234
uint64_t size = p->size();
235235
uint64_t start_hash{0};
236236

237-
if (size >= kLargeFileThreshold && !p->is_invalid()) {
238-
try {
239-
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
240-
checksum cs(checksum::algorithm::XXH3_64);
241-
cs.update(mm->addr(), kLargeFileStartHashSize);
242-
cs.finalize(&start_hash);
243-
file_start_hash_.emplace(p, start_hash);
244-
} catch (...) {
245-
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
246-
<< folly::exceptionStr(std::current_exception())
247-
<< ", creating empty file";
248-
++prog_.errors;
249-
p->set_invalid();
237+
if (size >= kLargeFileThreshold) {
238+
if (!p->is_invalid()) {
239+
try {
240+
auto mm = os_.map_file(p->fs_path(), kLargeFileStartHashSize);
241+
checksum cs(checksum::algorithm::XXH3_64);
242+
cs.update(mm->addr(), kLargeFileStartHashSize);
243+
cs.finalize(&start_hash);
244+
} catch (...) {
245+
LOG_ERROR << "failed to map file " << p->path_as_string() << ": "
246+
<< folly::exceptionStr(std::current_exception())
247+
<< ", creating empty file";
248+
++prog_.errors;
249+
p->set_invalid();
250+
}
250251
}
252+
253+
file_start_hash_.emplace(p, start_hash);
251254
}
252255

253256
auto [it, is_new] = unique_size_.emplace(std::make_pair(size, start_hash),

test/tool_main_test.cpp

+42-5
Original file line numberDiff line numberDiff line change
@@ -281,12 +281,13 @@ class mkdwarfs_tester : public tester_common {
281281
return filesystem_v2(*lgr, *os, mm, opt);
282282
}
283283

284-
filesystem_v2 fs_from_file(std::string path) {
284+
filesystem_v2
285+
fs_from_file(std::string path, filesystem_options const& opt = {}) {
285286
auto fsimage = fa->get_file(path);
286287
if (!fsimage) {
287288
throw std::runtime_error("file not found: " + path);
288289
}
289-
return fs_from_data(std::move(fsimage.value()));
290+
return fs_from_data(std::move(fsimage.value()), opt);
290291
}
291292

292293
filesystem_v2 fs_from_stdout(filesystem_options const& opt = {}) {
@@ -2404,8 +2405,6 @@ class map_file_error_test : public testing::TestWithParam<char const*> {};
24042405
TEST_P(map_file_error_test, delayed) {
24052406
std::string extra_args{GetParam()};
24062407

2407-
// TODO: we must also simulate hardlinks here...
2408-
24092408
auto t = mkdwarfs_tester::create_empty();
24102409
t.add_root_dir();
24112410
t.os->add_local_files(audio_data_dir);
@@ -2414,6 +2413,25 @@ TEST_P(map_file_error_test, delayed) {
24142413
.max_name_len = 8,
24152414
.with_errors = true});
24162415

2416+
static constexpr size_t const kSizeSmall{1 << 10};
2417+
static constexpr size_t const kSizeLarge{1 << 20};
2418+
auto gen_small = [] { return test::loremipsum(kSizeLarge); };
2419+
auto gen_large = [] { return test::loremipsum(kSizeLarge); };
2420+
t.os->add("large_link1", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
2421+
gen_large);
2422+
t.os->add("large_link2", {43, 0100755, 2, 1000, 100, kSizeLarge, 42, 0, 0, 0},
2423+
gen_large);
2424+
t.os->add("small_link1", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
2425+
gen_small);
2426+
t.os->add("small_link2", {44, 0100755, 2, 1000, 100, kSizeSmall, 42, 0, 0, 0},
2427+
gen_small);
2428+
for (auto const& link :
2429+
{"large_link1", "large_link2", "small_link1", "small_link2"}) {
2430+
t.os->set_map_file_error(
2431+
fs::path{"/"} / link,
2432+
std::make_exception_ptr(std::runtime_error("map_file_error")), 0);
2433+
}
2434+
24172435
{
24182436
std::mt19937_64 rng{42};
24192437

@@ -2444,9 +2462,28 @@ TEST_P(map_file_error_test, delayed) {
24442462

24452463
EXPECT_EQ(2, t.run(args)) << t.err();
24462464

2447-
auto fs = t.fs_from_file("test.dwarfs");
2465+
auto fs = t.fs_from_file("test.dwarfs", {.metadata = {.enable_nlink = true}});
24482466
// fs.dump(std::cout, 2);
24492467

2468+
{
2469+
auto large_link1 = fs.find("/large_link1");
2470+
auto large_link2 = fs.find("/large_link2");
2471+
auto small_link1 = fs.find("/small_link1");
2472+
auto small_link2 = fs.find("/small_link2");
2473+
2474+
ASSERT_TRUE(large_link1);
2475+
ASSERT_TRUE(large_link2);
2476+
ASSERT_TRUE(small_link1);
2477+
ASSERT_TRUE(small_link2);
2478+
EXPECT_EQ(large_link1->inode_num(), large_link2->inode_num());
2479+
EXPECT_EQ(small_link1->inode_num(), small_link2->inode_num());
2480+
file_stat st;
2481+
ASSERT_EQ(0, fs.getattr(*large_link1, &st));
2482+
EXPECT_EQ(0, st.size);
2483+
ASSERT_EQ(0, fs.getattr(*small_link1, &st));
2484+
EXPECT_EQ(0, st.size);
2485+
}
2486+
24502487
std::unordered_map<fs::path, std::string, fs_path_hash> actual_files;
24512488
fs.walk([&](auto const& dev) {
24522489
auto iv = dev.inode();

0 commit comments

Comments
 (0)