Skip to content

Commit 863f8dc

Browse files
committed
clippy!
1 parent eafa6a7 commit 863f8dc

File tree

1 file changed

+100
-62
lines changed
  • turbopack/crates/turbo-tasks-fs/src

1 file changed

+100
-62
lines changed

turbopack/crates/turbo-tasks-fs/src/glob.rs

Lines changed: 100 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -380,10 +380,49 @@ impl GlobProgram {
380380
// If we hit a `}` then we are done so compute the jumps and pop the prefix.
381381
if t == GlobToken::RBracket {
382382
let mut branches = left_bracket_starts.pop().unwrap();
383-
let mut prefix = &mut branches.prefix;
383+
let prefix = &mut branches.prefix;
384384
let branches = &mut branches.branches;
385-
build_alternatives(prefix, branches)?;
386-
std::mem::swap(&mut instructions, &mut prefix);
385+
386+
let num_branches = branches.len();
387+
if num_branches <= 1 {
388+
bail!(
389+
"Cannot have an alternation with less than 2 members, remove the \
390+
brackets?"
391+
);
392+
}
393+
let mut next_branch_offset = num_branches - 1;
394+
for branch in &branches[0..num_branches - 1] {
395+
// to jump past the branch we need to jump past all its instructions
396+
// +1 to account for the JUMP
397+
// instruction at the end
398+
next_branch_offset += branch.len() + 1;
399+
prefix.push(GlobInstruction::Fork(
400+
next_branch_offset.try_into().context(
401+
"glob too large, cannot have more than 32K instructions",
402+
)?,
403+
));
404+
next_branch_offset -= 1; // subtract one since we added a fork
405+
// instruction.
406+
}
407+
let mut end_of_alternation =
408+
next_branch_offset + branches.last().unwrap().len();
409+
for branch in &mut branches[0..num_branches - 1] {
410+
end_of_alternation -= branch.len(); // from the end of this branch, this is how far it is to the end of
411+
// the
412+
// alternation
413+
prefix.append(branch);
414+
prefix.push(GlobInstruction::Jump(
415+
end_of_alternation.try_into().context(
416+
"glob too large, cannot have more than 64K instructions",
417+
)?,
418+
));
419+
end_of_alternation -= 1; // account for the jump instruction
420+
}
421+
end_of_alternation -= branches.last().unwrap().len();
422+
prefix.append(branches.last_mut().unwrap());
423+
debug_assert!(end_of_alternation == 0);
424+
425+
std::mem::swap(&mut instructions, prefix);
387426
}
388427
}
389428
GlobToken::End => {
@@ -416,8 +455,8 @@ impl GlobProgram {
416455
let mut has_path_to_prefix_match = BitSet::new(instructions.len());
417456

418457
for start in (0..instructions.len()).rev() {
419-
visited.set(start as usize);
420-
let (valid_prefix_end, valid_match) = match instructions[start as usize] {
458+
visited.set(start);
459+
let (valid_prefix_end, valid_match) = match instructions[start] {
421460
GlobInstruction::MatchLiteral(byte) => (byte == b'/', false),
422461
GlobInstruction::MatchAnyNonDelim => (false, false),
423462
GlobInstruction::MatchGlobStar { terminal } => {
@@ -451,7 +490,7 @@ impl GlobProgram {
451490
)
452491
}
453492
GlobInstruction::Fork(offset) => {
454-
let next_instruction = (start + 1) as usize;
493+
let next_instruction = start + 1;
455494
debug_assert!(
456495
visited.has(next_instruction),
457496
"should have already visited the target"
@@ -466,7 +505,7 @@ impl GlobProgram {
466505
// So we don't need to follow them now
467506
next
468507
} else {
469-
let fork_target = start as usize + offset as usize;
508+
let fork_target = start + offset as usize;
470509
debug_assert!(
471510
visited.has(fork_target),
472511
"should have already visited the target"
@@ -486,10 +525,10 @@ impl GlobProgram {
486525
}
487526
};
488527
if valid_prefix_end {
489-
has_path_to_prefix_match.set(start as usize)
528+
has_path_to_prefix_match.set(start)
490529
}
491530
if valid_match {
492-
has_path_to_match.set(start as usize)
531+
has_path_to_match.set(start)
493532
}
494533
}
495534

@@ -520,22 +559,34 @@ impl GlobProgram {
520559
// program but typically far less
521560
// This bounds execution at O(N*M) where N is the size of the path and M is the size of the
522561
// program
523-
for &byte in v.as_bytes() {
562+
let mut n_threads = 1;
563+
let mut ip = 0;
564+
'outer: for &byte in v.as_bytes() {
524565
let mut thread_index = 0;
525-
// We need to use this looping construct since we may add elements to `cur` as we go.
526-
// `cur.n` will never be > `len` so this loop is bounded to N iterations.
527-
let mut n_threads = cur.n;
528-
while thread_index < n_threads {
529-
let ip = cur.get(thread_index);
566+
// We manage the loop manually at the bottom to make it easier to skip it when hitting
567+
// some fast paths
568+
loop {
530569
match self.instructions[ip as usize] {
531570
GlobInstruction::MatchLiteral(m) => {
532571
if byte == m {
572+
if n_threads == 1 {
573+
cur.clear();
574+
ip += 1;
575+
cur.add(ip);
576+
continue 'outer;
577+
}
533578
// We matched, proceed to the next character
534579
next.add(ip + 1);
535580
}
536581
}
537582
GlobInstruction::MatchAnyNonDelim => {
538583
if byte != b'/' {
584+
if n_threads == 1 {
585+
cur.clear();
586+
ip += 1;
587+
cur.add(ip);
588+
continue 'outer;
589+
}
539590
next.add(ip + 1);
540591
}
541592
}
@@ -548,17 +599,34 @@ impl GlobProgram {
548599
// If we see a `/` then we need to consider ending the globstar.
549600
if byte == b'/' {
550601
next.add(ip + 1);
602+
// but even so we should keep trying to match, just like a fork.
603+
next.add(ip);
604+
} else {
605+
if n_threads == 1 {
606+
continue 'outer;
607+
}
608+
next.add(ip);
551609
}
552-
// but even so we should keep trying to match, just like a fork.
553-
next.add(ip);
554610
}
555611
GlobInstruction::MatchClass(index) => {
556612
if self.range_sets[index as usize].contains(byte) {
613+
if n_threads == 1 {
614+
cur.clear();
615+
ip += 1;
616+
cur.add(ip);
617+
continue 'outer;
618+
}
557619
next.add(ip + 1);
558620
}
559621
}
560622
GlobInstruction::NegativeMatchClass(index) => {
561623
if !self.range_sets[index as usize].contains(byte) {
624+
if n_threads == 1 {
625+
cur.clear();
626+
ip += 1;
627+
cur.add(ip);
628+
continue 'outer;
629+
}
562630
next.add(ip + 1);
563631
}
564632
}
@@ -569,24 +637,33 @@ impl GlobProgram {
569637
}
570638
}
571639
GlobInstruction::Fork(offset) => {
572-
let added1 = cur.add(ip + 1);
573-
let added2 = cur.add((offset + (ip as i16)) as u16);
574-
if added1 || added2 {
575-
n_threads = cur.n;
640+
if cur.add(ip + 1) {
641+
n_threads += 1;
642+
}
643+
if cur.add((offset + (ip as i16)) as u16) {
644+
n_threads += 1;
576645
}
577646
}
578647
GlobInstruction::Match => {
579648
// We ran out of instructions while we still have characters
580649
// so this thread dies
581650
}
582651
}
652+
// Do this at the bottom of the loop
583653
thread_index += 1;
654+
if thread_index < n_threads {
655+
ip = cur.get(thread_index);
656+
} else {
657+
break;
658+
}
584659
}
585-
if next.n == 0 {
660+
n_threads = next.n;
661+
if n_threads == 0 {
586662
// This means that all threads exited early. This isn't needed for correctness,
587663
// but there is no point iterating the rest of the characters.
588664
return false;
589665
}
666+
ip = next.get(0);
590667
// We have some progress! clear current and swap the two lists to advance to the next
591668
// character.
592669
cur.clear();
@@ -618,45 +695,6 @@ impl GlobProgram {
618695
}
619696
}
620697

621-
fn build_alternatives(
622-
prefix: &mut Vec<GlobInstruction>,
623-
branches: &mut Vec<Vec<GlobInstruction>>,
624-
) -> Result<(), anyhow::Error> {
625-
let num_branches = branches.len();
626-
if num_branches <= 1 {
627-
bail!("Cannot have an alternation with less than 2 members, remove the brackets?");
628-
}
629-
let mut next_branch_offset = num_branches - 1;
630-
for branch in &branches[0..num_branches - 1] {
631-
// to jump past the branch we need to jump past all its instructions +1
632-
// to account for the JUMP instruction at the end
633-
next_branch_offset += branch.len() + 1;
634-
prefix.push(GlobInstruction::Fork(
635-
next_branch_offset
636-
.try_into()
637-
.context("glob too large, cannot have more than 32K instructions")?,
638-
));
639-
next_branch_offset -= 1; // subtract one since we added a fork
640-
// instruction.
641-
}
642-
let mut end_of_alternation = next_branch_offset + branches.last().unwrap().len();
643-
for branch in &mut branches[0..num_branches - 1] {
644-
end_of_alternation -= branch.len(); // from the end of this branch, this is how far it is to the end of the
645-
// alternation
646-
prefix.extend(branch.drain(..));
647-
prefix.push(GlobInstruction::Jump(
648-
end_of_alternation
649-
.try_into()
650-
.context("glob too large, cannot have more than 64K instructions")?,
651-
));
652-
end_of_alternation -= 1; // account for the jump instruction
653-
}
654-
end_of_alternation -= branches.last().unwrap().len();
655-
prefix.extend(branches.last_mut().unwrap().drain(..));
656-
debug_assert!(end_of_alternation == 0);
657-
Ok(())
658-
}
659-
660698
// Consider a more compact encoding.
661699
// The jump offsets force this to 4 bytes
662700
// A variable length instruction encoding would help a lot
@@ -966,7 +1004,7 @@ mod tests {
9661004
#[test]
9671005
fn test_tokenizer() {
9681006
let mut tok = Tokenizer::new("foo/bar[a-z]/?/**");
969-
let prefix: Vec<GlobToken> = "foo/bar".bytes().map(|c| GlobToken::Literal(c)).collect();
1007+
let prefix: Vec<GlobToken> = "foo/bar".bytes().map(GlobToken::Literal).collect();
9701008
for t in prefix {
9711009
assert_eq!(t, tok.next_token());
9721010
}

0 commit comments

Comments
 (0)