From 07af65a22480db3b2d2d2d51ab1f2d74ef74528c Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Sat, 2 Apr 2022 16:25:52 +0100 Subject: [PATCH 01/12] Added a par_group_by method to immutable slices --- src/slice/group_by.rs | 124 ++++++++++++++++++++++++++++++++++++++++++ src/slice/mod.rs | 25 +++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/slice/group_by.rs diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs new file mode 100644 index 000000000..b10a7466e --- /dev/null +++ b/src/slice/group_by.rs @@ -0,0 +1,124 @@ + +use crate::iter::plumbing::*; +use crate::iter::*; + +fn find_index(xs: &[T], pred: &F) -> Option +where + F: Fn(&T, &T) -> bool, +{ + let n = xs.len() / 2; + + for (start, end) in (0..).scan(true, |cont, i| { + if *cont { + let offset = 2 * i; + let start = n.saturating_sub(offset); + let end = n + offset; + Some( + if !(1..xs.len()).contains(&start) || !(2..xs.len()).contains(&end) { + *cont = false; + (0, xs.len()) + } + else { + (start, end) + }, + ) + } + else { + None + } + }) { + match xs[start..end] + .windows(2) + .enumerate() + .find_map(|(i, win)| { + if pred(&win[0], &win[1]) { + None + } + else { + Some(i) + } + }) + .map(|i| start + i) + { + Some(i) => return Some(i), + None => {} + } + } + None +} + +struct GroupByProducer<'data, 'p, T, P> +{ + pred: &'p P, + slice: &'data [T], +} + +impl<'data, 'p, T, P> UnindexedProducer for GroupByProducer<'data, 'p, T, P> +where + T: Sync, + P: Fn(&T, &T) -> bool + Send + Sync, +{ + type Item = &'data [T]; + + fn split(self) -> (Self, Option) + { + match find_index(self.slice, self.pred) { + Some(i) => { + let (ys, zs) = self.slice.split_at(i + 1); + (Self { pred: self.pred, slice: ys }, + Some(Self { pred: self.pred, slice: zs })) + } + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.slice.chunk_by(self.pred)) + } +} + +/// Parallel iterator over slice in (non-overlapping) chunks separated by a predicate. +/// +/// This struct is created by the [`group_by`] method on `&[T]`. +/// +/// [`group_by`]: trait.ParallelSlice.html#method.par_group_by +#[derive(Debug)] +pub struct GroupBy<'data, T, F> +where + T: Sync, + F: Fn(&T, &T) -> bool + Send + Sync +{ + pred: F, + slice: &'data [T], +} + +impl<'data, T, F> GroupBy<'data, T, F> +where + T: Sync, + F: Fn(&T, &T) -> bool + Send + Sync +{ + pub(super) fn new(slice: &'data [T], pred: F) -> Self + { + Self { pred, slice } + } +} + + +impl<'data, T, F> ParallelIterator for GroupBy<'data, T, F> +where + T: Sync, + F: Fn(&T, &T) -> bool + Send + Sync, +{ + type Item = &'data [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed(GroupByProducer { pred: &self.pred, slice: self.slice }, consumer) + } +} + diff --git a/src/slice/mod.rs b/src/slice/mod.rs index 9e8dcc906..593e55ca1 100644 --- a/src/slice/mod.rs +++ b/src/slice/mod.rs @@ -9,6 +9,7 @@ mod chunks; mod mergesort; mod quicksort; mod rchunks; +mod group_by; mod test; @@ -24,6 +25,7 @@ use std::mem; pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut}; pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; +pub use self::group_by::GroupBy; /// Parallel extensions for slices. pub trait ParallelSlice { @@ -173,6 +175,29 @@ pub trait ParallelSlice { assert!(chunk_size != 0, "chunk_size must not be zero"); RChunksExact::new(chunk_size, self.as_parallel_slice()) } + + /// Returns a parallel iterator over the slice producing non-overlapping runs + /// of elements using the predicate to separate them. + /// + /// The predicate is called on two elements following themselves, + /// it means the predicate is called on `slice[0]` and `slice[1]` + /// then on `slice[1]` and `slice[2]` and so on. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let groups: Vec<_> = [1, 2, 2, 3, 3, 3].par_group_by(|&x, &y| x == y).collect(); + /// assert_eq!(groups[0], &[1]); + /// assert_eq!(groups[1], &[2, 2]); + /// assert_eq!(groups[2], &[3, 3, 3]); + /// ``` + fn par_group_by(&self, pred: F) -> GroupBy<'_, T, F> + where + F: Fn(&T, &T) -> bool + Send + Sync, + { + GroupBy::new(self.as_parallel_slice(), pred) + } } impl ParallelSlice for [T] { From 145711ed73d2df165fe55fb72885be21844c32e1 Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Sat, 2 Apr 2022 16:36:13 +0100 Subject: [PATCH 02/12] Formatting --- src/slice/group_by.rs | 47 ++++++++++++++++++++++++------------------- src/slice/mod.rs | 4 ++-- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs index b10a7466e..7c89e9417 100644 --- a/src/slice/group_by.rs +++ b/src/slice/group_by.rs @@ -1,4 +1,3 @@ - use crate::iter::plumbing::*; use crate::iter::*; @@ -17,13 +16,11 @@ where if !(1..xs.len()).contains(&start) || !(2..xs.len()).contains(&end) { *cont = false; (0, xs.len()) - } - else { + } else { (start, end) }, ) - } - else { + } else { None } }) { @@ -33,8 +30,7 @@ where .find_map(|(i, win)| { if pred(&win[0], &win[1]) { None - } - else { + } else { Some(i) } }) @@ -47,8 +43,7 @@ where None } -struct GroupByProducer<'data, 'p, T, P> -{ +struct GroupByProducer<'data, 'p, T, P> { pred: &'p P, slice: &'data [T], } @@ -60,13 +55,20 @@ where { type Item = &'data [T]; - fn split(self) -> (Self, Option) - { + fn split(self) -> (Self, Option) { match find_index(self.slice, self.pred) { Some(i) => { let (ys, zs) = self.slice.split_at(i + 1); - (Self { pred: self.pred, slice: ys }, - Some(Self { pred: self.pred, slice: zs })) + ( + Self { + pred: self.pred, + slice: ys, + }, + Some(Self { + pred: self.pred, + slice: zs, + }), + ) } None => (self, None), } @@ -89,24 +91,22 @@ where pub struct GroupBy<'data, T, F> where T: Sync, - F: Fn(&T, &T) -> bool + Send + Sync + F: Fn(&T, &T) -> bool + Send + Sync, { pred: F, slice: &'data [T], } -impl<'data, T, F> GroupBy<'data, T, F> +impl<'data, T, F> GroupBy<'data, T, F> where T: Sync, - F: Fn(&T, &T) -> bool + Send + Sync + F: Fn(&T, &T) -> bool + Send + Sync, { - pub(super) fn new(slice: &'data [T], pred: F) -> Self - { + pub(super) fn new(slice: &'data [T], pred: F) -> Self { Self { pred, slice } } } - impl<'data, T, F> ParallelIterator for GroupBy<'data, T, F> where T: Sync, @@ -118,7 +118,12 @@ where where C: UnindexedConsumer, { - bridge_unindexed(GroupByProducer { pred: &self.pred, slice: self.slice }, consumer) + bridge_unindexed( + GroupByProducer { + pred: &self.pred, + slice: self.slice, + }, + consumer, + ) } } - diff --git a/src/slice/mod.rs b/src/slice/mod.rs index 593e55ca1..eb1159965 100644 --- a/src/slice/mod.rs +++ b/src/slice/mod.rs @@ -6,10 +6,10 @@ //! [std::slice]: https://doc.rust-lang.org/stable/std/slice/ mod chunks; +mod group_by; mod mergesort; mod quicksort; mod rchunks; -mod group_by; mod test; @@ -24,8 +24,8 @@ use std::fmt::{self, Debug}; use std::mem; pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut}; -pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; pub use self::group_by::GroupBy; +pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; /// Parallel extensions for slices. pub trait ParallelSlice { From d440fa800884224880c1dfea7af8c98122e8f714 Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Sun, 10 Apr 2022 16:43:08 +0100 Subject: [PATCH 03/12] Added group_by_mut for slices --- src/slice/group_by.rs | 128 +++++++++++++++++++++++++++++++----------- src/slice/mod.rs | 26 ++++++++- 2 files changed, 121 insertions(+), 33 deletions(-) diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs index 7c89e9417..4f783bfc9 100644 --- a/src/slice/group_by.rs +++ b/src/slice/group_by.rs @@ -7,37 +7,13 @@ where { let n = xs.len() / 2; - for (start, end) in (0..).scan(true, |cont, i| { - if *cont { - let offset = 2 * i; - let start = n.saturating_sub(offset); - let end = n + offset; - Some( - if !(1..xs.len()).contains(&start) || !(2..xs.len()).contains(&end) { - *cont = false; - (0, xs.len()) - } else { - (start, end) - }, - ) - } else { - None - } - }) { - match xs[start..end] - .windows(2) - .enumerate() - .find_map(|(i, win)| { - if pred(&win[0], &win[1]) { - None - } else { - Some(i) - } - }) - .map(|i| start + i) - { - Some(i) => return Some(i), - None => {} + for m in (1..((n / 2) + 1)).map(|x| 2 * x) { + let start = n.saturating_sub(m); + let end = std::cmp::min(n + m, xs.len()); + for i in start..(end - 1) { + if !pred(&xs[i], &xs[i + 1]) { + return Some(start + i); + } } } None @@ -58,7 +34,7 @@ where fn split(self) -> (Self, Option) { match find_index(self.slice, self.pred) { Some(i) => { - let (ys, zs) = self.slice.split_at(i + 1); + let (ys, zs) = self.slice.split_at(i); ( Self { pred: self.pred, @@ -127,3 +103,91 @@ where ) } } + +// Mutable + +struct GroupByMutProducer<'data, 'p, T, P> { + pred: &'p P, + slice: &'data mut [T], +} + +impl<'data, 'p, T, P> UnindexedProducer for GroupByMutProducer<'data, 'p, T, P> +where + T: Send, + P: Fn(&T, &T) -> bool + Send + Sync, +{ + type Item = &'data mut [T]; + + fn split(self) -> (Self, Option) { + match find_index(self.slice, self.pred) { + Some(i) => { + let (ys, zs) = self.slice.split_at_mut(i); + ( + Self { + pred: self.pred, + slice: ys, + }, + Some(Self { + pred: self.pred, + slice: zs, + }), + ) + } + None => (self, None), + } + } + + fn fold_with(self, folder: F) -> F + where + F: Folder, + { + folder.consume_iter(self.slice.chunk_by_mut(self.pred)) + } +} + +/// Parallel iterator over slice in (non-overlapping) mutable chunks +/// separated by a predicate. +/// +/// This struct is created by the [`group_by_mut`] method on `&[T]`. +/// +/// [`group_by_mut`]: trait.ParallelSlice.html#method.par_group_by_mut +#[derive(Debug)] +pub struct GroupByMut<'data, T, F> +where + T: Send, + F: Fn(&T, &T) -> bool + Send + Sync, +{ + pred: F, + slice: &'data mut [T], +} + +impl<'data, T, F> GroupByMut<'data, T, F> +where + T: Send, + F: Fn(&T, &T) -> bool + Send + Sync, +{ + pub(super) fn new(slice: &'data mut [T], pred: F) -> Self { + Self { pred, slice } + } +} + +impl<'data, T, F> ParallelIterator for GroupByMut<'data, T, F> +where + T: Send, + F: Fn(&T, &T) -> bool + Send + Sync, +{ + type Item = &'data mut [T]; + + fn drive_unindexed(self, consumer: C) -> C::Result + where + C: UnindexedConsumer, + { + bridge_unindexed( + GroupByMutProducer { + pred: &self.pred, + slice: self.slice, + }, + consumer, + ) + } +} diff --git a/src/slice/mod.rs b/src/slice/mod.rs index eb1159965..063d01252 100644 --- a/src/slice/mod.rs +++ b/src/slice/mod.rs @@ -24,7 +24,7 @@ use std::fmt::{self, Debug}; use std::mem; pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut}; -pub use self::group_by::GroupBy; +pub use self::group_by::{GroupBy, GroupByMut}; pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; /// Parallel extensions for slices. @@ -729,6 +729,30 @@ pub trait ParallelSliceMut { { par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b))); } + + /// Returns a parallel iterator over the slice producing non-overlapping mutable + /// runs of elements using the predicate to separate them. + /// + /// The predicate is called on two elements following themselves, + /// it means the predicate is called on `slice[0]` and `slice[1]` + /// then on `slice[1]` and `slice[2]` and so on. + /// + /// # Examples + /// + /// ``` + /// use rayon::prelude::*; + /// let mut xs = [1, 2, 2, 3, 3, 3]; + /// let groups: Vec<_> = xs.par_group_by_mut(|&x, &y| x == y).collect(); + /// assert_eq!(groups[0], &mut [1]); + /// assert_eq!(groups[1], &mut [2, 2]); + /// assert_eq!(groups[2], &mut [3, 3, 3]); + /// ``` + fn par_group_by_mut(&mut self, pred: F) -> GroupByMut<'_, T, F> + where + F: Fn(&T, &T) -> bool + Send + Sync, + { + GroupByMut::new(self.as_parallel_slice_mut(), pred) + } } impl ParallelSliceMut for [T] { From eb7c368f8d20ed21340df82da9ac609d4b2aea1b Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Sun, 10 Apr 2022 16:45:26 +0100 Subject: [PATCH 04/12] Fixed documentation --- src/slice/group_by.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs index 4f783bfc9..3b67ab76b 100644 --- a/src/slice/group_by.rs +++ b/src/slice/group_by.rs @@ -150,7 +150,7 @@ where /// /// This struct is created by the [`group_by_mut`] method on `&[T]`. /// -/// [`group_by_mut`]: trait.ParallelSlice.html#method.par_group_by_mut +/// [`group_by_mut`]: trait.ParallelSliceMut.html#method.par_group_by_mut #[derive(Debug)] pub struct GroupByMut<'data, T, F> where From c4a5a9ef066029772f5d9db2767fab36446cac58 Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Sun, 10 Apr 2022 16:51:33 +0100 Subject: [PATCH 05/12] Formatting --- src/slice/group_by.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs index 3b67ab76b..c926ebb5c 100644 --- a/src/slice/group_by.rs +++ b/src/slice/group_by.rs @@ -104,7 +104,7 @@ where } } -// Mutable +// Mutable struct GroupByMutProducer<'data, 'p, T, P> { pred: &'p P, From 848e7597b0a6acb5b432c2807d21d279b36b6a8c Mon Sep 17 00:00:00 2001 From: Jacob Griffith Konrad Date: Thu, 8 Dec 2022 14:54:36 +0000 Subject: [PATCH 06/12] Fixed bug --- src/slice/group_by.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/slice/group_by.rs b/src/slice/group_by.rs index c926ebb5c..02896a5d8 100644 --- a/src/slice/group_by.rs +++ b/src/slice/group_by.rs @@ -5,15 +5,16 @@ fn find_index(xs: &[T], pred: &F) -> Option where F: Fn(&T, &T) -> bool, { - let n = xs.len() / 2; + let n = (xs.len() / 2).saturating_sub(1); for m in (1..((n / 2) + 1)).map(|x| 2 * x) { let start = n.saturating_sub(m); let end = std::cmp::min(n + m, xs.len()); - for i in start..(end - 1) { - if !pred(&xs[i], &xs[i + 1]) { - return Some(start + i); - } + let fsts = &xs[start..end]; + let (_, snds) = fsts.split_first()?; + match fsts.iter().zip(snds).position(|(x , y)| !pred(x, y)) { + None => (), + Some(i) => return Some(start + i + 1), } } None From 9d32efe8abe7865838a0cf286248e423a2c2408b Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 21 Mar 2024 18:11:32 -0700 Subject: [PATCH 07/12] Rename group_by to chunk_by, matching Rust 1.77 --- src/slice/{group_by.rs => chunk_by.rs} | 58 +++++++++++++------------- src/slice/mod.rs | 30 ++++++------- 2 files changed, 44 insertions(+), 44 deletions(-) rename src/slice/{group_by.rs => chunk_by.rs} (70%) diff --git a/src/slice/group_by.rs b/src/slice/chunk_by.rs similarity index 70% rename from src/slice/group_by.rs rename to src/slice/chunk_by.rs index 02896a5d8..b10433695 100644 --- a/src/slice/group_by.rs +++ b/src/slice/chunk_by.rs @@ -1,9 +1,9 @@ use crate::iter::plumbing::*; use crate::iter::*; -fn find_index(xs: &[T], pred: &F) -> Option +fn find_index(xs: &[T], pred: &P) -> Option where - F: Fn(&T, &T) -> bool, + P: Fn(&T, &T) -> bool, { let n = (xs.len() / 2).saturating_sub(1); @@ -12,7 +12,7 @@ where let end = std::cmp::min(n + m, xs.len()); let fsts = &xs[start..end]; let (_, snds) = fsts.split_first()?; - match fsts.iter().zip(snds).position(|(x , y)| !pred(x, y)) { + match fsts.iter().zip(snds).position(|(x, y)| !pred(x, y)) { None => (), Some(i) => return Some(start + i + 1), } @@ -20,12 +20,12 @@ where None } -struct GroupByProducer<'data, 'p, T, P> { +struct ChunkByProducer<'data, 'p, T, P> { pred: &'p P, slice: &'data [T], } -impl<'data, 'p, T, P> UnindexedProducer for GroupByProducer<'data, 'p, T, P> +impl<'data, 'p, T, P> UnindexedProducer for ChunkByProducer<'data, 'p, T, P> where T: Sync, P: Fn(&T, &T) -> bool + Send + Sync, @@ -61,33 +61,33 @@ where /// Parallel iterator over slice in (non-overlapping) chunks separated by a predicate. /// -/// This struct is created by the [`group_by`] method on `&[T]`. +/// This struct is created by the [`par_chunk_by`] method on `&[T]`. /// -/// [`group_by`]: trait.ParallelSlice.html#method.par_group_by +/// [`par_chunk_by`]: trait.ParallelSlice.html#method.par_chunk_by #[derive(Debug)] -pub struct GroupBy<'data, T, F> +pub struct ChunkBy<'data, T, P> where T: Sync, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { - pred: F, + pred: P, slice: &'data [T], } -impl<'data, T, F> GroupBy<'data, T, F> +impl<'data, T, P> ChunkBy<'data, T, P> where T: Sync, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { - pub(super) fn new(slice: &'data [T], pred: F) -> Self { + pub(super) fn new(slice: &'data [T], pred: P) -> Self { Self { pred, slice } } } -impl<'data, T, F> ParallelIterator for GroupBy<'data, T, F> +impl<'data, T, P> ParallelIterator for ChunkBy<'data, T, P> where T: Sync, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { type Item = &'data [T]; @@ -96,7 +96,7 @@ where C: UnindexedConsumer, { bridge_unindexed( - GroupByProducer { + ChunkByProducer { pred: &self.pred, slice: self.slice, }, @@ -107,12 +107,12 @@ where // Mutable -struct GroupByMutProducer<'data, 'p, T, P> { +struct ChunkByMutProducer<'data, 'p, T, P> { pred: &'p P, slice: &'data mut [T], } -impl<'data, 'p, T, P> UnindexedProducer for GroupByMutProducer<'data, 'p, T, P> +impl<'data, 'p, T, P> UnindexedProducer for ChunkByMutProducer<'data, 'p, T, P> where T: Send, P: Fn(&T, &T) -> bool + Send + Sync, @@ -149,33 +149,33 @@ where /// Parallel iterator over slice in (non-overlapping) mutable chunks /// separated by a predicate. /// -/// This struct is created by the [`group_by_mut`] method on `&[T]`. +/// This struct is created by the [`par_chunk_by_mut`] method on `&mut [T]`. /// -/// [`group_by_mut`]: trait.ParallelSliceMut.html#method.par_group_by_mut +/// [`par_chunk_by_mut`]: trait.ParallelSliceMut.html#method.par_chunk_by_mut #[derive(Debug)] -pub struct GroupByMut<'data, T, F> +pub struct ChunkByMut<'data, T, P> where T: Send, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { - pred: F, + pred: P, slice: &'data mut [T], } -impl<'data, T, F> GroupByMut<'data, T, F> +impl<'data, T, P> ChunkByMut<'data, T, P> where T: Send, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { - pub(super) fn new(slice: &'data mut [T], pred: F) -> Self { + pub(super) fn new(slice: &'data mut [T], pred: P) -> Self { Self { pred, slice } } } -impl<'data, T, F> ParallelIterator for GroupByMut<'data, T, F> +impl<'data, T, P> ParallelIterator for ChunkByMut<'data, T, P> where T: Send, - F: Fn(&T, &T) -> bool + Send + Sync, + P: Fn(&T, &T) -> bool + Send + Sync, { type Item = &'data mut [T]; @@ -184,7 +184,7 @@ where C: UnindexedConsumer, { bridge_unindexed( - GroupByMutProducer { + ChunkByMutProducer { pred: &self.pred, slice: self.slice, }, diff --git a/src/slice/mod.rs b/src/slice/mod.rs index 063d01252..171675e53 100644 --- a/src/slice/mod.rs +++ b/src/slice/mod.rs @@ -5,8 +5,8 @@ //! //! [std::slice]: https://doc.rust-lang.org/stable/std/slice/ +mod chunk_by; mod chunks; -mod group_by; mod mergesort; mod quicksort; mod rchunks; @@ -23,8 +23,8 @@ use std::cmp::Ordering; use std::fmt::{self, Debug}; use std::mem; +pub use self::chunk_by::{ChunkBy, ChunkByMut}; pub use self::chunks::{Chunks, ChunksExact, ChunksExactMut, ChunksMut}; -pub use self::group_by::{GroupBy, GroupByMut}; pub use self::rchunks::{RChunks, RChunksExact, RChunksExactMut, RChunksMut}; /// Parallel extensions for slices. @@ -187,16 +187,16 @@ pub trait ParallelSlice { /// /// ``` /// use rayon::prelude::*; - /// let groups: Vec<_> = [1, 2, 2, 3, 3, 3].par_group_by(|&x, &y| x == y).collect(); - /// assert_eq!(groups[0], &[1]); - /// assert_eq!(groups[1], &[2, 2]); - /// assert_eq!(groups[2], &[3, 3, 3]); + /// let chunks: Vec<_> = [1, 2, 2, 3, 3, 3].par_chunk_by(|&x, &y| x == y).collect(); + /// assert_eq!(chunks[0], &[1]); + /// assert_eq!(chunks[1], &[2, 2]); + /// assert_eq!(chunks[2], &[3, 3, 3]); /// ``` - fn par_group_by(&self, pred: F) -> GroupBy<'_, T, F> + fn par_chunk_by(&self, pred: F) -> ChunkBy<'_, T, F> where F: Fn(&T, &T) -> bool + Send + Sync, { - GroupBy::new(self.as_parallel_slice(), pred) + ChunkBy::new(self.as_parallel_slice(), pred) } } @@ -730,7 +730,7 @@ pub trait ParallelSliceMut { par_quicksort(self.as_parallel_slice_mut(), |a, b| f(a).lt(&f(b))); } - /// Returns a parallel iterator over the slice producing non-overlapping mutable + /// Returns a parallel iterator over the slice producing non-overlapping mutable /// runs of elements using the predicate to separate them. /// /// The predicate is called on two elements following themselves, @@ -742,16 +742,16 @@ pub trait ParallelSliceMut { /// ``` /// use rayon::prelude::*; /// let mut xs = [1, 2, 2, 3, 3, 3]; - /// let groups: Vec<_> = xs.par_group_by_mut(|&x, &y| x == y).collect(); - /// assert_eq!(groups[0], &mut [1]); - /// assert_eq!(groups[1], &mut [2, 2]); - /// assert_eq!(groups[2], &mut [3, 3, 3]); + /// let chunks: Vec<_> = xs.par_chunk_by_mut(|&x, &y| x == y).collect(); + /// assert_eq!(chunks[0], &mut [1]); + /// assert_eq!(chunks[1], &mut [2, 2]); + /// assert_eq!(chunks[2], &mut [3, 3, 3]); /// ``` - fn par_group_by_mut(&mut self, pred: F) -> GroupByMut<'_, T, F> + fn par_chunk_by_mut(&mut self, pred: F) -> ChunkByMut<'_, T, F> where F: Fn(&T, &T) -> bool + Send + Sync, { - GroupByMut::new(self.as_parallel_slice_mut(), pred) + ChunkByMut::new(self.as_parallel_slice_mut(), pred) } } From f22326977c19d1008bb76cd4acd9c25b6b84d2e3 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 21 Mar 2024 18:17:13 -0700 Subject: [PATCH 08/12] Relax constraints on ChunkBy and ChunkByMut --- src/slice/chunk_by.rs | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/src/slice/chunk_by.rs b/src/slice/chunk_by.rs index b10433695..aa7e9e53c 100644 --- a/src/slice/chunk_by.rs +++ b/src/slice/chunk_by.rs @@ -65,20 +65,12 @@ where /// /// [`par_chunk_by`]: trait.ParallelSlice.html#method.par_chunk_by #[derive(Debug)] -pub struct ChunkBy<'data, T, P> -where - T: Sync, - P: Fn(&T, &T) -> bool + Send + Sync, -{ +pub struct ChunkBy<'data, T, P> { pred: P, slice: &'data [T], } -impl<'data, T, P> ChunkBy<'data, T, P> -where - T: Sync, - P: Fn(&T, &T) -> bool + Send + Sync, -{ +impl<'data, T, P> ChunkBy<'data, T, P> { pub(super) fn new(slice: &'data [T], pred: P) -> Self { Self { pred, slice } } @@ -153,20 +145,12 @@ where /// /// [`par_chunk_by_mut`]: trait.ParallelSliceMut.html#method.par_chunk_by_mut #[derive(Debug)] -pub struct ChunkByMut<'data, T, P> -where - T: Send, - P: Fn(&T, &T) -> bool + Send + Sync, -{ +pub struct ChunkByMut<'data, T, P> { pred: P, slice: &'data mut [T], } -impl<'data, T, P> ChunkByMut<'data, T, P> -where - T: Send, - P: Fn(&T, &T) -> bool + Send + Sync, -{ +impl<'data, T, P> ChunkByMut<'data, T, P> { pub(super) fn new(slice: &'data mut [T], pred: P) -> Self { Self { pred, slice } } From 29c922699faa51f606699d4db7fc9107f9b8c099 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 21 Mar 2024 18:23:03 -0700 Subject: [PATCH 09/12] Make Debug for ChunkBy independent of the predicate --- src/slice/chunk_by.rs | 19 +++++++++++++++++-- tests/debug.rs | 2 ++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/slice/chunk_by.rs b/src/slice/chunk_by.rs index aa7e9e53c..69fbabf89 100644 --- a/src/slice/chunk_by.rs +++ b/src/slice/chunk_by.rs @@ -1,5 +1,6 @@ use crate::iter::plumbing::*; use crate::iter::*; +use std::fmt; fn find_index(xs: &[T], pred: &P) -> Option where @@ -64,12 +65,19 @@ where /// This struct is created by the [`par_chunk_by`] method on `&[T]`. /// /// [`par_chunk_by`]: trait.ParallelSlice.html#method.par_chunk_by -#[derive(Debug)] pub struct ChunkBy<'data, T, P> { pred: P, slice: &'data [T], } +impl<'data, T: fmt::Debug, P> fmt::Debug for ChunkBy<'data, T, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ChunkBy") + .field("slice", &self.slice) + .finish() + } +} + impl<'data, T, P> ChunkBy<'data, T, P> { pub(super) fn new(slice: &'data [T], pred: P) -> Self { Self { pred, slice } @@ -144,12 +152,19 @@ where /// This struct is created by the [`par_chunk_by_mut`] method on `&mut [T]`. /// /// [`par_chunk_by_mut`]: trait.ParallelSliceMut.html#method.par_chunk_by_mut -#[derive(Debug)] pub struct ChunkByMut<'data, T, P> { pred: P, slice: &'data mut [T], } +impl<'data, T: fmt::Debug, P> fmt::Debug for ChunkByMut<'data, T, P> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ChunkByMut") + .field("slice", &self.slice) + .finish() + } +} + impl<'data, T, P> ChunkByMut<'data, T, P> { pub(super) fn new(slice: &'data mut [T], pred: P) -> Self { Self { pred, slice } diff --git a/tests/debug.rs b/tests/debug.rs index bf16a2fdd..97d89cd6a 100644 --- a/tests/debug.rs +++ b/tests/debug.rs @@ -121,6 +121,8 @@ fn debug_vec() { let mut v: Vec<_> = (0..10).collect(); check(v.par_iter()); check(v.par_iter_mut()); + check(v.par_chunk_by(i32::eq)); + check(v.par_chunk_by_mut(i32::eq)); check(v.par_chunks(42)); check(v.par_chunks_exact(42)); check(v.par_chunks_mut(42)); From e60b57e2d60954e9e70d7fe274b9373cdcae960a Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 21 Mar 2024 18:25:00 -0700 Subject: [PATCH 10/12] impl Clone for ChunkBy --- src/slice/chunk_by.rs | 9 +++++++++ tests/clones.rs | 1 + 2 files changed, 10 insertions(+) diff --git a/src/slice/chunk_by.rs b/src/slice/chunk_by.rs index 69fbabf89..6fb318b04 100644 --- a/src/slice/chunk_by.rs +++ b/src/slice/chunk_by.rs @@ -70,6 +70,15 @@ pub struct ChunkBy<'data, T, P> { slice: &'data [T], } +impl<'data, T, P: Clone> Clone for ChunkBy<'data, T, P> { + fn clone(&self) -> Self { + ChunkBy { + pred: self.pred.clone(), + slice: self.slice, + } + } +} + impl<'data, T: fmt::Debug, P> fmt::Debug for ChunkBy<'data, T, P> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("ChunkBy") diff --git a/tests/clones.rs b/tests/clones.rs index 1306147f5..9ffa1d131 100644 --- a/tests/clones.rs +++ b/tests/clones.rs @@ -109,6 +109,7 @@ fn clone_str() { fn clone_vec() { let v: Vec<_> = (0..1000).collect(); check(v.par_iter()); + check(v.par_chunk_by(i32::eq)); check(v.par_chunks(42)); check(v.par_chunks_exact(42)); check(v.par_rchunks(42)); From ad4134540544fe94a0bbd42d4a2d3480ef345ffd Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Thu, 21 Mar 2024 18:47:43 -0700 Subject: [PATCH 11/12] Make ChunkBy compatible with MSRV --- src/slice/chunk_by.rs | 41 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/slice/chunk_by.rs b/src/slice/chunk_by.rs index 6fb318b04..23bb40363 100644 --- a/src/slice/chunk_by.rs +++ b/src/slice/chunk_by.rs @@ -2,6 +2,15 @@ use crate::iter::plumbing::*; use crate::iter::*; use std::fmt; +fn find_first_index(xs: &[T], pred: &P) -> Option +where + P: Fn(&T, &T) -> bool, +{ + xs.windows(2) + .position(|w| !pred(&w[0], &w[1])) + .map(|i| i + 1) +} + fn find_index(xs: &[T], pred: &P) -> Option where P: Fn(&T, &T) -> bool, @@ -52,11 +61,23 @@ where } } - fn fold_with(self, folder: F) -> F + fn fold_with(mut self, folder: F) -> F where F: Folder, { - folder.consume_iter(self.slice.chunk_by(self.pred)) + // TODO (MSRV 1.77): + // folder.consume_iter(self.slice.chunk_by(self.pred)) + + folder.consume_iter(std::iter::from_fn(move || { + if self.slice.is_empty() { + None + } else { + let i = find_first_index(self.slice, self.pred).unwrap_or(self.slice.len()); + let (head, tail) = self.slice.split_at(i); + self.slice = tail; + Some(head) + } + })) } } @@ -147,11 +168,23 @@ where } } - fn fold_with(self, folder: F) -> F + fn fold_with(mut self, folder: F) -> F where F: Folder, { - folder.consume_iter(self.slice.chunk_by_mut(self.pred)) + // TODO (MSRV 1.77): + // folder.consume_iter(self.slice.chunk_by_mut(self.pred)) + + folder.consume_iter(std::iter::from_fn(move || { + if self.slice.is_empty() { + None + } else { + let i = find_first_index(self.slice, self.pred).unwrap_or(self.slice.len()); + let (head, tail) = std::mem::take(&mut self.slice).split_at_mut(i); + self.slice = tail; + Some(head) + } + })) } } From e37ec9ee56dfaf2e780ad891b67c3251ffefd608 Mon Sep 17 00:00:00 2001 From: Josh Stone Date: Sat, 23 Mar 2024 13:18:01 -0700 Subject: [PATCH 12/12] Rewrite `ChunkByProducer` to be more like `SplitProducer` In particular, this now ensures we only call the predicate once on each pair of items. --- src/slice/chunk_by.rs | 239 ++++++++++++++++++++++-------------------- src/slice/test.rs | 46 ++++++++ 2 files changed, 170 insertions(+), 115 deletions(-) diff --git a/src/slice/chunk_by.rs b/src/slice/chunk_by.rs index 23bb40363..25833cabe 100644 --- a/src/slice/chunk_by.rs +++ b/src/slice/chunk_by.rs @@ -1,83 +1,141 @@ use crate::iter::plumbing::*; use crate::iter::*; -use std::fmt; +use std::marker::PhantomData; +use std::{fmt, mem}; -fn find_first_index(xs: &[T], pred: &P) -> Option -where - P: Fn(&T, &T) -> bool, -{ - xs.windows(2) - .position(|w| !pred(&w[0], &w[1])) - .map(|i| i + 1) +trait ChunkBySlice: AsRef<[T]> + Default + Send { + fn split(self, index: usize) -> (Self, Self); + + fn find(&self, pred: &impl Fn(&T, &T) -> bool, start: usize, end: usize) -> Option { + self.as_ref()[start..end] + .windows(2) + .position(move |w| !pred(&w[0], &w[1])) + .map(|i| i + 1) + } + + fn rfind(&self, pred: &impl Fn(&T, &T) -> bool, end: usize) -> Option { + self.as_ref()[..end] + .windows(2) + .rposition(move |w| !pred(&w[0], &w[1])) + .map(|i| i + 1) + } } -fn find_index(xs: &[T], pred: &P) -> Option -where - P: Fn(&T, &T) -> bool, -{ - let n = (xs.len() / 2).saturating_sub(1); - - for m in (1..((n / 2) + 1)).map(|x| 2 * x) { - let start = n.saturating_sub(m); - let end = std::cmp::min(n + m, xs.len()); - let fsts = &xs[start..end]; - let (_, snds) = fsts.split_first()?; - match fsts.iter().zip(snds).position(|(x, y)| !pred(x, y)) { - None => (), - Some(i) => return Some(start + i + 1), - } +impl ChunkBySlice for &[T] { + fn split(self, index: usize) -> (Self, Self) { + self.split_at(index) } - None } -struct ChunkByProducer<'data, 'p, T, P> { - pred: &'p P, - slice: &'data [T], +impl ChunkBySlice for &mut [T] { + fn split(self, index: usize) -> (Self, Self) { + self.split_at_mut(index) + } +} + +struct ChunkByProducer<'p, T, Slice, Pred> { + slice: Slice, + pred: &'p Pred, + tail: usize, + marker: PhantomData, } -impl<'data, 'p, T, P> UnindexedProducer for ChunkByProducer<'data, 'p, T, P> +// Note: this implementation is very similar to `SplitProducer`. +impl UnindexedProducer for ChunkByProducer<'_, T, Slice, Pred> where - T: Sync, - P: Fn(&T, &T) -> bool + Send + Sync, + Slice: ChunkBySlice, + Pred: Fn(&T, &T) -> bool + Send + Sync, { - type Item = &'data [T]; + type Item = Slice; fn split(self) -> (Self, Option) { - match find_index(self.slice, self.pred) { - Some(i) => { - let (ys, zs) = self.slice.split_at(i); - ( - Self { - pred: self.pred, - slice: ys, - }, - Some(Self { - pred: self.pred, - slice: zs, - }), - ) - } - None => (self, None), + if self.tail < 2 { + return (Self { tail: 0, ..self }, None); + } + + // Look forward for the separator, and failing that look backward. + let mid = self.tail / 2; + let index = match self.slice.find(self.pred, mid, self.tail) { + Some(i) => Some(mid + i), + None => self.slice.rfind(self.pred, mid + 1), + }; + + if let Some(index) = index { + let (left, right) = self.slice.split(index); + + let (left_tail, right_tail) = if index <= mid { + // If we scanned backwards to find the separator, everything in + // the right side is exhausted, with no separators left to find. + (index, 0) + } else { + (mid + 1, self.tail - index) + }; + + // Create the left split before the separator. + let left = Self { + slice: left, + tail: left_tail, + ..self + }; + + // Create the right split following the separator. + let right = Self { + slice: right, + tail: right_tail, + ..self + }; + + (left, Some(right)) + } else { + // The search is exhausted, no more separators... + (Self { tail: 0, ..self }, None) } } - fn fold_with(mut self, folder: F) -> F + fn fold_with(self, mut folder: F) -> F where F: Folder, { - // TODO (MSRV 1.77): - // folder.consume_iter(self.slice.chunk_by(self.pred)) + let Self { + slice, pred, tail, .. + } = self; + + let (slice, tail) = if tail == slice.as_ref().len() { + // No tail section, so just let `consume_iter` do it all. + (Some(slice), None) + } else if let Some(index) = slice.rfind(pred, tail) { + // We found the last separator to complete the tail, so + // end with that slice after `consume_iter` finds the rest. + let (left, right) = slice.split(index); + (Some(left), Some(right)) + } else { + // We know there are no separators at all, so it's all "tail". + (None, Some(slice)) + }; + + if let Some(mut slice) = slice { + // TODO (MSRV 1.77) use either: + // folder.consume_iter(slice.chunk_by(pred)) + // folder.consume_iter(slice.chunk_by_mut(pred)) + + folder = folder.consume_iter(std::iter::from_fn(move || { + let len = slice.as_ref().len(); + if len > 0 { + let i = slice.find(pred, 0, len).unwrap_or(len); + let (head, tail) = mem::take(&mut slice).split(i); + slice = tail; + Some(head) + } else { + None + } + })); + } - folder.consume_iter(std::iter::from_fn(move || { - if self.slice.is_empty() { - None - } else { - let i = find_first_index(self.slice, self.pred).unwrap_or(self.slice.len()); - let (head, tail) = self.slice.split_at(i); - self.slice = tail; - Some(head) - } - })) + if let Some(tail) = tail { + folder = folder.consume(tail); + } + + folder } } @@ -127,67 +185,16 @@ where { bridge_unindexed( ChunkByProducer { - pred: &self.pred, + tail: self.slice.len(), slice: self.slice, + pred: &self.pred, + marker: PhantomData, }, consumer, ) } } -// Mutable - -struct ChunkByMutProducer<'data, 'p, T, P> { - pred: &'p P, - slice: &'data mut [T], -} - -impl<'data, 'p, T, P> UnindexedProducer for ChunkByMutProducer<'data, 'p, T, P> -where - T: Send, - P: Fn(&T, &T) -> bool + Send + Sync, -{ - type Item = &'data mut [T]; - - fn split(self) -> (Self, Option) { - match find_index(self.slice, self.pred) { - Some(i) => { - let (ys, zs) = self.slice.split_at_mut(i); - ( - Self { - pred: self.pred, - slice: ys, - }, - Some(Self { - pred: self.pred, - slice: zs, - }), - ) - } - None => (self, None), - } - } - - fn fold_with(mut self, folder: F) -> F - where - F: Folder, - { - // TODO (MSRV 1.77): - // folder.consume_iter(self.slice.chunk_by_mut(self.pred)) - - folder.consume_iter(std::iter::from_fn(move || { - if self.slice.is_empty() { - None - } else { - let i = find_first_index(self.slice, self.pred).unwrap_or(self.slice.len()); - let (head, tail) = std::mem::take(&mut self.slice).split_at_mut(i); - self.slice = tail; - Some(head) - } - })) - } -} - /// Parallel iterator over slice in (non-overlapping) mutable chunks /// separated by a predicate. /// @@ -225,9 +232,11 @@ where C: UnindexedConsumer, { bridge_unindexed( - ChunkByMutProducer { - pred: &self.pred, + ChunkByProducer { + tail: self.slice.len(), slice: self.slice, + pred: &self.pred, + marker: PhantomData, }, consumer, ) diff --git a/src/slice/test.rs b/src/slice/test.rs index f74ca0f74..2538a86b9 100644 --- a/src/slice/test.rs +++ b/src/slice/test.rs @@ -5,6 +5,7 @@ use rand::distributions::Uniform; use rand::seq::SliceRandom; use rand::{thread_rng, Rng}; use std::cmp::Ordering::{Equal, Greater, Less}; +use std::sync::atomic::{AtomicUsize, Ordering::Relaxed}; macro_rules! sort { ($f:ident, $name:ident) => { @@ -168,3 +169,48 @@ fn test_par_rchunks_exact_mut_remainder() { assert_eq!(c.take_remainder(), &[]); assert_eq!(c.len(), 2); } + +#[test] +fn slice_chunk_by() { + let v: Vec<_> = (0..1000).collect(); + assert_eq!(v[..0].par_chunk_by(|_, _| todo!()).count(), 0); + assert_eq!(v[..1].par_chunk_by(|_, _| todo!()).count(), 1); + assert_eq!(v[..2].par_chunk_by(|_, _| true).count(), 1); + assert_eq!(v[..2].par_chunk_by(|_, _| false).count(), 2); + + let count = AtomicUsize::new(0); + let par: Vec<_> = v + .par_chunk_by(|x, y| { + count.fetch_add(1, Relaxed); + (x % 10 < 3) == (y % 10 < 3) + }) + .collect(); + assert_eq!(count.into_inner(), v.len() - 1); + + let seq: Vec<_> = v.chunk_by(|x, y| (x % 10 < 3) == (y % 10 < 3)).collect(); + assert_eq!(par, seq); +} + +#[test] +fn slice_chunk_by_mut() { + let mut v: Vec<_> = (0..1000).collect(); + assert_eq!(v[..0].par_chunk_by_mut(|_, _| todo!()).count(), 0); + assert_eq!(v[..1].par_chunk_by_mut(|_, _| todo!()).count(), 1); + assert_eq!(v[..2].par_chunk_by_mut(|_, _| true).count(), 1); + assert_eq!(v[..2].par_chunk_by_mut(|_, _| false).count(), 2); + + let mut v2 = v.clone(); + let count = AtomicUsize::new(0); + let par: Vec<_> = v + .par_chunk_by_mut(|x, y| { + count.fetch_add(1, Relaxed); + (x % 10 < 3) == (y % 10 < 3) + }) + .collect(); + assert_eq!(count.into_inner(), v2.len() - 1); + + let seq: Vec<_> = v2 + .chunk_by_mut(|x, y| (x % 10 < 3) == (y % 10 < 3)) + .collect(); + assert_eq!(par, seq); +}