diff --git a/CHANGELOG.md b/CHANGELOG.md index c80cb36a63..cc58650516 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,13 @@ A [separate changelog is kept for rand_core](rand_core/CHANGELOG.md). You may also find the [Upgrade Guide](https://rust-random.github.io/book/update.html) useful. +## [Unreleased] +### Changes +- Rename fns `IndexedRandom::choose_multiple` -> `sample`, `choose_multiple_array` -> `sample_array`, `choose_multiple_weighted` -> `sample_weighted`, struct `SliceChooseIter` -> `IndexedSamples` and fns `IteratorRandom::choose_multiple` -> `sample`, `choose_multiple_fill` -> `sample_fill` (#1632) + +### Additions +- Add fns `IndexedRandom::choose_iter`, `choose_weighted_iter` (#1632) + ## [0.9.1] - 2025-04-17 ### Security and unsafe - Revise "not a crypto library" policy again (#1565) diff --git a/benches/benches/seq_choose.rs b/benches/benches/seq_choose.rs index 56223dd0a6..a3e5af1531 100644 --- a/benches/benches/seq_choose.rs +++ b/benches/benches/seq_choose.rs @@ -30,7 +30,7 @@ pub fn bench(c: &mut Criterion) { let lens = [(1, 1000), (950, 1000), (10, 100), (90, 100)]; for (amount, len) in lens { - let name = format!("seq_slice_choose_multiple_{}_of_{}", amount, len); + let name = format!("seq_slice_sample_{}_of_{}", amount, len); c.bench_function(name.as_str(), |b| { let mut rng = Pcg32::from_rng(&mut rand::rng()); let mut buf = [0i32; 1000]; @@ -44,7 +44,7 @@ pub fn bench(c: &mut Criterion) { b.iter(|| { // Collect full result to prevent unwanted shortcuts getting // first element (in case sample_indices returns an iterator). 
- for (slot, sample) in y.iter_mut().zip(x.choose_multiple(&mut rng, amount)) { + for (slot, sample) in y.iter_mut().zip(x.sample(&mut rng, amount)) { *slot = *sample; } y[amount - 1] @@ -54,7 +54,7 @@ pub fn bench(c: &mut Criterion) { let lens = [(1, 1000), (950, 1000), (10, 100), (90, 100)]; for (amount, len) in lens { - let name = format!("seq_slice_choose_multiple_weighted_{}_of_{}", amount, len); + let name = format!("seq_slice_sample_weighted_{}_of_{}", amount, len); c.bench_function(name.as_str(), |b| { let mut rng = Pcg32::from_rng(&mut rand::rng()); let mut buf = [0i32; 1000]; @@ -68,7 +68,7 @@ pub fn bench(c: &mut Criterion) { b.iter(|| { // Collect full result to prevent unwanted shortcuts getting // first element (in case sample_indices returns an iterator). - let samples_iter = x.choose_multiple_weighted(&mut rng, amount, |_| 1.0).unwrap(); + let samples_iter = x.sample_weighted(&mut rng, amount, |_| 1.0).unwrap(); for (slot, sample) in y.iter_mut().zip(samples_iter) { *slot = *sample; } @@ -77,21 +77,21 @@ pub fn bench(c: &mut Criterion) { }); } - c.bench_function("seq_iter_choose_multiple_10_of_100", |b| { + c.bench_function("seq_iter_sample_10_of_100", |b| { let mut rng = Pcg32::from_rng(&mut rand::rng()); let mut buf = [0i32; 100]; rng.fill(&mut buf); let x = black_box(&buf); - b.iter(|| x.iter().cloned().choose_multiple(&mut rng, 10)) + b.iter(|| x.iter().cloned().sample(&mut rng, 10)) }); - c.bench_function("seq_iter_choose_multiple_fill_10_of_100", |b| { + c.bench_function("seq_iter_sample_fill_10_of_100", |b| { let mut rng = Pcg32::from_rng(&mut rand::rng()); let mut buf = [0i32; 100]; rng.fill(&mut buf); let x = black_box(&buf); let mut buf = [0; 10]; - b.iter(|| x.iter().cloned().choose_multiple_fill(&mut rng, &mut buf)) + b.iter(|| x.iter().cloned().sample_fill(&mut rng, &mut buf)) }); bench_rng::(c, "ChaCha20"); diff --git a/src/seq/iterator.rs b/src/seq/iterator.rs index a9a9e56155..6fb4457eff 100644 --- a/src/seq/iterator.rs +++ 
b/src/seq/iterator.rs @@ -195,8 +195,8 @@ pub trait IteratorRandom: Iterator + Sized { /// case this equals the number of elements available. /// /// Complexity is `O(n)` where `n` is the length of the iterator. - /// For slices, prefer [`IndexedRandom::choose_multiple`]. - fn choose_multiple_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) -> usize + /// For slices, prefer [`IndexedRandom::sample`]. + fn sample_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) -> usize where R: Rng + ?Sized, { @@ -224,7 +224,7 @@ pub trait IteratorRandom: Iterator + Sized { /// Uniformly sample `amount` distinct elements into a [`Vec`] /// - /// This is equivalent to `choose_multiple_fill` except for the result type. + /// This is equivalent to `sample_fill` except for the result type. /// /// Although the elements are selected randomly, the order of elements in /// the buffer is neither stable nor fully random. If random ordering is @@ -235,9 +235,9 @@ pub trait IteratorRandom: Iterator + Sized { /// elements available. /// /// Complexity is `O(n)` where `n` is the length of the iterator. - /// For slices, prefer [`IndexedRandom::choose_multiple`]. + /// For slices, prefer [`IndexedRandom::sample`]. 
#[cfg(feature = "alloc")] - fn choose_multiple(mut self, rng: &mut R, amount: usize) -> Vec + fn sample(mut self, rng: &mut R, amount: usize) -> Vec where R: Rng + ?Sized, { @@ -262,6 +262,25 @@ pub trait IteratorRandom: Iterator + Sized { } reservoir } + + /// Deprecated: use [`Self::sample_fill`] instead + #[deprecated(since = "0.9.2", note = "Renamed to `sample_fill`")] + fn choose_multiple_fill(self, rng: &mut R, buf: &mut [Self::Item]) -> usize + where + R: Rng + ?Sized, + { + self.sample_fill(rng, buf) + } + + /// Deprecated: use [`Self::sample`] instead + #[cfg(feature = "alloc")] + #[deprecated(since = "0.9.2", note = "Renamed to `sample`")] + fn choose_multiple(self, rng: &mut R, amount: usize) -> Vec + where + R: Rng + ?Sized, + { + self.sample(rng, amount) + } } impl IteratorRandom for I where I: Iterator + Sized {} @@ -538,8 +557,8 @@ mod test { let mut r = crate::test::rng(401); let vals = (min_val..max_val).collect::>(); - let small_sample = vals.iter().choose_multiple(&mut r, 5); - let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5); + let small_sample = vals.iter().sample(&mut r, 5); + let large_sample = vals.iter().sample(&mut r, vals.len() + 5); assert_eq!(small_sample.len(), 5); assert_eq!(large_sample.len(), vals.len()); @@ -644,20 +663,17 @@ mod test { } #[test] - fn value_stability_choose_multiple() { + fn value_stability_sample() { fn do_test>(iter: I, v: &[u32]) { let mut rng = crate::test::rng(412); let mut buf = [0u32; 8]; - assert_eq!( - iter.clone().choose_multiple_fill(&mut rng, &mut buf), - v.len() - ); + assert_eq!(iter.clone().sample_fill(&mut rng, &mut buf), v.len()); assert_eq!(&buf[0..v.len()], v); #[cfg(feature = "alloc")] { let mut rng = crate::test::rng(412); - assert_eq!(iter.choose_multiple(&mut rng, v.len()), v); + assert_eq!(iter.sample(&mut rng, v.len()), v); } } diff --git a/src/seq/mod.rs b/src/seq/mod.rs index 91d634d865..b82d884344 100644 --- a/src/seq/mod.rs +++ b/src/seq/mod.rs @@ -40,6 +40,9 @@ 
mod index_; pub use crate::distr::weighted::Error as WeightError; pub use iterator::IteratorRandom; #[cfg(feature = "alloc")] +pub use slice::IndexedSamples; +#[allow(deprecated)] +#[cfg(feature = "alloc")] pub use slice::SliceChooseIter; pub use slice::{IndexedMutRandom, IndexedRandom, SliceRandom}; diff --git a/src/seq/slice.rs b/src/seq/slice.rs index f909418bc4..0b355a4674 100644 --- a/src/seq/slice.rs +++ b/src/seq/slice.rs @@ -60,6 +60,29 @@ pub trait IndexedRandom: Index { } } + /// Return an iterator which samples from `self` with replacement + /// + /// Returns `None` if and only if `self.is_empty()`. + /// + /// # Example + /// + /// ``` + /// use rand::seq::IndexedRandom; + /// + /// let choices = [1, 2, 4, 8, 16, 32]; + /// let mut rng = rand::rng(); + /// for choice in choices.choose_iter(&mut rng).unwrap().take(3) { + /// println!("{:?}", choice); + /// } + /// ``` + fn choose_iter(&self, rng: &mut R) -> Option> + where + R: Rng + ?Sized, + { + let distr = crate::distr::Uniform::new(0, self.len()).ok()?; + Some(rng.sample_iter(distr).map(|i| &self[i])) + } + /// Uniformly sample `amount` distinct elements from self /// /// Chooses `amount` elements from the slice at random, without repetition, @@ -78,22 +101,22 @@ pub trait IndexedRandom: Index { /// let sample = "Hello, audience!".as_bytes(); /// /// // collect the results into a vector: - /// let v: Vec = sample.choose_multiple(&mut rng, 3).cloned().collect(); + /// let v: Vec = sample.sample(&mut rng, 3).cloned().collect(); /// /// // store in a buffer: /// let mut buf = [0u8; 5]; - /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + /// for (b, slot) in sample.sample(&mut rng, buf.len()).zip(buf.iter_mut()) { /// *slot = *b; /// } /// ``` #[cfg(feature = "alloc")] - fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter + fn sample(&self, rng: &mut R, amount: usize) -> IndexedSamples where Self::Output: Sized, R: Rng + ?Sized, { let amount = 
core::cmp::min(amount, self.len()); - SliceChooseIter { + IndexedSamples { slice: self, _phantom: Default::default(), indices: index::sample(rng, self.len(), amount).into_iter(), @@ -114,9 +137,9 @@ pub trait IndexedRandom: Index { /// let mut rng = &mut rand::rng(); /// let sample = "Hello, audience!".as_bytes(); /// - /// let a: [u8; 3] = sample.choose_multiple_array(&mut rng).unwrap(); + /// let a: [u8; 3] = sample.sample_array(&mut rng).unwrap(); /// ``` - fn choose_multiple_array(&self, rng: &mut R) -> Option<[Self::Output; N]> + fn sample_array(&self, rng: &mut R) -> Option<[Self::Output; N]> where Self::Output: Clone + Sized, R: Rng + ?Sized, @@ -128,7 +151,7 @@ pub trait IndexedRandom: Index { /// Biased sampling for one element /// /// Returns a reference to one element of the slice, sampled according - /// to the provided weights. Returns `None` only if the slice is empty. + /// to the provided weights. Returns a [`WeightError`] if `self.is_empty()` or the weights are invalid. /// /// The specified function `weight` maps each item `x` to a relative /// likelihood `weight(x)`. The probability of each item being selected is @@ -166,14 +189,38 @@ pub trait IndexedRandom: Index { B: SampleBorrow, X: SampleUniform + Weight + PartialOrd, { - use crate::distr::{weighted::WeightedIndex, Distribution}; + use crate::distr::weighted::WeightedIndex; let distr = WeightedIndex::new((0..self.len()).map(|idx| weight(&self[idx])))?; - Ok(&self[distr.sample(rng)]) + Ok(&self[rng.sample(distr)]) + } + + /// Biased sampling with replacement + /// + /// Returns an iterator which samples elements from `self` according to the + /// given weights with replacement (i.e. elements may be repeated). + /// Returns a [`WeightError`] if `self.is_empty()` or the weights are invalid. + /// + /// See also doc for [`Self::choose_weighted`]. 
+ #[cfg(feature = "alloc")] + fn choose_weighted_iter( + &self, + rng: &mut R, + weight: F, + ) -> Result, WeightError> + where + R: Rng + ?Sized, + F: Fn(&Self::Output) -> B, + B: SampleBorrow, + X: SampleUniform + Weight + PartialOrd, + { + use crate::distr::weighted::WeightedIndex; + let distr = WeightedIndex::new((0..self.len()).map(|idx| weight(&self[idx])))?; + Ok(rng.sample_iter(distr).map(|i| &self[i])) } /// Biased sampling of `amount` distinct elements /// - /// Similar to [`choose_multiple`], but where the likelihood of each + /// Similar to [`sample`], but where the likelihood of each /// element's inclusion in the output may be specified. Zero-weighted /// elements are never returned; the result may therefore contain fewer /// elements than `amount` even when `self.len() >= amount`. The elements @@ -198,18 +245,18 @@ pub trait IndexedRandom: Index { /// // (50% * 50%) + (25% * 67%) = 41.7% chance that the output is `['a', 'b']` in some order. /// // (50% * 50%) + (25% * 67%) = 41.7% chance that the output is `['a', 'c']` in some order. /// // (25% * 33%) + (25% * 33%) = 16.6% chance that the output is `['b', 'c']` in some order. - /// println!("{:?}", choices.choose_multiple_weighted(&mut rng, 2, |item| item.1).unwrap().collect::>()); + /// println!("{:?}", choices.sample_weighted(&mut rng, 2, |item| item.1).unwrap().collect::>()); /// ``` - /// [`choose_multiple`]: IndexedRandom::choose_multiple + /// [`sample`]: IndexedRandom::sample // Note: this is feature-gated on std due to usage of f64::powf. // If necessary, we may use alloc+libm as an alternative (see PR #1089). 
#[cfg(feature = "std")] - fn choose_multiple_weighted( + fn sample_weighted( &self, rng: &mut R, amount: usize, weight: F, - ) -> Result, WeightError> + ) -> Result, WeightError> where Self::Output: Sized, R: Rng + ?Sized, @@ -217,7 +264,7 @@ pub trait IndexedRandom: Index { X: Into, { let amount = core::cmp::min(amount, self.len()); - Ok(SliceChooseIter { + Ok(IndexedSamples { slice: self, _phantom: Default::default(), indices: index::sample_weighted( @@ -229,6 +276,45 @@ pub trait IndexedRandom: Index { .into_iter(), }) } + + /// Deprecated: use [`Self::sample`] instead + #[cfg(feature = "alloc")] + #[deprecated(since = "0.9.2", note = "Renamed to `sample`")] + fn choose_multiple(&self, rng: &mut R, amount: usize) -> IndexedSamples + where + Self::Output: Sized, + R: Rng + ?Sized, + { + self.sample(rng, amount) + } + + /// Deprecated: use [`Self::sample_array`] instead + #[deprecated(since = "0.9.2", note = "Renamed to `sample_array`")] + fn choose_multiple_array(&self, rng: &mut R) -> Option<[Self::Output; N]> + where + Self::Output: Clone + Sized, + R: Rng + ?Sized, + { + self.sample_array(rng) + } + + /// Deprecated: use [`Self::sample_weighted`] instead + #[cfg(feature = "std")] + #[deprecated(since = "0.9.2", note = "Renamed to `sample_weighted`")] + fn choose_multiple_weighted( + &self, + rng: &mut R, + amount: usize, + weight: F, + ) -> Result, WeightError> + where + Self::Output: Sized, + R: Rng + ?Sized, + F: Fn(&Self::Output) -> X, + X: Into, + { + self.sample_weighted(rng, amount, weight) + } } /// Extension trait on indexable lists, providing random sampling methods. @@ -412,17 +498,17 @@ impl SliceRandom for [T] { /// An iterator over multiple slice elements. /// /// This struct is created by -/// [`IndexedRandom::choose_multiple`](trait.IndexedRandom.html#tymethod.choose_multiple). +/// [`IndexedRandom::sample`]. 
#[cfg(feature = "alloc")] #[derive(Debug)] -pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { +pub struct IndexedSamples<'a, S: ?Sized + 'a, T: 'a> { slice: &'a S, _phantom: core::marker::PhantomData, indices: index::IndexVecIntoIter, } #[cfg(feature = "alloc")] -impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> { +impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for IndexedSamples<'a, S, T> { type Item = &'a T; fn next(&mut self) -> Option { @@ -437,13 +523,18 @@ impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for SliceCho #[cfg(feature = "alloc")] impl<'a, S: Index + ?Sized + 'a, T: 'a> ExactSizeIterator - for SliceChooseIter<'a, S, T> + for IndexedSamples<'a, S, T> { fn len(&self) -> usize { self.indices.len() } } +/// Deprecated: renamed to [`IndexedSamples`] +#[cfg(feature = "alloc")] +#[deprecated(since = "0.9.2", note = "Renamed to `IndexedSamples`")] +pub type SliceChooseIter<'a, S, T> = IndexedSamples<'a, S, T>; + #[cfg(test)] mod test { use super::*; @@ -492,16 +583,13 @@ mod test { assert_eq!(nums.choose_mut(&mut r), Some(&mut 3)); assert_eq!( - &chars.choose_multiple_array(&mut r), + &chars.sample_array(&mut r), &Some(['f', 'i', 'd', 'b', 'c', 'm', 'j', 'k']) ); #[cfg(feature = "alloc")] assert_eq!( - &chars - .choose_multiple(&mut r, 8) - .cloned() - .collect::>(), + &chars.sample(&mut r, 8).cloned().collect::>(), &['h', 'm', 'd', 'b', 'c', 'e', 'n', 'f'] ); @@ -668,7 +756,7 @@ mod test { let choices = [('a', 2), ('b', 1), ('c', 0)]; for _ in 0..100 { let result = choices - .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .sample_weighted(&mut rng, 2, |item| item.1) .unwrap() .collect::>(); @@ -678,29 +766,29 @@ mod test { // Case 2: All of the weights are 0 let choices = [('a', 0), ('b', 0), ('c', 0)]; - let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); + let r = choices.sample_weighted(&mut rng, 2, |item| item.1); assert_eq!(r.unwrap().len(), 0); // Case 3: Negative weights let choices = 
[('a', -1), ('b', 1), ('c', 1)]; - let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); + let r = choices.sample_weighted(&mut rng, 2, |item| item.1); assert_eq!(r.unwrap_err(), WeightError::InvalidWeight); // Case 4: Empty list let choices = []; - let r = choices.choose_multiple_weighted(&mut rng, 0, |_: &()| 0); + let r = choices.sample_weighted(&mut rng, 0, |_: &()| 0); assert_eq!(r.unwrap().count(), 0); // Case 5: NaN weights let choices = [('a', f64::NAN), ('b', 1.0), ('c', 1.0)]; - let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); + let r = choices.sample_weighted(&mut rng, 2, |item| item.1); assert_eq!(r.unwrap_err(), WeightError::InvalidWeight); // Case 6: +infinity weights let choices = [('a', f64::INFINITY), ('b', 1.0), ('c', 1.0)]; for _ in 0..100 { let result = choices - .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .sample_weighted(&mut rng, 2, |item| item.1) .unwrap() .collect::>(); assert_eq!(result.len(), 2); @@ -709,12 +797,12 @@ mod test { // Case 7: -infinity weights let choices = [('a', f64::NEG_INFINITY), ('b', 1.0), ('c', 1.0)]; - let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); + let r = choices.sample_weighted(&mut rng, 2, |item| item.1); assert_eq!(r.unwrap_err(), WeightError::InvalidWeight); // Case 8: -0 weights let choices = [('a', -0.0), ('b', 1.0), ('c', 1.0)]; - let r = choices.choose_multiple_weighted(&mut rng, 2, |item| item.1); + let r = choices.sample_weighted(&mut rng, 2, |item| item.1); assert!(r.is_ok()); } @@ -737,7 +825,7 @@ mod test { let expected_results = [5833, 2667, 1500]; for _ in 0..10000 { let result = choices - .choose_multiple_weighted(&mut rng, 2, |item| item.1) + .sample_weighted(&mut rng, 2, |item| item.1) .unwrap() .collect::>();