diff --git a/communication/src/allocator/generic.rs b/communication/src/allocator/generic.rs
index 4cab96846..e626b2c85 100644
--- a/communication/src/allocator/generic.rs
+++ b/communication/src/allocator/generic.rs
@@ -113,15 +113,6 @@ impl Allocate for Generic {
     fn receive(&mut self) { self.receive(); }
     fn release(&mut self) { self.release(); }
     fn events(&self) -> &Rc<RefCell<Vec<usize>>> { self.events() }
-    fn await_events(&self, _duration: Option<Duration>) {
-        match self {
-            Generic::Thread(t) => t.await_events(_duration),
-            Generic::Process(p) => p.await_events(_duration),
-            Generic::ProcessBinary(pb) => pb.await_events(_duration),
-            Generic::ZeroCopy(z) => z.await_events(_duration),
-            Generic::ZeroCopyBinary(z) => z.await_events(_duration),
-        }
-    }
 }
diff --git a/communication/src/allocator/mod.rs b/communication/src/allocator/mod.rs
index 536237631..4a5ef1a13 100644
--- a/communication/src/allocator/mod.rs
+++ b/communication/src/allocator/mod.rs
@@ -2,7 +2,6 @@
 use std::rc::Rc;
 use std::cell::RefCell;
-use std::time::Duration;

 pub use self::thread::Thread;
 pub use self::process::Process;
@@ -57,14 +56,6 @@ pub trait Allocate {
     /// into a performance problem.
     fn events(&self) -> &Rc<RefCell<Vec<usize>>>;

-    /// Awaits communication events.
-    ///
-    /// This method may park the current thread, for at most `duration`,
-    /// until new events arrive.
-    /// The method is not guaranteed to wait for any amount of time, but
-    /// good implementations should use this as a hint to park the thread.
-    fn await_events(&self, _duration: Option<Duration>) { }
-
     /// Ensure that received messages are surfaced in each channel.
     ///
     /// This method should be called to ensure that received messages are
diff --git a/communication/src/allocator/process.rs b/communication/src/allocator/process.rs
index cc7471f9e..526eb87da 100644
--- a/communication/src/allocator/process.rs
+++ b/communication/src/allocator/process.rs
@@ -4,7 +4,6 @@
 use std::rc::Rc;
 use std::cell::RefCell;
 use std::sync::{Arc, Mutex};
 use std::any::Any;
-use std::time::Duration;
 use std::collections::{HashMap};
 use std::sync::mpsc::{Sender, Receiver};
@@ -182,10 +181,6 @@ impl Allocate for Process {
         self.inner.events()
     }

-    fn await_events(&self, duration: Option<Duration>) {
-        self.inner.await_events(duration);
-    }
-
     fn receive(&mut self) {
         let mut events = self.inner.events().borrow_mut();
         while let Ok(index) = self.counters_recv.try_recv() {
diff --git a/communication/src/allocator/thread.rs b/communication/src/allocator/thread.rs
index 9857ed5ba..3750dafab 100644
--- a/communication/src/allocator/thread.rs
+++ b/communication/src/allocator/thread.rs
@@ -2,7 +2,6 @@
 use std::rc::Rc;
 use std::cell::RefCell;
-use std::time::Duration;
 use std::collections::VecDeque;

 use crate::allocator::{Allocate, AllocateBuilder};
@@ -36,16 +35,6 @@ impl Allocate for Thread {
     fn events(&self) -> &Rc<RefCell<Vec<usize>>> {
         &self.events
     }

-    fn await_events(&self, duration: Option<Duration>) {
-        if self.events.borrow().is_empty() {
-            if let Some(duration) = duration {
-                std::thread::park_timeout(duration);
-            }
-            else {
-                std::thread::park();
-            }
-        }
-    }
 }

 /// Thread-local counting channel push endpoint.
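With `await_events` gone from `Allocate`, a caller that previously parked on the allocator, for example via `allocator.await_events(duration)`, is expected to park through the activation tracker instead. A minimal sketch, assuming the worker's `activations: Rc<RefCell<Activations>>` handle and the `park_timeout` method introduced later in this patch:

    // Park for at most 10ms, or for less if a deferred activation is due sooner.
    // The 10ms bound is illustrative only, not part of the patch.
    activations.borrow().park_timeout(Some(std::time::Duration::from_millis(10)));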
diff --git a/communication/src/allocator/zero_copy/allocator.rs b/communication/src/allocator/zero_copy/allocator.rs
index 35cac7f4a..94ba0757b 100644
--- a/communication/src/allocator/zero_copy/allocator.rs
+++ b/communication/src/allocator/zero_copy/allocator.rs
@@ -314,7 +314,4 @@ impl<A: Allocate> Allocate for TcpAllocator<A> {
     fn events(&self) -> &Rc<RefCell<Vec<usize>>> {
         self.inner.events()
     }
-    fn await_events(&self, duration: Option<Duration>) {
-        self.inner.await_events(duration);
-    }
 }
diff --git a/communication/src/allocator/zero_copy/allocator_process.rs b/communication/src/allocator/zero_copy/allocator_process.rs
index 8aee89b23..58ea6514b 100644
--- a/communication/src/allocator/zero_copy/allocator_process.rs
+++ b/communication/src/allocator/zero_copy/allocator_process.rs
@@ -242,14 +242,4 @@ impl Allocate for ProcessAllocator {
     fn events(&self) -> &Rc<RefCell<Vec<usize>>> {
         &self.events
     }
-    fn await_events(&self, duration: Option<Duration>) {
-        if self.events.borrow().is_empty() {
-            if let Some(duration) = duration {
-                std::thread::park_timeout(duration);
-            }
-            else {
-                std::thread::park();
-            }
-        }
-    }
 }
diff --git a/timely/src/progress/subgraph.rs b/timely/src/progress/subgraph.rs
index 868fd10d9..b30524f4e 100644
--- a/timely/src/progress/subgraph.rs
+++ b/timely/src/progress/subgraph.rs
@@ -300,10 +300,11 @@ where
         self.propagate_pointstamps();

         {   // Enqueue active children; scoped to let borrow drop.
+            use crate::scheduling::activate::Scheduler;
             let temp_active = &mut self.temp_active;
             self.activations
                 .borrow_mut()
-                .for_extensions(&self.path[..], |index| temp_active.push(Reverse(index)));
+                .extensions(&self.path[..], temp_active);
         }

         // Schedule child operators.
diff --git a/timely/src/scheduling/activate.rs b/timely/src/scheduling/activate.rs
index 2642b520b..b6dcc35c2 100644
--- a/timely/src/scheduling/activate.rs
+++ b/timely/src/scheduling/activate.rs
@@ -8,30 +8,45 @@
 use std::time::{Duration, Instant};
 use std::cmp::Reverse;
 use crossbeam_channel::{Sender, Receiver};

-/// Methods required to act as a timely scheduler.
+/// Methods required to act as a scheduler for timely operators.
 ///
-/// The core methods are the activation of "paths", sequences of integers, and
-/// the enumeration of active paths by prefix. A scheduler may delay the report
-/// of a path indefinitely, but it should report at least one extension for the
-/// empty path `&[]` or risk parking the worker thread without a certain unpark.
+/// Operators are described by "paths" of integers, indicating the path along
+/// a tree of regions, arriving at the operator. Each path is either "idle"
+/// or "active", where the latter indicates that someone has requested that the
+/// operator be scheduled by the worker. Operators go from idle to active when
+/// the `activate(path)` method is called, and from active to idle when the path
+/// is returned through a call to `extensions(path, _)`.
 ///
-/// There is no known harm to "spurious wake-ups" where a not-active path is
-/// returned through `extensions()`.
+/// The worker will continually probe for extensions to the root empty path `[]`,
+/// and then follow all returned addresses, recursively. A scheduler need not
+/// schedule all active paths, but it should return *some* active path when the
+/// worker probes the empty path, or the worker may put the thread to sleep.
+///
+/// There is no known harm to scheduling an idle path.
+/// The worker may speculatively schedule paths of its own accord.
 pub trait Scheduler {
     /// Mark a path as immediately scheduleable.
+    ///
+    /// The scheduler is not required to immediately schedule the path, but it
+    /// should not signal that it has no work until the path has been scheduled.
     fn activate(&mut self, path: &[usize]);

     /// Populates `dest` with next identifiers on active extensions of `path`.
     ///
     /// This method is where a scheduler is allowed to exercise some discretion,
     /// in that it does not need to present *all* extensions, but it can instead
-    /// present only those that the runtime should schedule.
-    fn extensions(&mut self, path: &[usize], dest: &mut Vec<usize>);
+    /// present only those that the runtime should immediately schedule.
+    ///
+    /// The worker *will* schedule all extensions before probing new prefixes.
+    /// The scheduler is invited to rely on this, and to schedule in "batches",
+    /// so that by the next time the worker probes for extensions to the empty
+    /// path, all addresses in the batch have certainly been scheduled.
+    fn extensions(&mut self, path: &[usize], dest: &mut BinaryHeap<Reverse<usize>>);
 }

 // Trait objects can be schedulers too.
 impl Scheduler for Box<dyn Scheduler> {
     fn activate(&mut self, path: &[usize]) { (**self).activate(path) }
-    fn extensions(&mut self, path: &[usize], dest: &mut Vec<usize>) { (**self).extensions(path, dest) }
+    fn extensions(&mut self, path: &[usize], dest: &mut BinaryHeap<Reverse<usize>>) { (**self).extensions(path, dest) }
 }

 /// Allocation-free activation tracker.
@@ -93,7 +108,7 @@ impl Activations {
     }

     /// Discards the current active set and presents the next active set.
-    pub fn advance(&mut self) {
+    fn advance(&mut self) {

         // Drain inter-thread activations.
         while let Ok(path) = self.rx.try_recv() {
@@ -130,15 +145,15 @@ impl Activations {
         self.clean = self.bounds.len();
     }

-    /// Maps a function across activated paths.
-    pub fn map_active(&self, logic: impl Fn(&[usize])) {
-        for (offset, length) in self.bounds.iter() {
-            logic(&self.slices[*offset .. (*offset + *length)]);
-        }
-    }
-
     /// Sets as active any symbols that follow `path`.
-    pub fn for_extensions(&self, path: &[usize], mut action: impl FnMut(usize)) {
+    fn for_extensions(&mut self, path: &[usize], mut action: impl FnMut(usize)) {
+
+        // Each call for the root path is a moment where the worker has reset.
+        // This relies on a worker implementation that follows the scheduling
+        // instructions perfectly; if any offered paths are not explored, oops.
+        if path.is_empty() {
+            self.advance();
+        }

         let position =
             self.bounds[..self.clean]
@@ -179,7 +194,7 @@ impl Activations {
     /// This method should be used before putting a worker thread to sleep, as it
     /// indicates the amount of time before the thread should be unparked for the
     /// next scheduled activation.
-    pub fn empty_for(&self) -> Option<Duration> {
+    fn empty_for(&self) -> Option<Duration> {
         if !self.bounds.is_empty() || self.timer.is_none() {
             Some(Duration::new(0,0))
         }
@@ -191,6 +206,35 @@ impl Activations {
             })
         }
     }
+
+    /// Indicates that there is nothing to do for `timeout`, and that the scheduler
+    /// can allow the thread to sleep until then.
+    ///
+    /// The method does not *need* to park the thread, and indeed it may elect to
+    /// unpark earlier if there are deferred activations.
+    pub fn park_timeout(&self, timeout: Option<Duration>) {
+        let empty_for = self.empty_for();
+        let timeout = match (timeout, empty_for) {
+            (Some(x), Some(y)) => Some(std::cmp::min(x,y)),
+            (x, y) => x.or(y),
+        };
+
+        if let Some(timeout) = timeout {
+            std::thread::park_timeout(timeout);
+        }
+        else {
+            std::thread::park();
+        }
+    }
+}
+
+impl Scheduler for Activations {
+    fn activate(&mut self, path: &[usize]) {
+        self.activate(path);
+    }
+    fn extensions(&mut self, path: &[usize], dest: &mut BinaryHeap<Reverse<usize>>) {
+        self.for_extensions(path, |index| dest.push(Reverse(index)));
+    }
 }

 /// A thread-safe handle to an `Activations`.
diff --git a/timely/src/worker.rs b/timely/src/worker.rs
index 36f66d1a6..379681ead 100644
--- a/timely/src/worker.rs
+++ b/timely/src/worker.rs
@@ -2,10 +2,11 @@
 use std::rc::Rc;
 use std::cell::{RefCell, RefMut};
+use std::cmp::Reverse;
 use std::any::Any;
 use std::str::FromStr;
 use std::time::{Instant, Duration};
-use std::collections::HashMap;
+use std::collections::{HashMap, BinaryHeap};
 use std::collections::hash_map::Entry;
 use std::sync::Arc;
@@ -230,7 +231,7 @@ pub struct Worker<A: Allocate> {
     logging: Option<Rc<RefCell<crate::logging_core::Registry>>>,
     activations: Rc<RefCell<Activations>>,
-    active_dataflows: Vec<usize>,
+    active_dataflows: BinaryHeap<Reverse<usize>>,

     // Temporary storage for channel identifiers during dataflow construction.
     // These are then associated with a dataflow once constructed.
@@ -346,7 +347,7 @@ impl<A: Allocate> Worker<A> {
     /// worker.step_or_park(Some(Duration::from_secs(1)));
     /// });
     /// ```
-    pub fn step_or_park(&mut self, duration: Option<Duration>) -> bool {
+    pub fn step_or_park(&mut self, timeout: Option<Duration>) -> bool {

         {   // Process channel events. Activate responders.
             let mut allocator = self.allocator.borrow_mut();
@@ -370,43 +371,41 @@
             }
         }

-        // Organize activations.
-        self.activations
-            .borrow_mut()
-            .advance();
-
-        // Consider parking only if we have no pending events, some dataflows, and a non-zero duration.
-        let empty_for = self.activations.borrow().empty_for();
-        // Determine the minimum park duration, where `None` are an absence of a constraint.
-        let delay = match (duration, empty_for) {
-            (Some(x), Some(y)) => Some(std::cmp::min(x,y)),
-            (x, y) => x.or(y),
-        };
+        // Commence a new round of scheduling, starting with dataflows.
+        // We probe the scheduler for active prefixes, where an empty response
+        // indicates that the scheduler has no work for us at the moment.
+        {   // Scoped to let borrow of `self.active_dataflows` drop.
+            use crate::scheduling::activate::Scheduler;
+            let active_dataflows = &mut self.active_dataflows;
+            self.activations
+                .borrow_mut()
+                .extensions(&[], active_dataflows);
+        }

-        if delay != Some(Duration::new(0,0)) {
+        // If no dataflows are active, there is nothing to do. Consider parking.
+        if self.active_dataflows.is_empty() {

-            // Log parking and flush log.
-            if let Some(l) = self.logging().as_mut() {
-                l.log(crate::logging::ParkEvent::park(delay));
-                l.flush();
-            }
+            // If the timeout is zero, don't bother trying to park.
+            // More generally, we could put some threshold in here.
+            if timeout != Some(Duration::new(0, 0)) {
+                // Log parking and flush log.
+                if let Some(l) = self.logging().as_mut() {
+                    l.log(crate::logging::ParkEvent::park(timeout));
+                    l.flush();
+                }

-            self.allocator
-                .borrow()
-                .await_events(delay);
+                // We have just drained `allocator.events()` up above;
+                // otherwise we should first check it for emptiness.
+                self.activations.borrow().park_timeout(timeout);

-            // Log return from unpark.
-            self.logging().as_mut().map(|l| l.log(crate::logging::ParkEvent::unpark()));
+                // Log return from unpark.
+                self.logging().as_mut().map(|l| l.log(crate::logging::ParkEvent::unpark()));
+            }
         }
-        else {   // Schedule active dataflows.
-
-            let active_dataflows = &mut self.active_dataflows;
-            self.activations
-                .borrow_mut()
-                .for_extensions(&[], |index| active_dataflows.push(index));
+        else {   // Schedule all active dataflows.

             let mut dataflows = self.dataflows.borrow_mut();
-            for index in active_dataflows.drain(..) {
+            for Reverse(index) in self.active_dataflows.drain() {

                 // Step dataflow if it exists, remove if not incomplete.
                 if let Entry::Occupied(mut entry) = dataflows.entry(index) {
                     // TODO: This is a moment at which a scheduling decision is being made.
@@ -745,7 +744,7 @@ impl<A: Allocate> Clone for Worker<A> {
             dataflow_counter: Rc::clone(&self.dataflow_counter),
             logging: self.logging.clone(),
             activations: Rc::clone(&self.activations),
-            active_dataflows: Vec::new(),
+            active_dataflows: Default::default(),
             temp_channel_ids: Rc::clone(&self.temp_channel_ids),
         }
     }
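For illustration, a minimal sketch of a custom implementation of the revised `Scheduler` trait. The `ToyScheduler` type, its flat `Vec<Vec<usize>>` storage, and its retirement policy are invented for this sketch and are not part of the patch; the in-tree `Activations` tracker batches, deduplicates, and defers activations far more carefully.

    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    use crate::scheduling::activate::Scheduler;

    /// Toy scheduler: remembers every activated path verbatim and, when probed,
    /// offers the next coordinate of every active path extending the prefix.
    #[derive(Default)]
    pub struct ToyScheduler {
        active: Vec<Vec<usize>>,
    }

    impl Scheduler for ToyScheduler {
        fn activate(&mut self, path: &[usize]) {
            self.active.push(path.to_vec());
        }
        fn extensions(&mut self, path: &[usize], dest: &mut BinaryHeap<Reverse<usize>>) {
            // Offer the next identifier of every active path extending `path`.
            // Duplicates are harmless: scheduling an idle path is allowed.
            for active in self.active.iter() {
                if active.len() > path.len() && active.starts_with(path) {
                    dest.push(Reverse(active[path.len()]));
                }
            }
            // Paths handed back in full (their last coordinate was just offered)
            // return to "idle"; deeper paths stay active until the worker
            // recursively probes further down the tree.
            self.active.retain(|active| active.len() != path.len() + 1 || !active.starts_with(path));
        }
    }

A worker-like caller would probe `extensions(&[], &mut heap)`, drain the heap of `Reverse(index)` entries in increasing order, and recurse into each returned index, much as `step_or_park` above does for top-level dataflows.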