diff --git a/Cargo.toml b/Cargo.toml
index c376fd7..1d2cdbf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
 name = "orx-tree"
-version = "1.6.0"
+version = "1.7.0"
 edition = "2024"
 authors = ["orxfun <orx.ugur.arikan@gmail.com>"]
-description = "A beautiful tree 🌳 with convenient and efficient growth, mutation and traversal features with support for parallel computation."
+description = "A beautiful tree 🌳 with convenient, efficient, parallelizable growth, mutation and traversal features."
 license = "MIT OR Apache-2.0"
 repository = "https://github.com/orxfun/orx-tree/"
 keywords = ["tree", "data-structures", "traversal", "traverse", "binarytree"]
@@ -16,7 +16,7 @@ orx-pinned-vec = "3.16.0"
 orx-self-or = "1.2.0"
 serde = { version = "1.0.219", optional = true, default-features = false }
 orx-split-vec = { version = "3.17.0", default-features = false }
-orx-selfref-col = { version = "2.9.0", default-features = false }
+orx-selfref-col = { version = "2.10.0", default-features = false }
 orx-concurrent-iter = { version = "2.1.0", default-features = false }
 orx-parallel = { version = "2.1.0", default-features = false, optional = true }
 
@@ -34,5 +34,5 @@ default = ["orx-parallel"]
 serde = ["dep:serde"]
 
 [[bench]]
-name = "parallelization_ref"
+name = "walk_iterator"
 harness = false
diff --git a/README.md b/README.md
index f193915..c9c189d 100644
--- a/README.md
+++ b/README.md
@@ -4,71 +4,113 @@
 [![orx-tree crate](https://img.shields.io/crates/d/orx-tree.svg)](https://crates.io/crates/orx-tree)
 [![orx-tree documentation](https://docs.rs/orx-tree/badge.svg)](https://docs.rs/orx-tree)
 
-A beautiful tree 🌳 with convenient and efficient growth, mutation and traversal features with support for parallel computation.
+A beautiful tree 🌳 with convenient, efficient, parallelizable growth, mutation and traversal features.
 
-## Features
-
-### Generic Variants
+## Tree Variants
 
 [`Tree`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html) is generic over variants that define the way the children are stored:
 
 * [`DynTree<T>`](https://docs.rs/orx-tree/latest/orx_tree/type.DynTree.html), or equivalently **Tree&lt;Dyn&lt;T&gt;&gt;**, is a tree where each node may contain references to any number of children stored as a vector.
-* [`DaryTree<D, T>`](https://docs.rs/orx-tree/latest/orx_tree/type.DaryTree.html), or equivalently **Tree&lt;DaryTree&lt;D, T&gt;&gt;**, is a tree where each node may contain at most **D** child references stored inlined as an array.
+* [`DaryTree<D, T>`](https://docs.rs/orx-tree/latest/orx_tree/type.DaryTree.html), or equivalently **Tree&lt;Dary&lt;D, T&gt;&gt;**, is a tree where each node may contain at most **D** child references stored inlined as an array.
 * [`BinaryTree<T>`](https://docs.rs/orx-tree/latest/orx_tree/type.BinaryTree.html) is simply a shorthand for **DaryTree&lt;2, T&gt;**.
 
-### Recursive Nature of Trees
+## Recursive Nature of Trees
+
+Note that [`Tree`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html) has only a few methods which mainly allow access to the root or to any node using node indices. Since every node is the root of a subtree, the core tree functionalities are provided as methods of [`NodeRef`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html) and [`NodeMut`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html), which are immutable and mutable nodes, respectively.
 
-Note that [`Tree`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html) has only few methods which mainly allow access to the root or to any node using node indices. Since every node represents a subtree with itself being the root of, the core tree functionalities are provided as methods of [`NodeRef`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html) and [`NodeMut`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html), which are immutable and mutable nodes, respectively.
+## Iterations
 
-### Traversals
+### Walks over the Tree
 
-We can iterate over all nodes of a subtree in various ways. In other words, we can *walk* the nodes of any subtree using a generic parameter which defines the order of traversal.
+We can visit all nodes of the tree in various ways. The way we *walk*, or the order of visited nodes, is determined by a generic traversal parameter.
 
-To illustrate, let `node` be any node of the tree. Then:
+To demonstrate, consider the following methods of a tree node:
 
-* [`node.walk::<Bfs>()`](https://docs.rs/orx-tree/latest/orx_tree/traversal/struct.Bfs.html) creates an iterator that visits all the nodes belonging to the subtree rooted at the *node* in the breadth-first order.
-* [`node.walk_mut::<Dfs>()`](https://docs.rs/orx-tree/latest/orx_tree/traversal/struct.Dfs.html) creates a mutable iterator, this time in depth-first (pre-)order.
-* [`node_into_walk::<PostOrder>()`](https://docs.rs/orx-tree/latest/orx_tree/traversal/struct.PostOrder.html), on the other hand, takes the subtree rooted at the *node* out of the tree and yields the elements in post-order.
+* [`walk::<Bfs>()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.walk) creates an iterator that visits all the nodes belonging to the subtree rooted at the *node* in the breadth-first order.
+* [`walk_mut::<Dfs>()`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.walk_mut) creates a mutable iterator, this time in depth-first order.
+* [`into_walk::<PostOrder>()`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.into_walk), on the other hand, takes the subtree rooted at the *node* out of the tree and yields the elements in post-order.
 
-We can iterate over the data of the nodes, or over the nodes themselves with access to children, parent, siblings, etc. Further, just like *enumerate* appends the iteration order in a regular iterator, we can append tree-specific values to the iteration elements. Specifically, we can add the depth and/or the sibling position of each yield node. These more specialized traversals can be created conveniently using the [`Traversal`](https://docs.rs/orx-tree/latest/orx_tree/traversal/struct.Traversal.html) builder type.
+Walk iterators might yield node values or nodes with access to children, parent, siblings, etc. Further, node depth and/or its position among its siblings might be added. These more specialized traversals can be created conveniently using the [`Traversal`](https://docs.rs/orx-tree/latest/orx_tree/traversal/struct.Traversal.html) builder type.
 
+*You may see the [walks](https://github.com/orxfun/orx-tree/blob/main/examples/walks.rs) example that demonstrates different ways to walk the tree with traversal variants (`cargo run --example walks`).*
 
 ```rust
 use orx_tree::*;
 
+//      1
+//     ╱ ╲
+//    ╱   ╲
+//   2     3
+//  ╱     ╱ ╲
+// 4     5   6
 let mut tree = DynTree::new(1);
-let [id2, _] = tree.root_mut().push_children([2, 3]);
-tree.node_mut(&id2).push_child(4);
-
-// create a re-usable BFS traverser: over nodes, appending depth and sibling-idx
-let mut t = Traversal.bfs().over_nodes().with_depth().with_sibling_idx();
-
-let vals: Vec<_> = tree
-    .root()
-    .walk_with(&mut t)
-    .map(|(depth, sibling_idx, node)| (depth, sibling_idx, *node.data()))
-    .collect();
-assert_eq!(vals, [(0, 0, 1), (1, 0, 2), (1, 1, 3), (2, 0, 4)]);
+let [id2, id3] = tree.root_mut().push_children([2, 3]);
+let id4 = tree.node_mut(&id2).push_child(4);
+tree.node_mut(&id3).push_children([5, 6]);
+
+let root = tree.root();
+assert_eq!(root.walk::<Dfs>().copied().collect::<Vec<_>>(), [1, 2, 4, 3, 5, 6]);
+assert_eq!(root.walk::<Bfs>().copied().collect::<Vec<_>>(), [1, 2, 3, 4, 5, 6]);
+assert_eq!(root.walk::<PostOrder>().copied().collect::<Vec<_>>(), [4, 2, 5, 6, 3, 1]);
+
+// create a re-usable BFS traverser, with additional access to depth and sibling-idx
+let mut t = Traversal.bfs().with_depth().with_sibling_idx();
+assert_eq!(
+    root.walk_with(&mut t).collect::<Vec<_>>(),
+     [(0, 0, &1), (1, 0, &2), (1, 1, &3), (2, 0, &4), (2, 0, &5), (2, 1, &6)]
+);
 ```
 
+### Custom Walk
+
+In addition to common traversal strategies, we can create a **custom iterator** by simply calling [`custom_walk(next_node)`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.custom_walk) where the argument `next_node` is a function with signature `Fn(Node) -> Option<Node>` defining the traversal strategy.
+
 ### Special Iterators
 
-In addition to iterators over all nodes of a subtree, we can create specialized iterators as well:
+In addition to walks over subtrees, the following iterators are useful in special use cases.
+
+* [`leaves::<Bfs>()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.leaves) returns leaf nodes in breadth-first order.
+* [`paths::<Dfs>()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.paths) returns all the paths or sequences of nodes connecting the *node* to all of its leaves in the depth-first order.
+* [`ancestors()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.ancestors) provides an upward iterator from the parent of the *node* to the root of the tree; the *node* itself is not included.
 
-* [`node.leaves::<Bfs>()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.leaves) yields the leaf nodes in the subtree rooted at *node* in breadth-first order.
-* [`node.paths::<Dfs>()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.paths) yields all the paths or sequences of nodes connecting the *node* to all of its leaves in the depth-first order.
-* [`node.ancestors()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.ancestors) provides an upward iterator from the *node* to the root of the tree.
+*You may see the [special iterators](https://github.com/orxfun/orx-tree/blob/main/examples/special_iterators.rs) example.*
 
-Alternatively, we can walk the tree freely using methods to step the links in different ways, such as:
+### Manual Traversals
 
-* [`node.child(child_idx)`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.child), [`node.children()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.children), [`node.children_mut()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.children_mut), [`node.into_child_mut(child_idx)`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.into_child_mut)
-* [`node.parent()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.parent), [`node.into_parent_mut()`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.into_parent_mut), etc.
+Alternatively, we can move on nodes of the tree freely:
+
+* **↓** [`child(child_idx)`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.child), [`children()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.children), [`children_mut()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.children_mut), [`into_child_mut(child_idx)`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.into_child_mut)
+* **↑** [`parent()`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.parent), [`into_parent_mut()`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.into_parent_mut), etc.
+
+*You may see the [manual iteration](https://github.com/orxfun/orx-tree/blob/main/examples/manual_iteration.rs) and [mutable_recursive_traversal](https://github.com/orxfun/orx-tree/blob/main/examples/mutable_recursive_traversal.rs) examples.*
 
 ### Arbitrary Order Iterators
 
-The tree naturally implements [`Collection`](https://docs.rs/orx-iterable/latest/orx_iterable/trait.Collection.html) and [`CollectionMut`](https://docs.rs/orx-iterable/latest/orx_iterable/trait.CollectionMut.html) providing iterators via `iter` and `iter_mut` methods. Since the tree is not a linear data structure, these iterators yield elements in an arbitrary (but deterministic) order, which is useful in certain situations such as updating the values of the tree using a transformation or applying reductions.
+The tree naturally implements `IntoIterator`, [`Collection`](https://docs.rs/orx-iterable/latest/orx_iterable/trait.Collection.html) and [`CollectionMut`](https://docs.rs/orx-iterable/latest/orx_iterable/trait.CollectionMut.html) providing iterators via `into_iter`, `iter` and `iter_mut` methods. These iterators yield elements in an arbitrary but deterministic order.
+
+## Parallelization
+
+`Tree` aims to enable convenient parallel computation for all iterators, traversals or walks mentioned above using the [orx-parallel](https://crates.io/crates/orx-parallel) feature (see [features](#features) section). Parallel counterparts return a [`ParIter`](https://docs.rs/orx-parallel/latest/orx_parallel/trait.ParIter.html) rather than a sequential `Iterator`.
+
+[`tree.par()`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html#method.par) and [`tree.into_par()`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html#method.into_par) return parallel iterators over all nodes of the tree. Examples can be found in [`demo_parallelization`](https://github.com/orxfun/orx-tree/blob/main/examples/demo_parallelization.rs) example. Importantly note that the tree defines its own concurrent iterators, and hence, allows for efficient computation, which is often not possible with generic implementations. In order to check the impact in performance, you may use the lightweight benchmark example [`bench_parallelization`](https://github.com/orxfun/orx-tree/blob/main/examples/bench_parallelization.rs):
 
-### Constant Time Access to Nodes via Node Indices
+* `Sequential computation over Tree : 18.96s`
+* `Parallelized over Tree using orx-parallel : 6.02s`
+* `Parallelized over Tree using rayon's par-bridge : 81.10s`
+
+Remaining walks and traversals can be parallelized simply by adding the **_par** suffix to the names of their sequential counterparts:
+
+[`children_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.children_par) | 
+[`ancestors_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.ancestors_par) |
+[`custom_walk_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.custom_walk_par) |
+[`walk_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.walk_par) |
+[`walk_into_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.walk_into_par) |
+[`paths_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.paths_par) |
+[`paths_with_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.paths_with_par) |
+[`leaves_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.leaves_par) |
+[`leaves_with_par`](https://docs.rs/orx-tree/latest/orx_tree/trait.NodeRef.html#method.leaves_with_par) |
+
+## Constant Time Access to Nodes via Node Indices
 
 A [`NodeIdx`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeIdx.html) for a [`Tree`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html) is similar to `usize` for a slice in that it allows constant time access to the node it is created for.
 
@@ -89,16 +131,15 @@ Here, `idx` does not have a lifetime attached to the `tree`, yet it refers to th
 * `another_tree.node(&idx)` is an out-of-bounds error.
 * `tree.node(&idx)` after removing the node from the tree, say by `tree.node_mut(&idx).prune()` call, is a removed-node error.
 
-
-### Cache Locality
+## Cache Locality
 
 Nodes of the tree are stored in an underlying [`PinnedVec`](https://crates.io/crates/orx-pinned-vec) with pinned element guarantees. This allows for keeping the nodes close to each other improving cache locality while still providing with constant time mutation methods.
 
-### Convenient Mutations
+## Convenient Mutations
 
 The tree aims to make every move on the tree possible, convenient and efficient.
 
-#### Growth & Move Subtrees Around
+### Growth & Move Subtrees Around
 
 The following methods demonstrate downward growth by adding descendants to a node:
 
@@ -118,7 +159,7 @@ Note that all the growth methods return the indices of the created nodes allowin
 
 Additionally, the tree provides methods for special moves such as [`swap_subtrees`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html#method.swap_subtrees) to swap components of the same tree.
 
-#### Removals
+### Removals
 
 We can take out a node from the tree, while connecting its parent to its children via the [`take_out`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.take_out) method.
 
@@ -130,17 +171,9 @@ Alternatively, we can turn a mutable node into an [`into_walk`](https://docs.rs/
 * We can iterate over the removed nodes in the order of the generic traversal parameter and use the data however we need.
 * Or we can attach the removed subtree at a desired position of another tree by passing it to methods such as [`push_child_tree(subtree)`](https://docs.rs/orx-tree/latest/orx_tree/struct.NodeMut.html#method.push_child_tree).
 
-## Features
+# Features
 
-* **orx-parallel**: Tree allows efficient parallel processing through [concurrent iterators](https://crates.io/crates/orx-concurrent-iter) and [parallel iterators](https://crates.io/crates/orx-parallel).
-  * This feature is added as default and requires **std**; hence, please use `cargo add orx-tree --no-default-features` for **no-std** use cases.
-  * Currently, parallel iteration over all nodes of the tree in arbitrary order is supported by methods [`par`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html#method.par) and [`into_par`](https://docs.rs/orx-tree/latest/orx_tree/struct.Tree.html#method.into_par).
-  * Parallelization of all walks or traversals in particular order are under development.
-  * Parallelization examples can be found in [`demo_parallelization`](https://github.com/orxfun/orx-tree/blob/main/examples/demo_parallelization.rs) example.
-  * Importantly note that the tree defines its own concurrent iterators, and hence, allows for efficient computation, which is often not possible with generic implementations such as rayon's `par_bridge`. In order to check the impact in performance, you may use the lightweight benchmark example [`bench_parallelization`](https://github.com/orxfun/orx-linked-list/blob/main/examples/bench_parallelization.rs):
-    * `Sequential computation over Tree : 18.96s`
-    * `Parallelized over Tree using orx-parallel : 6.02s`
-    * `Parallelized over Tree using rayon's par-bridge : 81.10s`
+* **orx-parallel**: Tree allows efficient parallel processing through [concurrent iterators](https://crates.io/crates/orx-concurrent-iter) and [parallel iterators](https://crates.io/crates/orx-parallel). See [parallelization section](#parallelization) for details. This feature is added as default and requires **std**. Therefore, please use `cargo add orx-tree --no-default-features` for **no-std** use cases.
 
 * **serde**: Tree implements `Serialize` and `Deserialize` traits; the "serde" feature needs to be added when required. It uses a linearized representation of the tree as a [`DepthFirstSequence`](https://docs.rs/orx-tree/latest/orx_tree/struct.DepthFirstSequence.html). You may find de-serialization examples in the corresponding [test file](https://github.com/orxfun/orx-tree/blob/main/tests/serde.rs).
 
@@ -198,7 +231,7 @@ assert_eq!(node4.num_children(), 1);
 assert_eq!(node4.get_child(0), Some(tree.node(&id8)));
 
 let ancestors: Vec<_> = node4.ancestors().map(|x| *x.data()).collect();
-assert_eq!(ancestors, [4, 2, 1]);
+assert_eq!(ancestors, [2, 1]);
 
 let new_tree: BinaryTree<_> = node4.clone_as_tree();
 assert_eq!(new_tree.root().data(), &4);
diff --git a/benches/children_iterator.rs b/benches/children_iterator.rs
new file mode 100644
index 0000000..e5be502
--- /dev/null
+++ b/benches/children_iterator.rs
@@ -0,0 +1,110 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+#[cfg(feature = "orx-parallel")]
+use orx_parallel::ParIter;
+use orx_tree::*;
+
+fn build_tree(n: usize) -> DynTree<String> {
+    let mut tree = DynTree::new(0.to_string());
+    let mut dfs = Traversal.dfs().over_nodes();
+    while tree.len() < n {
+        let root = tree.root();
+        let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+        for idx in x.iter() {
+            let count = tree.len();
+            let mut node = tree.node_mut(idx);
+            let num_children = 4;
+            for j in 0..num_children {
+                node.push_child((count + j).to_string());
+            }
+        }
+    }
+    tree
+}
+
+fn fibonacci(n: u64) -> u64 {
+    let mut a = 0;
+    let mut b = 1;
+    for _ in 0..n {
+        let c = a + b;
+        a = b;
+        b = c;
+    }
+    a
+}
+
+fn compute_subtree_value(subtree: &DynNode<String>) -> u64 {
+    subtree
+        .walk::<Dfs>()
+        .map(|x| x.parse::<u64>().unwrap())
+        .map(|x| fibonacci(x % 1000))
+        .sum()
+}
+
+fn children(tree: &DynTree<String>) -> u64 {
+    tree.root()
+        .children()
+        .map(|x| compute_subtree_value(&x))
+        .sum()
+}
+
+#[cfg(feature = "orx-parallel")]
+fn children_par(tree: &DynTree<String>) -> u64 {
+    tree.root()
+        .children_par()
+        .map(|x| compute_subtree_value(&x))
+        .sum()
+}
+
+#[cfg(feature = "orx-parallel")]
+fn children_par_2threads(tree: &DynTree<String>) -> u64 {
+    tree.root()
+        .children_par()
+        .num_threads(2)
+        .map(|x| compute_subtree_value(&x))
+        .sum()
+}
+
+fn bench(c: &mut Criterion) {
+    // tree sizes to benchmark; `build_tree` grows a tree until it holds at least `n` nodes
+    let treatments = vec![1_024 * 64];
+    let mut group = c.benchmark_group("children_iterator");
+
+    for n in &treatments {
+        let tree = build_tree(*n);
+        // the sequential result is the reference every benchmarked variant must reproduce
+        let expected = children(&tree);
+
+        group.bench_with_input(BenchmarkId::new("NodeRef::children()", n), n, |b, _| {
+            let result = children(&tree);
+            assert_eq!(result, expected);
+            b.iter(|| children(&tree))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(
+            BenchmarkId::new("NodeRef::children_par()", n),
+            n,
+            |b, _| {
+                let result = children_par(&tree);
+                assert_eq!(result, expected);
+                b.iter(|| children_par(&tree))
+            },
+        );
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(
+            BenchmarkId::new("NodeRef::children_par().num_threads(2)", n),
+            n,
+            |b, _| {
+                let result = children_par_2threads(&tree);
+                assert_eq!(result, expected);
+                b.iter(|| children_par_2threads(&tree))
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench);
+criterion_main!(benches);
diff --git a/benches/parallelization_owned.rs b/benches/parallelization_owned.rs
index 74aac32..217f75d 100644
--- a/benches/parallelization_owned.rs
+++ b/benches/parallelization_owned.rs
@@ -56,6 +56,7 @@ fn tree_into_bfs(mut tree: DynTree<String>) -> i64 {
         .sum()
 }
 
+#[cfg(feature = "orx-parallel")]
 fn tree_into_par_x(tree: DynTree<String>) -> i64 {
     tree.into_par()
         .map(|x| x.parse::<usize>().unwrap())
@@ -107,6 +108,7 @@ fn bench(c: &mut Criterion) {
             },
         );
 
+        #[cfg(feature = "orx-parallel")]
         group.bench_with_input(
             BenchmarkId::new("Tree::into_par_x() - orx-parallel", n),
             n,
diff --git a/benches/parallelization_ref.rs b/benches/parallelization_ref.rs
index 5a8cf0c..dba5c69 100644
--- a/benches/parallelization_ref.rs
+++ b/benches/parallelization_ref.rs
@@ -56,6 +56,7 @@ fn tree_bfs(tree: &DynTree<String>) -> i64 {
         .sum()
 }
 
+#[cfg(feature = "orx-parallel")]
 fn tree_par_x(tree: &DynTree<String>) -> i64 {
     tree.par()
         .map(|x| x.parse::<usize>().unwrap())
@@ -107,6 +108,7 @@ fn bench(c: &mut Criterion) {
             },
         );
 
+        #[cfg(feature = "orx-parallel")]
         group.bench_with_input(
             BenchmarkId::new("Tree::par_x() - orx-parallel", n),
             n,
diff --git a/benches/paths_iterator.rs b/benches/paths_iterator.rs
new file mode 100644
index 0000000..bc87772
--- /dev/null
+++ b/benches/paths_iterator.rs
@@ -0,0 +1,98 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+use orx_iterable::{IntoCloningIterable, Iterable};
+#[cfg(feature = "orx-parallel")]
+use orx_parallel::ParIter;
+use orx_tree::*;
+
+fn build_tree(n: usize) -> DynTree<String> {
+    let mut tree = DynTree::new(0.to_string());
+    let mut dfs = Traversal.dfs().over_nodes();
+    while tree.len() < n {
+        let root = tree.root();
+        let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+        for idx in x.iter() {
+            let count = tree.len();
+            let mut node = tree.node_mut(idx);
+            let num_children = 4;
+            for j in 0..num_children {
+                node.push_child((count + j).to_string());
+            }
+        }
+    }
+    tree
+}
+
+fn fibonacci(n: i64) -> i64 {
+    let mut a = 0;
+    let mut b = 1;
+    for _ in 0..n {
+        let c = a + b;
+        a = b;
+        b = c;
+    }
+    a
+}
+
+fn path_value<'a>(path: impl IntoIterator<Item = &'a String>) -> i64 {
+    path.into_iter()
+        .map(|x| x.parse::<i64>().unwrap())
+        .map(|x| x % 500)
+        .map(fibonacci)
+        .max()
+        .unwrap()
+}
+
+fn paths<T: Traverser>(tree: &DynTree<String>) -> Vec<String> {
+    let root = tree.root();
+    root.paths::<T>()
+        .map(|x| x.into_iterable())
+        .max_by_key(|x| path_value(x.iter()))
+        .map(|x| x.iter().cloned().collect())
+        .unwrap()
+}
+
+#[cfg(feature = "orx-parallel")]
+fn paths_par<T: Traverser>(tree: &DynTree<String>) -> Vec<String> {
+    let root = tree.root();
+    root.paths_par::<T>()
+        .map(|x| x.into_iterable())
+        .max_by_key(|x| path_value(x.iter()))
+        .map(|x| x.iter().cloned().collect())
+        .unwrap()
+}
+
+type BenchTraverser = Dfs; // traversal order shared by all benchmarks below
+
+fn bench(c: &mut Criterion) {
+    // tree sizes to benchmark; `build_tree` grows a tree until it holds at least `n` nodes
+    let treatments = vec![1_024 * 64];
+    let mut group = c.benchmark_group("paths_iterator");
+
+    for n in &treatments {
+        let data = build_tree(*n);
+        // reference path from the sequential run; results are compared through `path_value`
+        let expected = paths::<BenchTraverser>(&data);
+
+        group.bench_with_input(BenchmarkId::new("NodeRef::paths::<T>()", n), n, |b, _| {
+            let result = paths::<BenchTraverser>(&data);
+            assert_eq!(path_value(&result), path_value(&expected));
+            b.iter(|| paths::<BenchTraverser>(&data))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(
+            BenchmarkId::new("NodeRef::paths_par::<T>()", n),
+            n,
+            |b, _| {
+                let result = paths_par::<BenchTraverser>(&data);
+                assert_eq!(path_value(&result), path_value(&expected));
+                b.iter(|| paths_par::<BenchTraverser>(&data))
+            },
+        );
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench);
+criterion_main!(benches);
diff --git a/benches/walk_iterator.rs b/benches/walk_iterator.rs
new file mode 100644
index 0000000..f0227f9
--- /dev/null
+++ b/benches/walk_iterator.rs
@@ -0,0 +1,157 @@
+use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
+#[cfg(feature = "orx-parallel")]
+use orx_parallel::*;
+use orx_tree::*;
+use rayon::iter::{ParallelBridge, ParallelIterator};
+
+fn build_tree(n: usize) -> DynTree<String> {
+    let mut tree = DynTree::new(0.to_string());
+    let mut dfs = Traversal.dfs().over_nodes();
+    while tree.len() < n {
+        let root = tree.root();
+        let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+        for idx in x.iter() {
+            let count = tree.len();
+            let mut node = tree.node_mut(idx);
+            let num_children = 10;
+            for j in 0..num_children {
+                node.push_child((count + j).to_string());
+            }
+        }
+    }
+    tree
+}
+
+fn fibonacci(n: i64) -> i64 {
+    let mut a = 0;
+    let mut b = 1;
+    for _ in 0..n {
+        let c = a + b;
+        a = b;
+        b = c;
+    }
+    a
+}
+
+fn arbitrary_order_iter(tree: &DynTree<String>) -> i64 {
+    tree.iter()
+        .map(|x| x.parse::<usize>().unwrap())
+        .map(|x| fibonacci(x as i64 % 500))
+        .sum()
+}
+
+#[cfg(feature = "orx-parallel")]
+fn arbitrary_order_par_iter(tree: &DynTree<String>) -> i64 {
+    tree.par()
+        .map(|x| x.parse::<usize>().unwrap())
+        .map(|x| fibonacci(x as i64 % 500))
+        .sum()
+}
+
+fn arbitrary_order_par_iter_with_rayon(tree: &DynTree<String>) -> i64 {
+    tree.iter()
+        .par_bridge()
+        .map(|x| x.parse::<usize>().unwrap())
+        .map(|x| fibonacci(x as i64 % 500))
+        .sum()
+}
+
+fn walk<T: Traverser>(tree: &DynTree<String>) -> i64 {
+    tree.root()
+        .walk::<T>()
+        .map(|x| x.parse::<usize>().unwrap())
+        .map(|x| fibonacci(x as i64 % 500))
+        .sum()
+}
+
+#[cfg(feature = "orx-parallel")]
+fn walk_par<T: Traverser>(tree: &DynTree<String>) -> i64 {
+    tree.root()
+        .walk_par::<T>()
+        .map(|x| x.parse::<usize>().unwrap())
+        .map(|x| fibonacci(x as i64 % 500))
+        .sum()
+}
+
+fn bench(c: &mut Criterion) {
+    let treatments = vec![1_024 * 64];
+
+    let mut group = c.benchmark_group("walk_iterator");
+
+    for n in &treatments {
+        let data = build_tree(*n);
+
+        let expected = arbitrary_order_iter(&data);
+
+        group.bench_with_input(BenchmarkId::new("arbitrary_order_iter", n), n, |b, _| {
+            let result = arbitrary_order_iter(&data);
+            assert_eq!(result, expected);
+            b.iter(|| arbitrary_order_iter(&data))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(
+            BenchmarkId::new("arbitrary_order_par_iter", n),
+            n,
+            |b, _| {
+                let result = arbitrary_order_par_iter(&data);
+                assert_eq!(result, expected);
+                b.iter(|| arbitrary_order_par_iter(&data))
+            },
+        );
+
+        group.bench_with_input(
+            BenchmarkId::new("arbitrary_order_par_iter_with_rayon", n),
+            n,
+            |b, _| {
+                let result = arbitrary_order_par_iter_with_rayon(&data);
+                assert_eq!(result, expected);
+                b.iter(|| arbitrary_order_par_iter_with_rayon(&data))
+            },
+        );
+
+        group.bench_with_input(BenchmarkId::new("walk::<Dfs>", n), n, |b, _| {
+            let result = walk::<Dfs>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk::<Dfs>(&data))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(BenchmarkId::new("walk_par::<Dfs>", n), n, |b, _| {
+            let result = walk_par::<Dfs>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk_par::<Dfs>(&data))
+        });
+
+        group.bench_with_input(BenchmarkId::new("walk::<Bfs>", n), n, |b, _| {
+            let result = walk::<Bfs>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk::<Bfs>(&data))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(BenchmarkId::new("walk_par::<Bfs>", n), n, |b, _| {
+            let result = walk_par::<Bfs>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk_par::<Bfs>(&data))
+        });
+
+        group.bench_with_input(BenchmarkId::new("walk::<PostOrder>", n), n, |b, _| {
+            let result = walk::<PostOrder>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk::<PostOrder>(&data))
+        });
+
+        #[cfg(feature = "orx-parallel")]
+        group.bench_with_input(BenchmarkId::new("walk_par::<PostOrder>", n), n, |b, _| {
+            let result = walk_par::<PostOrder>(&data);
+            assert_eq!(result, expected);
+            b.iter(|| walk_par::<PostOrder>(&data))
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, bench);
+criterion_main!(benches);
diff --git a/examples/manual_iteration.rs b/examples/manual_iteration.rs
new file mode 100644
index 0000000..fa6f859
--- /dev/null
+++ b/examples/manual_iteration.rs
@@ -0,0 +1,93 @@
+use orx_tree::*;
+
+fn main() {
+    // build tree
+    let mut tree = DynTree::new(1);
+
+    // A. build the tree with mutable references
+
+    let mut root = tree.root_mut();
+    root.push_child(2);
+    let mut n2 = root.child_mut(0);
+    n2.push_children([4, 5]);
+    n2.child_mut(0).push_child(8);
+
+    root.push_child(3);
+    let mut n3 = root.child_mut(1);
+    n3.push_children([6, 7]);
+    n3.child_mut(0).push_child(9);
+    n3.child_mut(1).push_children([10, 11]);
+
+    println!("building the tree with manual iteration:\n{}", &tree);
+    // 1
+    // ├──2
+    // │  ├──4
+    // │  │  └──8
+    // │  └──5
+    // └──3
+    //    ├──6
+    //    │  └──9
+    //    └──7
+    //       ├──10
+    //       └──11
+
+    // B. custom iterator
+
+    fn next_node<'a, T>(node: DynNode<'a, T>) -> Option<DynNode<'a, T>> {
+        let sibling_idx = node.sibling_idx();
+        let is_last_sibling = sibling_idx == node.num_siblings() - 1;
+
+        match is_last_sibling {
+            true => node.get_child(0),
+            false => match node.parent() {
+                Some(parent) => {
+                    let child_idx = sibling_idx + 1;
+                    parent.get_child(child_idx)
+                }
+                None => None,
+            },
+        }
+    }
+
+    // manual loop over the custom next_node
+    let mut values = vec![];
+    let mut current = tree.root();
+    values.push(*current.data());
+    while let Some(node) = next_node(current) {
+        values.push(*node.data());
+        current = node;
+    }
+    println!(
+        "\na custom iterator:\n* start from a node (root here)\n* move to next sibling if any\n* move to first child otherwise\n=> {:?}\n",
+        &values
+    );
+    assert_eq!(values, [1, 2, 3, 6, 7, 10, 11]);
+
+    // or simply use `custom_walk`
+    let root = tree.root();
+    let values: Vec<_> = root.custom_walk(next_node).copied().collect();
+    assert_eq!(values, [1, 2, 3, 6, 7, 10, 11]);
+
+    // C. mutate the structure of the tree with manual mutable traversal
+    let root = tree.root_mut();
+    let n3 = root.into_child_mut(1).unwrap();
+    let mut n6 = n3.into_child_mut(0).unwrap();
+    n6.push_child(12);
+    let n3 = n6.into_parent_mut().unwrap();
+    let n7 = n3.into_child_mut(1).unwrap();
+    n7.prune();
+
+    print!(
+        "mutating structure of the tree with manual iteration:\n{}",
+        &tree
+    );
+    // 1
+    // ├──2
+    // │  ├──4
+    // │  │  └──8
+    // │  └──5
+    // └──3
+    //    └──6
+    //       ├──9
+    //       └──12
+}
diff --git a/examples/special_iterators.rs b/examples/special_iterators.rs
new file mode 100644
index 0000000..a141a01
--- /dev/null
+++ b/examples/special_iterators.rs
@@ -0,0 +1,124 @@
+use orx_iterable::{IntoCloningIterable, Iterable};
+use orx_tree::*;
+
+fn main() {
+    // build tree
+    let mut tree = DynTree::new(1);
+
+    let mut root = tree.root_mut();
+    let [id2, id3] = root.push_children([2, 3]);
+    let [id4, _] = tree.node_mut(&id2).push_children([4, 5]);
+    let id8 = tree.node_mut(&id4).push_child(8);
+    let [id6, id7] = tree.node_mut(&id3).push_children([6, 7]);
+    tree.node_mut(&id6).push_child(9);
+    let [_, id11] = tree.node_mut(&id7).push_children([10, 11]);
+
+    print!("{}", &tree);
+    // 1
+    // ├──2
+    // │  ├──4
+    // │  │  └──8
+    // │  └──5
+    // └──3
+    //    ├──6
+    //    │  └──9
+    //    └──7
+    //       ├──10
+    //       └──11
+
+    let root = tree.root();
+    let n3 = tree.node(&id3);
+    let n8 = tree.node(&id8);
+    let n11 = tree.node(&id11);
+
+    // leaves
+
+    println!("\nleaves");
+
+    println!(
+        "leaves from root in depth-first order:   {:?}",
+        root.leaves::<Dfs>().collect::<Vec<_>>()
+    );
+
+    println!(
+        "leaves from root in breadth-first order: {:?}",
+        root.leaves::<Bfs>().collect::<Vec<_>>()
+    );
+
+    println!(
+        "leaves from root in post-order:          {:?}",
+        root.leaves::<PostOrder>().collect::<Vec<_>>()
+    );
+
+    println!(
+        "leaves from n3 in depth-first order: {:?}",
+        n3.leaves::<Dfs>().collect::<Vec<_>>()
+    );
+
+    // leaves_with
+
+    let mut t = Traversal.dfs().over_nodes();
+    println!(
+        "leaves and their parents from root in depth-first order: {:?}",
+        root.leaves_with(&mut t)
+            .map(|leaf_node| {
+                let data = *leaf_node.data();
+                let parent_data = *leaf_node.parent().unwrap().data();
+                (data, parent_data)
+            })
+            .collect::<Vec<_>>()
+    );
+
+    // paths
+
+    println!("\npaths");
+
+    println!(
+        "all (reversed) paths from root in depth-first order:   {:?}",
+        root.paths::<Dfs>()
+            .map(|path| path.collect::<Vec<_>>())
+            .collect::<Vec<_>>()
+    );
+
+    println!(
+        "all (reversed) paths from root in breadth-first order: {:?}",
+        root.paths::<Bfs>()
+            .map(|path| path.collect::<Vec<_>>())
+            .collect::<Vec<_>>()
+    );
+
+    // cheap-convert path into orx_iterable::Iterable in order to iterate over
+    // each path multiple times without requiring to allocate & collect them.
+    println!(
+        "maximum-value-sum path that does not contain node 7: {:?}",
+        root.paths::<Dfs>()
+            .map(|path| path.into_iterable())
+            .filter(|path| path.iter().all(|x| *x != 7))
+            .max_by_key(|path| path.iter().sum::<u32>())
+            .map(|path| path.iter().collect::<Vec<_>>())
+    );
+
+    // ancestors
+
+    println!("\nancestors");
+
+    println!(
+        "ancestors of the root: {:?}",
+        root.ancestors().map(|node| node.data()).collect::<Vec<_>>()
+    );
+
+    println!(
+        "ancestors of node 3: {:?}",
+        n3.ancestors().map(|node| node.data()).collect::<Vec<_>>()
+    );
+
+    println!(
+        "ancestors of node 11: {:?}",
+        n11.ancestors().map(|node| node.data()).collect::<Vec<_>>()
+    );
+
+    println!(
+        "node information (rather than only data) of ancestors of node 8: {:?}",
+        n8.ancestors().collect::<Vec<_>>()
+    );
+}
diff --git a/examples/walks.rs b/examples/walks.rs
new file mode 100644
index 0000000..d0711e1
--- /dev/null
+++ b/examples/walks.rs
@@ -0,0 +1,100 @@
+use orx_tree::*;
+
+fn main() {
+    // build tree
+    let mut tree = DynTree::new(1);
+
+    let mut root = tree.root_mut();
+    let [id2, id3] = root.push_children([2, 3]);
+    let [id4, _] = tree.node_mut(&id2).push_children([4, 5]);
+    tree.node_mut(&id4).push_child(8);
+    let [id6, id7] = tree.node_mut(&id3).push_children([6, 7]);
+    tree.node_mut(&id6).push_child(9);
+    tree.node_mut(&id7).push_children([10, 11]);
+
+    print!("{}", &tree);
+    // 1
+    // ├──2
+    // │  ├──4
+    // │  │  └──8
+    // │  └──5
+    // └──3
+    //    ├──6
+    //    │  └──9
+    //    └──7
+    //       ├──10
+    //       └──11
+
+    // A. depth-first node values from root
+    let root = tree.root();
+    println!(
+        "depth-first node values from root: {:?}",
+        root.walk::<Dfs>().collect::<Vec<_>>()
+    );
+
+    // B. breadth-first node values from node 3
+    let n3 = tree.node(&id3);
+    println!(
+        "breadth-first node values from n3: {:?}",
+        n3.walk::<Bfs>().collect::<Vec<_>>()
+    );
+
+    // C. post-order node values from node 2
+    let n2 = tree.node(&id2);
+    println!(
+        "post-order node values from n2: {:?}",
+        n2.walk::<PostOrder>().collect::<Vec<_>>()
+    );
+
+    // using traversal over and over again to minimize allocation
+    let mut t = Traversal.dfs(); // depth-first traverser over data
+    assert_eq!(
+        tree.root().walk_with(&mut t).copied().collect::<Vec<_>>(),
+        [1, 2, 4, 8, 5, 3, 6, 9, 7, 10, 11]
+    );
+    assert_eq!(
+        tree.node(&id2)
+            .walk_with(&mut t)
+            .copied()
+            .collect::<Vec<_>>(),
+        [2, 4, 8, 5]
+    );
+    assert_eq!(
+        tree.node(&id3)
+            .walk_with(&mut t)
+            .copied()
+            .collect::<Vec<_>>(),
+        [3, 6, 9, 7, 10, 11]
+    );
+
+    // using traversal to traverse over nodes, rather than only data, with access to children and parent
+    let mut t = Traversal.bfs().over_nodes(); // breadth-first traverser over nodes
+    let x: Vec<_> = tree
+        .node(&id3)
+        .walk_with(&mut t)
+        .map(|node| {
+            let node_value = *node.data();
+            let children_values_sum = node.children().map(|x| x.data()).sum::<u64>();
+            (node_value, children_values_sum)
+        })
+        .collect();
+    println!(
+        "breadth-first (node value, sum of children values) pairs from n3: {:?}",
+        &x
+    );
+
+    // using traversal to additionally access depth and sibling indices
+    let mut t = Traversal.dfs().with_depth();
+    let n2 = tree.node(&id2);
+    println!(
+        "depth-first (depth, node value) pairs from n2: {:?}",
+        n2.walk_with(&mut t).collect::<Vec<_>>()
+    );
+
+    let mut t = Traversal.dfs().with_depth().with_sibling_idx();
+    let n3 = tree.node(&id3);
+    println!(
+        "depth-first (depth, sibling index, node value) tuples from n3: {:?}",
+        n3.walk_with(&mut t).collect::<Vec<_>>()
+    );
+}
diff --git a/src/iter/ancestors.rs b/src/iter/ancestors.rs
index 3b62165..b1f579c 100644
--- a/src/iter/ancestors.rs
+++ b/src/iter/ancestors.rs
@@ -36,3 +36,16 @@ impl<V: TreeVariant> Iterator for AncestorsIterPtr<V> {
         })
     }
 }
+
+impl<V: TreeVariant> Clone for AncestorsIterPtr<V> {
+    fn clone(&self) -> Self {
+        Self {
+            root_ptr: self.root_ptr.clone(),
+            current: self.current.clone(),
+        }
+    }
+}
+
+unsafe impl<V: TreeVariant> Send for AncestorsIterPtr<V> where V::Item: Send {}
+
+unsafe impl<V: TreeVariant> Sync for AncestorsIterPtr<V> where V::Item: Sync {}
diff --git a/src/iter/custom_walk.rs b/src/iter/custom_walk.rs
new file mode 100644
index 0000000..ebd71e7
--- /dev/null
+++ b/src/iter/custom_walk.rs
@@ -0,0 +1,52 @@
+use crate::{
+    MemoryPolicy, Node, TreeVariant, aliases::Col, node_ref::NodeRefCore,
+    pinned_storage::PinnedStorage,
+};
+use orx_selfref_col::NodePtr;
+
+/// An iterator which can traverse the tree arbitrarily in any direction where the walk direction
+/// is determined by a custom `next_node` closure with signature `Fn(Node) -> Option<Node>`.
+pub struct CustomWalkIterPtr<'a, V, M, P, F>
+where
+    V: TreeVariant + 'a,
+    M: MemoryPolicy,
+    P: PinnedStorage,
+    F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+{
+    col: &'a Col<V, M, P>,
+    current: Option<NodePtr<V>>,
+    next_node: F,
+}
+
+impl<'a, V, M, P, F> CustomWalkIterPtr<'a, V, M, P, F>
+where
+    V: TreeVariant + 'a,
+    M: MemoryPolicy,
+    P: PinnedStorage,
+    F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+{
+    pub(crate) fn new(col: &'a Col<V, M, P>, current: Option<NodePtr<V>>, next_node: F) -> Self {
+        Self {
+            col,
+            current,
+            next_node,
+        }
+    }
+}
+
+impl<'a, V, M, P, F> Iterator for CustomWalkIterPtr<'a, V, M, P, F>
+where
+    V: TreeVariant + 'a,
+    M: MemoryPolicy,
+    P: PinnedStorage,
+    F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+{
+    type Item = NodePtr<V>;
+    /// Yields the current node pointer, then advances to the node chosen by `next_node`.
+    fn next(&mut self) -> Option<Self::Item> {
+        let yielded = self.current.take()?;
+        let node = Node::new(self.col, yielded.clone());
+        self.current = (self.next_node)(node).map(|next| next.node_ptr().clone());
+        Some(yielded)
+    }
+}
diff --git a/src/iter/mod.rs b/src/iter/mod.rs
index 245c4bd..f1483dd 100644
--- a/src/iter/mod.rs
+++ b/src/iter/mod.rs
@@ -1,5 +1,7 @@
 mod ancestors;
 mod children_mut;
+mod custom_walk;
 
 pub use ancestors::AncestorsIterPtr;
 pub use children_mut::ChildrenMutIter;
+pub use custom_walk::CustomWalkIterPtr;
diff --git a/src/node.rs b/src/node.rs
index 033a43f..cba166e 100644
--- a/src/node.rs
+++ b/src/node.rs
@@ -18,6 +18,29 @@ where
     node_ptr: NodePtr<V>,
 }
 
+// SAFETY: Required for enabling `NodeRef::walk_with_par`.
+// Notice that `Node` does not expose any methods other than implementing `NodeRef`,
+// and all node ref methods are thread safe without data race risks.
+unsafe impl<V, M, P> Send for Node<'_, V, M, P>
+where
+    V: TreeVariant,
+    M: MemoryPolicy,
+    P: PinnedStorage,
+    V::Item: Send,
+{
+}
+// SAFETY: Required for enabling `NodeRef::walk_with_par`.
+// Notice that `Node` does not expose any methods other than implementing `NodeRef`,
+// and all node ref methods are thread safe without data race risks.
+unsafe impl<V, M, P> Sync for Node<'_, V, M, P>
+where
+    V: TreeVariant,
+    M: MemoryPolicy,
+    P: PinnedStorage,
+    V::Item: Sync,
+{
+}
+
 impl<V, M, P> Clone for Node<'_, V, M, P>
 where
     V: TreeVariant,
@@ -52,7 +75,7 @@ where
     P: PinnedStorage,
 {
     #[inline(always)]
-    fn col(&self) -> &Col<V, M, P> {
+    fn col(&self) -> &'a Col<V, M, P> {
         self.col
     }
 
diff --git a/src/node_mut.rs b/src/node_mut.rs
index 9c463f2..0942cbc 100644
--- a/src/node_mut.rs
+++ b/src/node_mut.rs
@@ -1,7 +1,7 @@
 use crate::{
-    NodeIdx, NodeRef, PostOrder, SubTree, Traverser, Tree, TreeVariant,
+    Node, NodeIdx, NodeRef, PostOrder, SubTree, Traverser, Tree, TreeVariant,
     aliases::{Col, N},
-    iter::ChildrenMutIter,
+    iter::{ChildrenMutIter, CustomWalkIterPtr},
     memory::{Auto, MemoryPolicy},
     node_ref::NodeRefCore,
     pinned_storage::{PinnedStorage, SplitRecursive},
@@ -71,8 +71,9 @@ where
     MO: NodeMutOrientation,
 {
     #[inline(always)]
-    fn col(&self) -> &Col<V, M, P> {
-        self.col
+    fn col(&self) -> &'a Col<V, M, P> {
+        let x = self.col as *const Col<V, M, P>;
+        unsafe { &*x }
     }
 
     #[inline(always)]
@@ -1705,6 +1706,82 @@ where
 
     // traversal
 
+    /// Creates a custom mutable walk starting from this node such that:
+    ///
+    /// * the first element will be this node, say `n1`,
+    /// * the second element will be node `n2 = next_node(n1)`,
+    /// * the third element will be node `n3 = next_node(n2)`,
+    /// * ...
+    ///
+    /// The iteration will terminate as soon as the `next_node` returns `None`.
+    ///
+    /// # Examples
+    ///
+    /// In the following example we create a custom iterator that walks down the tree as follows:
+    ///
+    /// * if the current node is not the last of its siblings, the next node will be its next sibling;
+    /// * if the current node is the last of its siblings and if it has children, the next node will be its first child;
+    /// * otherwise, the iteration will terminate.
+    ///
+    /// This walk strategy is implemented by the `next_node` function, and `custom_walk_mut` is called with this strategy.
+    ///
+    /// ```rust
+    /// use orx_tree::*;
+    ///
+    /// //      1
+    /// //     ╱ ╲
+    /// //    ╱   ╲
+    /// //   2     3
+    /// //  ╱ ╲   ╱ ╲
+    /// // 4   5 6   7
+    ///
+    /// fn next_node<'a, T>(node: DynNode<'a, T>) -> Option<DynNode<'a, T>> {
+    ///     let sibling_idx = node.sibling_idx();
+    ///     let is_last_sibling = sibling_idx == node.num_siblings() - 1;
+    ///
+    ///     match is_last_sibling {
+    ///         true => node.get_child(0),
+    ///         false => match node.parent() {
+    ///             Some(parent) => {
+    ///                 let child_idx = sibling_idx + 1;
+    ///                 parent.get_child(child_idx)
+    ///             }
+    ///             None => None,
+    ///         },
+    ///     }
+    /// }
+    ///
+    /// let mut tree = DynTree::new(1);
+    ///
+    /// let mut root = tree.root_mut();
+    /// let [id2, id3] = root.push_children([2, 3]);
+    /// tree.node_mut(&id2).push_children([4, 5]);
+    /// tree.node_mut(&id3).push_children([6, 7]);
+    ///
+    /// let mut root = tree.root_mut();
+    /// for (i, x) in root.custom_walk_mut(next_node).enumerate() {
+    ///     *x += (i + 1) * 100;
+    /// }
+    ///
+    /// let values: Vec<_> = tree.root().custom_walk(next_node).copied().collect();
+    /// assert_eq!(values, [101, 202, 303, 406, 507]);
+    ///
+    /// let all_values: Vec<_> = tree.root().walk::<Bfs>().copied().collect();
+    /// assert_eq!(all_values, [101, 202, 303, 4, 5, 406, 507]);
+    /// ```
+    #[allow(clippy::missing_panics_doc)]
+    pub fn custom_walk_mut<F>(&mut self, next_node: F) -> impl Iterator<Item = &'a mut V::Item>
+    where
+        F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+    {
+        let iter_ptr = CustomWalkIterPtr::new(self.col(), Some(self.node_ptr().clone()), next_node);
+        iter_ptr.map(|ptr| {
+            let node = unsafe { &mut *ptr.ptr_mut() };
+            node.data_mut()
+                .expect("node is returned by next_node and is active")
+        })
+    }
+
     /// Returns the mutable node of the `child-index`-th child of this node;
     /// returns None if the child index is out of bounds.
     ///
@@ -1756,7 +1833,7 @@ where
     /// let dfs: Vec<_> = root.walk::<Dfs>().copied().collect();
     /// assert_eq!(dfs, [1, 2, 3, 6, 4, 7, 3, 4, 7, 5, 8, 6, 9]);
     /// ```
-    pub fn get_child_mut(&mut self, child_index: usize) -> Option<NodeMut<V, M, P>> {
+    pub fn get_child_mut(&mut self, child_index: usize) -> Option<NodeMut<'_, V, M, P>> {
         self.node()
             .next()
             .get_ptr(child_index)
@@ -1818,7 +1895,7 @@ where
     /// let dfs: Vec<_> = root.walk::<Dfs>().copied().collect();
     /// assert_eq!(dfs, [1, 2, 3, 6, 4, 7, 3, 4, 7, 5, 8, 6, 9]);
     /// ```
-    pub fn child_mut(&mut self, child_index: usize) -> NodeMut<V, M, P> {
+    pub fn child_mut(&mut self, child_index: usize) -> NodeMut<'_, V, M, P> {
         self.get_child_mut(child_index)
             .expect("Given child_index is out of bounds; i.e., child_index >= self.num_children()")
     }
diff --git a/src/node_ref.rs b/src/node_ref.rs
index 41289f1..50a4cf8 100644
--- a/src/node_ref.rs
+++ b/src/node_ref.rs
@@ -1,7 +1,7 @@
 use crate::{
     Dfs, Node, NodeIdx, Traverser, Tree, TreeVariant,
     aliases::{Col, N},
-    iter::AncestorsIterPtr,
+    iter::{AncestorsIterPtr, CustomWalkIterPtr},
     memory::MemoryPolicy,
     pinned_storage::PinnedStorage,
     subtrees::{ClonedSubTree, CopiedSubTree},
@@ -14,6 +14,8 @@ use crate::{
     },
     tree_variant::RefsChildren,
 };
+#[cfg(feature = "orx-parallel")]
+use orx_parallel::*;
 use orx_selfref_col::{NodePtr, Refs};
 
 pub trait NodeRefCore<'a, V, M, P>
@@ -22,7 +24,7 @@ where
     M: MemoryPolicy,
     P: PinnedStorage,
 {
-    fn col(&self) -> &Col<V, M, P>;
+    fn col(&self) -> &'a Col<V, M, P>;
 
     fn node_ptr(&self) -> &NodePtr<V>;
 
@@ -255,6 +257,88 @@ where
             .map(|ptr| Node::new(self.col(), ptr.clone()))
     }
 
+    /// Creates a **[parallel iterator]** of children nodes of this node.
+    ///
+    /// Please see [`children`] for details, since `children_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similar to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with **orx-parallel** feature.
+    ///
+    /// You may also see [children_iterator](https://github.com/orxfun/orx-tree/blob/main/benches/children_iterator.rs) benchmark to
+    /// see an example use case.
+    ///
+    /// [`children`]: NodeRef::children
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    ///
+    /// # Examples
+    ///
+    /// ```rust
+    /// use orx_tree::*;
+    ///
+    /// const N: usize = 8;
+    ///
+    /// fn build_tree(n: usize) -> DaryTree<N, String> {
+    ///     let mut tree = DaryTree::new(0.to_string());
+    ///     let mut dfs = Traversal.dfs().over_nodes();
+    ///     while tree.len() < n {
+    ///         let root = tree.root();
+    ///         let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+    ///         for idx in x.iter() {
+    ///             let count = tree.len();
+    ///             let mut node = tree.node_mut(idx);
+    ///             for j in 0..N {
+    ///                 node.push_child((count + j).to_string());
+    ///             }
+    ///         }
+    ///     }
+    ///     tree
+    /// }
+    ///
+    /// fn compute_subtree_value(subtree: &DaryNode<N, String>) -> u64 {
+    ///     subtree
+    ///         .walk::<Dfs>()
+    ///         .map(|x| x.parse::<u64>().unwrap())
+    ///         .sum()
+    /// }
+    ///
+    /// let tree = build_tree(64 * 1_024);
+    ///
+    /// let seq_value: u64 = tree
+    ///     .root()
+    ///     .children()
+    ///     .map(|x| compute_subtree_value(&x))
+    ///     .sum();
+    ///
+    /// let par_value: u64 = tree
+    ///     .root()
+    ///     .children_par() // compute 8 subtrees in parallel
+    ///     .map(|x| compute_subtree_value(&x))
+    ///     .sum();
+    ///
+    /// let par_value_4t: u64 = tree
+    ///     .root()
+    ///     .children_par() // compute 8 subtrees in parallel
+    ///     .num_threads(4) // but limited to using 4 threads
+    ///     .map(|x| compute_subtree_value(&x))
+    ///     .sum();
+    ///
+    /// assert_eq!(seq_value, par_value);
+    /// assert_eq!(seq_value, par_value_4t);
+    /// ```
+    #[cfg(feature = "orx-parallel")]
+    fn children_par(&'a self) -> impl ParIter<Item = Node<'a, V, M, P>>
+    where
+        V::Item: Send + Sync,
+        Self: Sync,
+    {
+        self.node()
+            .next()
+            .children_ptr_par()
+            .map(|ptr| Node::new(self.col(), ptr.clone()))
+    }
+
     /// Returns the `child-index`-th child of the node; returns None if out of bounds.
     ///
     /// # Examples
@@ -287,7 +371,7 @@ where
     /// assert_eq!(a.get_child(1).unwrap().data(), &'d');
     /// assert_eq!(a.get_child(3), None);
     /// ```
-    fn get_child(&self, child_index: usize) -> Option<Node<V, M, P>> {
+    fn get_child(&self, child_index: usize) -> Option<Node<'a, V, M, P>> {
         self.node()
             .next()
             .get_ptr(child_index)
@@ -330,7 +414,7 @@ where
     /// assert_eq!(a.child(1).data(), &'d');
     /// // let child = a.child(3); // out-of-bounds, panics!
     /// ```
-    fn child(&self, child_index: usize) -> Node<V, M, P> {
+    fn child(&self, child_index: usize) -> Node<'a, V, M, P> {
         self.get_child(child_index)
             .expect("Given child_index is out of bounds; i.e., child_index >= self.num_children()")
     }
@@ -352,7 +436,7 @@ where
     ///     assert_eq!(node.parent().unwrap(), root);
     /// }
     /// ```
-    fn parent(&self) -> Option<Node<V, M, P>> {
+    fn parent(&self) -> Option<Node<'a, V, M, P>> {
         self.node()
             .prev()
             .get()
@@ -482,12 +566,14 @@ where
         depth
     }
 
-    /// Returns an iterator starting from this node moving upwards until the root:
+    /// Returns an iterator starting from this node's parent moving upwards until the root:
     ///
-    /// * yields all ancestors of this node including this node,
-    /// * the first element is always this node, and
+    /// * yields all ancestors of this node,
+    /// * the first element is always this node's parent, and
     /// * the last element is always the root node of the tree.
     ///
+    /// It returns an empty iterator if this is the root node.
+    ///
     /// # Examples
     ///
     /// ```
@@ -518,28 +604,50 @@ where
     /// tree.node_mut(&id6).push_child(9);
     /// let [id10, _] = tree.node_mut(&id7).push_children([10, 11]);
     ///
-    /// // ancestors iterator over nodes
-    /// // upwards from the node to the root
+    /// // ancestors iterator over nodes upwards to the root
     ///
     /// let root = tree.root();
     /// let mut iter = root.ancestors();
-    /// assert_eq!(iter.next().as_ref(), Some(&root));
     /// assert_eq!(iter.next(), None);
     ///
     /// let n10 = tree.node(&id10);
     /// let ancestors_data: Vec<_> = n10.ancestors().map(|x| *x.data()).collect();
-    /// assert_eq!(ancestors_data, [10, 7, 3, 1]);
+    /// assert_eq!(ancestors_data, [7, 3, 1]);
     ///
     /// let n4 = tree.node(&id4);
     /// let ancestors_data: Vec<_> = n4.ancestors().map(|x| *x.data()).collect();
-    /// assert_eq!(ancestors_data, [4, 2, 1]);
+    /// assert_eq!(ancestors_data, [2, 1]);
     /// ```
     fn ancestors(&'a self) -> impl Iterator<Item = Node<'a, V, M, P>> {
         let root_ptr = self.col().ends().get().expect("Tree is non-empty").clone();
         AncestorsIterPtr::new(root_ptr, self.node_ptr().clone())
+            .skip(1)
             .map(|ptr| Node::new(self.col(), ptr))
     }
 
+    /// Creates a **[parallel iterator]** starting from this node's parent moving upwards until the root:
+    ///
+    /// * yields all ancestors of this node,
+    /// * the first element is always this node's parent, and
+    /// * the last element is always the root node of the tree.
+    ///
+    /// Please see [`ancestors`] for details, since `ancestors_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similar to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with **orx-parallel** feature.
+    ///
+    /// [`ancestors`]: NodeRef::ancestors
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn ancestors_par(&'a self) -> impl ParIter<Item = Node<'a, V, M, P>>
+    where
+        V::Item: Send + Sync,
+    {
+        self.ancestors().collect::<alloc::vec::Vec<_>>().into_par()
+    }
+
     /// Returns true if this node is an ancestor of the node with the given `idx`;
     /// false otherwise.
     ///
@@ -640,14 +748,114 @@ where
 
     // traversal
 
+    /// Creates a custom walk starting from this node such that:
+    ///
+    /// * the first element will be this node, say `n1`,
+    /// * the second element will be node `n2 = next_node(n1)`,
+    /// * the third element will be node `n3 = next_node(n2)`,
+    /// * ...
+    ///
+    /// The iteration will terminate as soon as the `next_node` returns `None`.
+    ///
+    /// # Examples
+    ///
+    /// In the following example we create a custom iterator that walks down the tree as follows:
+    ///
+    /// * if the current node is not the last of its siblings, the next node will be its next sibling;
+    /// * if the current node is the last of its siblings and if it has children, the next node will be its first child;
+    /// * otherwise, the iteration will terminate.
+    ///
+    /// This walk strategy is implemented by the `next_node` function, and `custom_walk` is called with this strategy.
+    ///
+    /// ```rust
+    /// use orx_tree::*;
+    ///
+    /// //      1
+    /// //     ╱ ╲
+    /// //    ╱   ╲
+    /// //   2     3
+    /// //  ╱ ╲   ╱ ╲
+    /// // 4   5 6   7
+    /// // |     |  ╱ ╲
+    /// // 8     9 10  11
+    ///
+    /// fn next_node<'a, T>(node: DynNode<'a, T>) -> Option<DynNode<'a, T>> {
+    ///     let sibling_idx = node.sibling_idx();
+    ///     let is_last_sibling = sibling_idx == node.num_siblings() - 1;
+    ///
+    ///     match is_last_sibling {
+    ///         true => node.get_child(0),
+    ///         false => match node.parent() {
+    ///             Some(parent) => {
+    ///                 let child_idx = sibling_idx + 1;
+    ///                 parent.get_child(child_idx)
+    ///             }
+    ///             None => None,
+    ///         },
+    ///     }
+    /// }
+    ///
+    /// let mut tree = DynTree::new(1);
+    ///
+    /// let mut root = tree.root_mut();
+    /// let [id2, id3] = root.push_children([2, 3]);
+    /// let [id4, _] = tree.node_mut(&id2).push_children([4, 5]);
+    /// tree.node_mut(&id4).push_child(8);
+    /// let [id6, id7] = tree.node_mut(&id3).push_children([6, 7]);
+    /// tree.node_mut(&id6).push_child(9);
+    /// tree.node_mut(&id7).push_children([10, 11]);
+    ///
+    /// let values: Vec<_> = tree.root().custom_walk(next_node).copied().collect();
+    /// assert_eq!(values, [1, 2, 3, 6, 7, 10, 11]);
+    ///
+    /// let values: Vec<_> = tree.node(&id3).custom_walk(next_node).copied().collect();
+    /// assert_eq!(values, [3, 6, 7, 10, 11]);
+    /// ```
+    fn custom_walk<F>(&self, next_node: F) -> impl Iterator<Item = &'a V::Item>
+    where
+        F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+    {
+        let iter_ptr = CustomWalkIterPtr::new(self.col(), Some(self.node_ptr().clone()), next_node);
+        iter_ptr.map(|ptr| {
+            let node = unsafe { &*ptr.ptr() };
+            node.data()
+                .expect("node is returned by next_node and is active")
+        })
+    }
+
+    /// Creates a **[parallel iterator]** that yields references to data of the nodes visited by the custom walk starting from this node.
+    ///
+    /// Please see [`custom_walk`] for details, since `custom_walk_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similar to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with **orx-parallel** feature.
+    ///
+    /// [`custom_walk`]: NodeRef::custom_walk
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn custom_walk_par<F>(&self, next_node: F) -> impl ParIter<Item = &'a V::Item>
+    where
+        F: Fn(Node<'a, V, M, P>) -> Option<Node<'a, V, M, P>>,
+        V::Item: Send + Sync,
+    {
+        self.custom_walk(next_node)
+            .collect::<alloc::vec::Vec<_>>()
+            .into_par()
+    }
+
     /// Creates an iterator that yields references to data of all nodes belonging to the subtree rooted at this node.
     ///
     /// The order of the elements is determined by the generic [`Traverser`] parameter `T`.
     /// Available implementations are:
     /// * [`Bfs`] for breadth-first ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Breadth-first_search))
-    /// * [`Bfs`] for (pre-order) depth-first ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Depth-first_search))
+    /// * [`Dfs`] for (pre-order) depth-first ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Depth-first_search))
     /// * [`PostOrder`] for post-order ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Post-order,_LRN))
     ///
+    /// You may see the [walks](https://github.com/orxfun/orx-tree/blob/main/examples/walks.rs) example that demonstrates
+    /// different ways to walk the tree with traversal variants (`cargo run --example walks`).
+    ///
     /// # See also
     ///
     /// See also [`walk_mut`] and [`into_walk`] for iterators over mutable references and owned (removed) values,
@@ -717,6 +925,30 @@ where
         T::iter_with_owned_storage::<V, M, P>(self)
     }
 
+    /// Creates a **[parallel iterator]** that yields references to data of all nodes belonging to the subtree rooted at this node.
+    ///
+    /// Please see [`walk`] for details, since `walk_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// You may also see the [walk_iterator](https://github.com/orxfun/orx-tree/blob/main/benches/walk_iterator.rs) benchmark
+    /// for an example use case.
+    ///
+    /// [`walk`]: NodeRef::walk
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn walk_par<T>(&'a self) -> impl ParIter<Item = &'a V::Item>
+    where
+        T: Traverser<OverData>,
+        Self: Sized,
+        V::Item: Send + Sync,
+    {
+        self.walk::<T>().collect::<alloc::vec::Vec<_>>().into_par()
+    }
+
     /// Creates an iterator that traverses all nodes belonging to the subtree rooted at this node.
     ///
     /// The order of the elements is determined by the type of the `traverser` which implements [`Traverser`].
@@ -725,6 +957,9 @@ where
     /// * [`Dfs`] for (pre-order) depth-first ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Depth-first_search))
     /// * [`PostOrder`] for post-order ([wikipedia](https://en.wikipedia.org/wiki/Tree_traversal#Post-order,_LRN))
     ///
+    /// You may see the [walks](https://github.com/orxfun/orx-tree/blob/main/examples/walks.rs) example that demonstrates
+    /// different ways to walk the tree with traversal variants (`cargo run --example walks`).
+    ///
     /// As opposed to [`walk`], this method does require internal allocation.
     /// Furthermore, it allows to iterate over nodes rather than data; and to attach node depths or sibling
     /// indices to the yield values.
@@ -869,6 +1104,34 @@ where
         traverser.iter(self)
     }
 
+    /// Creates a **[parallel iterator]** that traverses all nodes belonging to the subtree rooted at this node.
+    ///
+    /// Please see [`walk_with`] for details, since `walk_with_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// [`walk_with`]: NodeRef::walk_with
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn walk_with_par<'t, T, O>(
+        &'a self,
+        traverser: &'t mut T,
+    ) -> impl ParIter<Item = OverItem<'a, V, O, M, P>>
+    where
+        O: Over,
+        T: Traverser<O>,
+        Self: Sized,
+        't: 'a,
+        OverItem<'a, V, O, M, P>: Send + Sync,
+    {
+        self.walk_with(traverser)
+            .collect::<alloc::vec::Vec<_>>()
+            .into_par()
+    }
+
     /// Returns an iterator of paths from all leaves of the subtree rooted at
     /// this node **upwards** to this node.
     ///
@@ -886,12 +1149,20 @@ where
     ///
     /// # Yields
     ///
-    /// * `Iterator::Item` => `impl Iterator<Item = &'a V::Item>`
+    /// * `Iterator::Item` => `impl Iterator<Item = &'a V::Item> + Clone`
+    ///
+    /// Notice that each path iterator is cloneable and hence can cheaply be converted into
+    /// an [`Iterable`] by the [`into_iterable`] method. This allows iterating over each path multiple
+    /// times without having to allocate and store the path nodes in a collection.
+    ///
+    /// [`Iterable`]: orx_iterable::Iterable
+    /// [`into_iterable`]: orx_iterable::IntoCloningIterable::into_iterable
     ///
     /// # Examples
     ///
     /// ```
     /// use orx_tree::*;
+    /// use orx_iterable::*;
     ///
     /// //      1
     /// //     ╱ ╲
@@ -950,8 +1221,19 @@ where
     ///     .collect();
     ///
     /// assert_eq!(paths, [vec![9, 6, 3], vec![10, 7, 3], vec![11, 7, 3]]);
+    ///
+    /// // Iterable: convert each path into Iterable paths
+    /// let paths = root.paths::<Bfs>().map(|x| x.into_iterable().copied());
+    ///
+    /// // we can iterate over each path multiple times without needing to collect them into a Vec
+    /// let max_label_path: Vec<_> = paths
+    ///     .filter(|path| path.iter().all(|x| x != 7)) // does not contain 7
+    ///     .max_by_key(|path| path.iter().sum::<i32>()) // has maximal sum of node labels
+    ///     .map(|path| path.iter().collect::<Vec<_>>()) // only collect the selected path
+    ///     .unwrap();
+    /// assert_eq!(max_label_path, vec![9, 6, 3, 1]);
     /// ```
-    fn paths<T>(&'a self) -> impl Iterator<Item = impl Iterator<Item = &'a V::Item>>
+    fn paths<T>(&'a self) -> impl Iterator<Item = impl Iterator<Item = &'a V::Item> + Clone>
     where
         T: Traverser<OverData>,
     {
@@ -964,6 +1246,109 @@ where
             })
     }
 
+    /// Creates a **[parallel iterator]** of paths from all leaves of the subtree rooted at this node **upwards** to this node.
+    ///
+    /// Please see [`paths`] for details, since `paths_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// [`paths`]: NodeRef::paths
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    ///
+    /// You may also see the [paths_iterator](https://github.com/orxfun/orx-tree/blob/main/benches/paths_iterator.rs) benchmark
+    /// for an example use case.
+    ///
+    /// # Examples
+    ///
+    /// In the following example, we find the best path with respect to a linear-in-time computation.
+    /// The computation demonstrates the following features:
+    ///
+    /// * We use `paths_par` rather than `paths` to parallelize the computation of path values.
+    /// * We configure the parallel computation by limiting the number of threads using the `num_threads`
+    ///   method. Note that this is an optional parameter with a default value of [`Auto`].
+    /// * We start computation by converting each `path` iterator into an [`Iterable`] using the `into_iterable`
+    ///   method. This is a cheap transformation which allows us to iterate over the path multiple times
+    ///   without having to allocate and store it in a collection.
+    /// * We select our best path by the `max_by_key` call.
+    /// * Lastly, we collect the best path. Notice that this is the only allocated path.
+    ///
+    /// [`Auto`]: orx_parallel::NumThreads::Auto
+    /// [`Iterable`]: orx_iterable::Iterable
+    ///
+    /// ```rust
+    /// use orx_tree::*;
+    /// use orx_iterable::*;
+    ///
+    /// fn build_tree(n: usize) -> DynTree<String> {
+    ///     let mut tree = DynTree::new(0.to_string());
+    ///     let mut dfs = Traversal.dfs().over_nodes();
+    ///     while tree.len() < n {
+    ///         let root = tree.root();
+    ///         let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+    ///         for idx in x.iter() {
+    ///             let count = tree.len();
+    ///             let mut node = tree.node_mut(idx);
+    ///             let num_children = 4;
+    ///             for j in 0..num_children {
+    ///                 node.push_child((count + j).to_string());
+    ///             }
+    ///         }
+    ///     }
+    ///     tree
+    /// }
+    ///
+    /// fn compute_path_value<'a>(mut path: impl Iterator<Item = &'a String>) -> u64 {
+    ///     match path.next() {
+    ///         Some(first) => {
+    ///             let mut abs_diff = 0;
+    ///             let mut current = first.parse::<u64>().unwrap();
+    ///             for node in path {
+    ///                 let next = node.parse::<u64>().unwrap();
+    ///                 abs_diff += match next >= current {
+    ///                     true => next - current,
+    ///                     false => current - next,
+    ///                 };
+    ///                 current = next;
+    ///             }
+    ///             abs_diff
+    ///         }
+    ///         None => 0,
+    ///     }
+    /// }
+    ///
+    /// let tree = build_tree(1024);
+    ///
+    /// let root = tree.root();
+    /// let best_path: Vec<_> = root
+    ///     .paths_par::<Dfs>() // parallelize
+    ///     .num_threads(4) // configure parallel computation
+    ///     .map(|path| path.into_iterable()) // into-iterable for multiple iterations over each path without allocation
+    ///     .max_by_key(|path| compute_path_value(path.iter())) // find the best path
+    ///     .map(|path| path.iter().collect()) // collect only the best path
+    ///     .unwrap();
+    ///
+    /// let expected = [1364, 340, 84, 20, 4, 0].map(|x| x.to_string());
+    /// assert_eq!(best_path, expected.iter().collect::<Vec<_>>());
+    /// ```
+    #[cfg(feature = "orx-parallel")]
+    fn paths_par<T>(&'a self) -> impl ParIter<Item = impl Iterator<Item = &'a V::Item> + Clone>
+    where
+        T: Traverser<OverData>,
+        V::Item: Send + Sync,
+    {
+        let node_ptr = self.node_ptr();
+        let node_ptrs: alloc::vec::Vec<_> = T::iter_ptr_with_owned_storage(node_ptr.clone())
+            .filter(|x: &NodePtr<V>| unsafe { &*x.ptr() }.next().is_empty())
+            .collect();
+        node_ptrs.into_par().map(move |x| {
+            let iter = AncestorsIterPtr::new(node_ptr.clone(), x);
+            iter.map(|ptr| (unsafe { &*ptr.ptr() }).data().expect("active tree node"))
+        })
+    }
+
     /// Returns an iterator of paths from all leaves of the subtree rooted at
     /// this node **upwards** to this node.
     ///
@@ -1059,7 +1444,7 @@ where
     fn paths_with<T, O>(
         &'a self,
         traverser: &'a mut T,
-    ) -> impl Iterator<Item = impl Iterator<Item = <O as Over>::NodeItem<'a, V, M, P>>>
+    ) -> impl Iterator<Item = impl Iterator<Item = <O as Over>::NodeItem<'a, V, M, P>> + Clone>
     where
         O: Over<Enumeration = Val>,
         T: Traverser<O>,
@@ -1082,6 +1467,119 @@ where
             })
     }
 
+    /// Creates a **[parallel iterator]** of paths from all leaves of the subtree rooted at this node **upwards** to this node.
+    ///
+    /// Please see [`paths_with`] for details, since `paths_with_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// [`paths_with`]: NodeRef::paths_with
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    ///
+    /// # Examples
+    ///
+    /// In the following example, we find the best path with respect to a linear-in-time computation.
+    /// The computation demonstrates the following features:
+    ///
+    /// * We use `paths_with_par` rather than `paths_with` to parallelize the computation of path values.
+    /// * We configure the parallel computation by limiting the number of threads using the `num_threads`
+    ///   method. Note that this is an optional parameter with a default value of [`Auto`].
+    /// * We start computation by converting each `path` iterator into an [`Iterable`] using the `into_iterable`
+    ///   method. This is a cheap transformation which allows us to iterate over the path multiple times
+    ///   without having to allocate and store it in a collection.
+    /// * We select our best path by the `max_by_key` call.
+    /// * Lastly, we collect the best path. Notice that this is the only allocated path.
+    ///
+    /// [`Auto`]: orx_parallel::NumThreads::Auto
+    /// [`Iterable`]: orx_iterable::Iterable
+    ///
+    /// ```rust
+    /// use orx_tree::*;
+    /// use orx_iterable::*;
+    ///
+    /// fn build_tree(n: usize) -> DynTree<String> {
+    ///     let mut tree = DynTree::new(0.to_string());
+    ///     let mut dfs = Traversal.dfs().over_nodes();
+    ///     while tree.len() < n {
+    ///         let root = tree.root();
+    ///         let x: Vec<_> = root.leaves_with(&mut dfs).map(|x| x.idx()).collect();
+    ///         for idx in x.iter() {
+    ///             let count = tree.len();
+    ///             let mut node = tree.node_mut(idx);
+    ///             let num_children = 4;
+    ///             for j in 0..num_children {
+    ///                 node.push_child((count + j).to_string());
+    ///             }
+    ///         }
+    ///     }
+    ///     tree
+    /// }
+    ///
+    /// fn compute_path_value<'a>(mut path: impl Iterator<Item = &'a String>) -> u64 {
+    ///     match path.next() {
+    ///         Some(first) => {
+    ///             let mut abs_diff = 0;
+    ///             let mut current = first.parse::<u64>().unwrap();
+    ///             for node in path {
+    ///                 let next = node.parse::<u64>().unwrap();
+    ///                 abs_diff += match next >= current {
+    ///                     true => next - current,
+    ///                     false => current - next,
+    ///                 };
+    ///                 current = next;
+    ///             }
+    ///             abs_diff
+    ///         }
+    ///         None => 0,
+    ///     }
+    /// }
+    ///
+    /// let tree = build_tree(1024);
+    /// let mut dfs = Traversal.dfs().over_nodes();
+    ///
+    /// let root = tree.root();
+    /// let best_path: Vec<_> = root
+    ///     .paths_with_par(&mut dfs) // parallelize
+    ///     .num_threads(4) // configure parallel computation
+    ///     .map(|path| path.into_iterable()) // into-iterable for multiple iterations over each path without allocation
+    ///     .max_by_key(|path| compute_path_value(path.iter().map(|x| x.data()))) // find the best path
+    ///     .map(|path| path.iter().map(|x| x.data()).collect()) // collect only the best path
+    ///     .unwrap();
+    ///
+    /// let expected = [1364, 340, 84, 20, 4, 0].map(|x| x.to_string());
+    /// assert_eq!(best_path, expected.iter().collect::<Vec<_>>());
+    /// ```
+    #[cfg(feature = "orx-parallel")]
+    fn paths_with_par<T, O>(
+        &'a self,
+        traverser: &'a mut T,
+    ) -> impl ParIter<Item = impl Iterator<Item = <O as Over>::NodeItem<'a, V, M, P>> + Clone>
+    where
+        O: Over<Enumeration = Val>,
+        T: Traverser<O>,
+        V::Item: Send + Sync,
+        Self: Sync,
+    {
+        let node_ptr = self.node_ptr();
+
+        let node_ptrs: alloc::vec::Vec<_> =
+            T::iter_ptr_with_storage(node_ptr.clone(), TraverserCore::storage_mut(traverser))
+                .filter(|x: &NodePtr<V>| unsafe { &*x.ptr() }.next().is_empty())
+                .collect();
+        node_ptrs.into_par().map(move |x| {
+            let iter = AncestorsIterPtr::new(node_ptr.clone(), x);
+            iter.map(|ptr: NodePtr<V>| {
+                O::Enumeration::from_element_ptr::<'a, V, M, P, O::NodeItem<'a, V, M, P>>(
+                    self.col(),
+                    ptr,
+                )
+            })
+        })
+    }
+
     /// Clone the subtree rooted at this node as a separate tree.
     ///
     /// # Examples
@@ -1229,6 +1727,28 @@ where
             })
     }
 
+    /// Creates a **[parallel iterator]** of references to data of leaves of the subtree rooted at this node.
+    ///
+    /// Please see [`leaves`] for details, since `leaves_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// [`leaves`]: NodeRef::leaves
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn leaves_par<T>(&'a self) -> impl ParIter<Item = &'a V::Item>
+    where
+        T: Traverser<OverData>,
+        V::Item: Send + Sync,
+    {
+        self.leaves::<T>()
+            .collect::<alloc::vec::Vec<_>>()
+            .into_par()
+    }
+
     /// Returns an iterator of leaves of the subtree rooted at this node.
     ///
     /// The order of the elements is determined by the type of the `traverser` which implements [`Traverser`].
@@ -1329,6 +1849,32 @@ where
             })
     }
 
+    /// Creates a **[parallel iterator]** of leaves of the subtree rooted at this node.
+    ///
+    /// Please see [`leaves_with`] for details, since `leaves_with_par` is the parallelized counterpart.
+    /// * Parallel iterators can be used similarly to regular iterators.
+    /// * Parallel computation can be configured by using methods such as [`num_threads`] or [`chunk_size`] on the parallel iterator.
+    /// * Parallel counterparts of the tree iterators are available with the **orx-parallel** feature.
+    ///
+    /// [`leaves_with`]: NodeRef::leaves_with
+    /// [parallel iterator]: orx_parallel::ParIter
+    /// [`num_threads`]: orx_parallel::ParIter::num_threads
+    /// [`chunk_size`]: orx_parallel::ParIter::chunk_size
+    #[cfg(feature = "orx-parallel")]
+    fn leaves_with_par<T, O>(
+        &'a self,
+        traverser: &'a mut T,
+    ) -> impl ParIter<Item = OverItem<'a, V, O, M, P>>
+    where
+        O: Over,
+        T: Traverser<O>,
+        OverItem<'a, V, O, M, P>: Send + Sync,
+    {
+        self.leaves_with(traverser)
+            .collect::<alloc::vec::Vec<_>>()
+            .into_par()
+    }
+
     /// Returns an iterator of node indices.
     ///
     /// The order of the indices is determined by the generic [`Traverser`] parameter `T`.
diff --git a/src/tree.rs b/src/tree.rs
index da613f7..9e57945 100644
--- a/src/tree.rs
+++ b/src/tree.rs
@@ -213,7 +213,7 @@ where
     /// tree.push_root('a');
     /// assert_eq!(tree.root().data(), &'a');
     /// ```
-    pub fn root(&self) -> Node<V, M, P> {
+    pub fn root(&self) -> Node<'_, V, M, P> {
         self.root_ptr()
             .cloned()
             .map(|p| Node::new(&self.0, p))
@@ -258,7 +258,7 @@ where
     /// tree.node_mut(&b).push_child('d');
     /// tree.node_mut(&c).push_children(['e', 'f']);
     /// ```
-    pub fn root_mut(&mut self) -> NodeMut<V, M, P> {
+    pub fn root_mut(&mut self) -> NodeMut<'_, V, M, P> {
         self.root_ptr()
             .cloned()
             .map(|p| NodeMut::new(&mut self.0, p))
@@ -286,7 +286,7 @@ where
     /// tree.push_root('a');
     /// assert_eq!(tree.root().data(), &'a');
     /// ```
-    pub fn get_root(&self) -> Option<Node<V, M, P>> {
+    pub fn get_root(&self) -> Option<Node<'_, V, M, P>> {
         self.root_ptr().cloned().map(|p| Node::new(&self.0, p))
     }
 
@@ -311,7 +311,7 @@ where
     /// tree.clear();
     /// assert_eq!(tree.get_root_mut(), None);
     /// ```
-    pub fn get_root_mut(&mut self) -> Option<NodeMut<V, M, P>> {
+    pub fn get_root_mut(&mut self) -> Option<NodeMut<'_, V, M, P>> {
         self.root_ptr()
             .cloned()
             .map(|p| NodeMut::new(&mut self.0, p))
@@ -408,7 +408,7 @@ where
     /// assert_eq!(bfs_values, [1, 2, 3, 4, 5]);
     /// ```
     #[inline(always)]
-    pub fn node(&self, node_idx: &NodeIdx<V>) -> Node<V, M, P> {
+    pub fn node(&self, node_idx: &NodeIdx<V>) -> Node<'_, V, M, P> {
         assert!(self.is_node_idx_valid(node_idx), "{}", INVALID_IDX_ERROR);
         Node::new(&self.0, node_idx.0.node_ptr())
     }
@@ -462,7 +462,7 @@ where
     /// assert_eq!(bfs_values, [1, 2, 3, 4, 5]);
     /// ```
     #[inline(always)]
-    pub fn node_mut(&mut self, node_idx: &NodeIdx<V>) -> NodeMut<V, M, P> {
+    pub fn node_mut(&mut self, node_idx: &NodeIdx<V>) -> NodeMut<'_, V, M, P> {
         assert!(self.is_node_idx_valid(node_idx), "{}", INVALID_IDX_ERROR);
         NodeMut::new(&mut self.0, node_idx.0.node_ptr())
     }
@@ -485,7 +485,7 @@ where
     /// [`NodeIdxError::RemovedNode`]: crate::NodeIdxError::RemovedNode
     /// [`NodeIdxError::ReorganizedCollection`]: crate::NodeIdxError::ReorganizedCollection
     #[inline(always)]
-    pub fn get_node(&self, node_idx: &NodeIdx<V>) -> Option<Node<V, M, P>> {
+    pub fn get_node(&self, node_idx: &NodeIdx<V>) -> Option<Node<'_, V, M, P>> {
         self.is_node_idx_valid(node_idx)
             .then(|| Node::new(&self.0, node_idx.0.node_ptr()))
     }
@@ -508,7 +508,7 @@ where
     /// [`NodeIdxError::RemovedNode`]: crate::NodeIdxError::RemovedNode
     /// [`NodeIdxError::ReorganizedCollection`]: crate::NodeIdxError::ReorganizedCollection
     #[inline(always)]
-    pub fn get_node_mut(&mut self, node_idx: &NodeIdx<V>) -> Option<NodeMut<V, M, P>> {
+    pub fn get_node_mut(&mut self, node_idx: &NodeIdx<V>) -> Option<NodeMut<'_, V, M, P>> {
         self.is_node_idx_valid(node_idx)
             .then(|| NodeMut::new(&mut self.0, node_idx.0.node_ptr()))
     }
@@ -529,7 +529,7 @@ where
     /// [`NodeIdxError::RemovedNode`]: crate::NodeIdxError::RemovedNode
     /// [`NodeIdxError::ReorganizedCollection`]: crate::NodeIdxError::ReorganizedCollection
     #[inline(always)]
-    pub fn try_node(&self, node_idx: &NodeIdx<V>) -> Result<Node<V, M, P>, NodeIdxError> {
+    pub fn try_node(&self, node_idx: &NodeIdx<V>) -> Result<Node<'_, V, M, P>, NodeIdxError> {
         self.0
             .try_get_ptr(&node_idx.0)
             .map(|ptr| Node::new(&self.0, ptr))
@@ -554,7 +554,7 @@ where
     pub fn try_node_mut(
         &mut self,
         node_idx: &NodeIdx<V>,
-    ) -> Result<NodeMut<V, M, P>, NodeIdxError> {
+    ) -> Result<NodeMut<'_, V, M, P>, NodeIdxError> {
         self.0
             .try_get_ptr(&node_idx.0)
             .map(|ptr| NodeMut::new(&mut self.0, ptr))
@@ -570,7 +570,7 @@ where
     /// [`node`]: Self::node
     /// [`is_node_idx_valid`]: Self::is_node_idx_valid
     #[inline(always)]
-    pub unsafe fn node_unchecked(&self, node_idx: &NodeIdx<V>) -> Node<V, M, P> {
+    pub unsafe fn node_unchecked(&self, node_idx: &NodeIdx<V>) -> Node<'_, V, M, P> {
         Node::new(&self.0, node_idx.0.node_ptr())
     }
 
@@ -584,7 +584,7 @@ where
     /// [`node_mut`]: Self::node_mut
     /// [`is_node_idx_valid`]: Self::is_node_idx_valid
     #[inline(always)]
-    pub unsafe fn node_mut_unchecked(&mut self, node_idx: &NodeIdx<V>) -> NodeMut<V, M, P> {
+    pub unsafe fn node_mut_unchecked(&mut self, node_idx: &NodeIdx<V>) -> NodeMut<'_, V, M, P> {
         NodeMut::new(&mut self.0, node_idx.0.node_ptr())
     }
 
diff --git a/src/tree_variant.rs b/src/tree_variant.rs
index 4425ce9..d250718 100644
--- a/src/tree_variant.rs
+++ b/src/tree_variant.rs
@@ -1,3 +1,5 @@
+#[cfg(feature = "orx-parallel")]
+use orx_parallel::*;
 use orx_selfref_col::{
     MemoryReclaimer, NodePtr, Refs, RefsArrayLeftMost, RefsSingle, RefsVec, Variant,
     references::iter::ArrayLeftMostPtrIter,
@@ -28,6 +30,12 @@ pub trait RefsChildren<V: Variant> {
 
     fn children_ptr(&self) -> Self::ChildrenPtrIter<'_>;
 
+    #[cfg(feature = "orx-parallel")]
+    fn children_ptr_par<'a>(&'a self) -> impl ParIter<Item = &'a NodePtr<V>>
+    where
+        V: 'a,
+        V::Item: Send + Sync;
+
     fn get_ptr(&self, i: usize) -> Option<&NodePtr<V>>;
 
     // mut
@@ -60,6 +68,15 @@ impl<V: Variant> RefsChildren<V> for RefsVec<V> {
         self.iter()
     }
 
+    #[cfg(feature = "orx-parallel")]
+    fn children_ptr_par<'a>(&'a self) -> impl ParIter<Item = &'a NodePtr<V>>
+    where
+        V: 'a,
+        V::Item: Send + Sync,
+    {
+        self.as_slice().par()
+    }
+
     #[inline(always)]
     fn get_ptr(&self, i: usize) -> Option<&NodePtr<V>> {
         self.get(i)
@@ -102,6 +119,18 @@ impl<const D: usize, V: Variant> RefsChildren<V> for RefsArrayLeftMost<D, V> {
         self.iter()
     }
 
+    #[cfg(feature = "orx-parallel")]
+    fn children_ptr_par<'a>(&'a self) -> impl ParIter<Item = &'a NodePtr<V>>
+    where
+        V: 'a,
+        V::Item: Send + Sync,
+    {
+        self.as_slice().par().map(|x| {
+            x.as_ref()
+                .expect("all elements of RefsArrayLeftMost::as_slice are of Some variant")
+        })
+    }
+
     #[inline(always)]
     fn get_ptr(&self, i: usize) -> Option<&NodePtr<V>> {
         self.get(i)