From d432ec8513db86b915cf9de5eba2b396e51cef38 Mon Sep 17 00:00:00 2001 From: Andrew Dolgert Date: Mon, 27 May 2024 08:00:28 -0400 Subject: [PATCH 1/6] main page is shorter and has a high level image --- docs/src/assets/FleckTopLevel.svg | 105 ++++++++++++++++++++++++++++++ docs/src/index.md | 11 ++-- 2 files changed, 109 insertions(+), 7 deletions(-) create mode 100644 docs/src/assets/FleckTopLevel.svg diff --git a/docs/src/assets/FleckTopLevel.svg b/docs/src/assets/FleckTopLevel.svg new file mode 100644 index 0000000..cabdd59 --- /dev/null +++ b/docs/src/assets/FleckTopLevel.svg @@ -0,0 +1,105 @@ + + + + + + + + + + + + + + + + + + + + + + Canvas 1 + + + Layer 1 + + + + + Fleck decides + which transition + happens next. + + + + + + + Simulation STATE + + + + + + + Which transitions + could + happen next + + + + + + + Handle the next + transition + + + + + + + + + + + + + + + + + + + + + Logic + + + + + + + Data + structure + + + + + Legend + + + + + + + + + Enable + Disable + + + + + diff --git a/docs/src/index.md b/docs/src/index.md index 5d854bb..993c766 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -6,10 +6,9 @@ CurrentModule = Fleck Fleck is a Julia library that samples distributions for discrete event systems (DES) in continuous time. It supports Exponential and non-Exponential distributions for events. - ## Overview -If you want to write a simulation engine that tracks the state of a system, and the next event in that system is determined by which, among competing events, fires first, then this library can track the next firing times. Here are some examples of discrete event systems. +Many kinds of discrete event simulations need an efficient way to choose the next event in a simulation. * Simulations of chemical reactions. * Queueing theory models of networks, production, and computation. @@ -18,7 +17,9 @@ If you want to write a simulation engine that tracks the state of a system, and * Generalized stochastic Petri nets. 
* Generalized semi-Markov Processes. 
-Many of these mathematical objects provide intricate structure to tell us what events are enabled in any particular state (e.g; Petri nets). Fleck organizes the stochastic firing times (clocks) of enabled events so we can sample from clocks to find out what happens next and update them after the state changes. In statistical terms, this library is a sampler for generalized semi-Markov processes. +This library supports these kinds of simulations by optimizing the choice of the next event in the system. In statistical terms, this library is a sampler for generalized semi-Markov processes. + +![Fleck chooses the next transition but the simulation tracks state and changes to state.](assets/FleckTopLevel.svg) The background work for this library comes from [Continuous-time, discrete-event simulation from counting processes](https://arxiv.org/abs/1610.03939), by Andrew Dolgert, 2016. @@ -34,10 +35,6 @@ The library provides you with samplers. Each sampler has the same interface. Her Different samplers are specialized for sampling more quickly and accurately for different applications. For instance, some applications have very few events enabled at once, while some have many. Some applications use only exponentially-distributed events, while some have a mix of distribution types. Because continuous-time discrete event systems can fire many events, the literature has focused on reducing the number of CPU instructions required to sample each event, and this library reflects that focus. -## Documentation - - - ## Why Use This? If I make a quick simulation for myself, I sample distributions the moment an event is enabled and store the firing times in a [priority queue](https://juliacollections.github.io/DataStructures.jl/v0.12/priority-queue.html). When would I switch to this library? 
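That hand-rolled, priority-queue approach can be sketched in a few lines of Julia. Everything below is illustrative rather than Fleck's API: the `quick_sim` function, the event names, and the rule that a fired event is immediately re-enabled are assumptions made only for this example.

```julia
using Random

# A sketch of the "quick simulation" approach: sample a firing time the
# moment each event is enabled, keep the absolute times in a map, and
# repeatedly pop the minimum. Exponential clocks are memoryless, so the
# clocks that did not fire keep their previously sampled times.
function quick_sim(rates::Dict{Symbol,Float64}, nevents::Int, rng::AbstractRNG)
    pending = Dict(id => randexp(rng) / r for (id, r) in rates)  # absolute firing times
    trajectory = Tuple{Float64,Symbol}[]
    for _ in 1:nevents
        when, id = findmin(pending)                   # the next clock to ring
        push!(trajectory, (when, id))
        pending[id] = when + randexp(rng) / rates[id] # redraw only the clock that fired
    end
    return trajectory
end

traj = quick_sim(Dict(:arrive => 2.0, :depart => 0.5), 10, Xoshiro(42))
@assert issorted(first.(traj))
```

This works well until events need to be disabled, re-enabled with partial progress, or drawn from non-Exponential distributions, which is where a sampler library earns its keep.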
From 4f4bc07e0a5ea6f5045a73178a4d6ecca1222770 Mon Sep 17 00:00:00 2001 From: Andrew Dolgert Date: Mon, 27 May 2024 09:45:41 -0400 Subject: [PATCH 2/6] added a page on distributions with memory --- docs/make.jl | 17 ++-- docs/src/background.md | 16 ++-- docs/src/distrib.md | 197 +------------------------------------- docs/src/distributions.jl | 2 +- docs/src/guide.md | 8 +- docs/src/memory.jl | 31 ++++++ 6 files changed, 58 insertions(+), 213 deletions(-) create mode 100644 docs/src/memory.jl diff --git a/docs/make.jl b/docs/make.jl index 1751ae0..2b94227 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -20,7 +20,8 @@ adliterate = [ ("constant_birth.jl", "constant_birth"), ("sir.jl", "sir"), ("commonrandom.jl", "commonrandom"), - ("reliability.jl", "reliability") + ("reliability.jl", "reliability"), + ("memory.jl", "memory") ] literate_subdir = joinpath(example_base, "literate") isdir(literate_subdir) || mkdir(literate_subdir) @@ -37,7 +38,7 @@ for (source, target) in adliterate rm("$(ftarget).md", force=true) end if !isfile("$(ftarget).md") || mtime(fsource) > mtime("$(ftarget).md") - println("Literate of $source to $target") + @info "Literate of $source to $target" Literate.markdown( fsource, example_base, @@ -48,7 +49,10 @@ for (source, target) in adliterate ischunk = Regex("$(target)-[0-9]+.(png|svg|pdf)") chunks = [fn for fn in readdir(example_base) if match(ischunk, fn) !== nothing] for chunk in chunks - mv(joinpath(example_base, chunk), joinpath(example_base, "literate", chunk)) + chunk_source = joinpath(example_base, chunk) + chunk_target = joinpath(example_base, "literate", chunk) + @info "Moving $chunk_source to $chunk_target" + mv(chunk_source, chunk_target) end end end @@ -72,13 +76,12 @@ makedocs(; "distributions.md" ], "Manual" => [ - "Structure" => "objects.md", - "commonrandom.md", - "background.md", "distrib.md", - "Develop" => "develop.md", + "background.md", "samplers.md", + "memory.md", "Vector Addition Systems" => "vas.md", + "commonrandom.md", 
], "Examples" => [ "Birth-death Process" => "constant_birth.md", diff --git a/docs/src/background.md b/docs/src/background.md index b7c6d8c..75f164f 100644 --- a/docs/src/background.md +++ b/docs/src/background.md @@ -1,4 +1,4 @@ -# Background +# Markov Processes This addresses two main points, how to specify a model for the library using distributions defined by hazards and why such a specification, @@ -18,8 +18,8 @@ measles. It frequently happens that random samples of the real valued variables such as $\bf{X}$ are actually analyzed on a discrete scale. For example Stocks' data on latent periods of measles in -`latent_period`{.interpreted-text role="ref"} is based on daily visits -by patients. +`latent_period` is based on daily visits +by patients [Stocks:1931]. The (cumulative) distribution of $\bf{X}$ is defined as @@ -36,7 +36,7 @@ f_{X}(k) & = \mathcal{P}[X=k] \\ \end{aligned} ``` -For Stocks' data in `latent_period`{.interpreted-text role="ref"}, the +For Stocks' data in `latent_period`, the density at day $k$ should be interpreted as the probability of the appearance of symptoms since the previous visit on day $k-1$. 
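A toy example makes the relationship between the discrete density and the hazard concrete. The pmf below is invented for illustration, not Stocks' data; it assumes only the discrete hazard $h_X(k) = f_X(k)/(1 - F_X(k-1))$ implied by the definitions above.

```julia
# Toy daily pmf f_X(k) = P[X = k] for k = 1..4 (illustrative, not Stocks' data).
f = [0.1, 0.4, 0.3, 0.2]
F = cumsum(f)                      # cumulative distribution F_X(k)

# Discrete hazard: h_X(k) = P[X = k | X > k - 1] = f_X(k) / (1 - F_X(k - 1)).
h = [f[k] / (1 - (k == 1 ? 0.0 : F[k - 1])) for k in eachindex(f)]

@assert h[1] ≈ 0.1
@assert h[end] ≈ 1.0               # surviving to the last day forces the event
```

Note that the hazard at the final support point is one: if symptoms have not appeared by day three, they must appear on day four.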
@@ -53,7 +53,7 @@ h_{X}(k) & = \mathcal{P}[X=k\; |\; k-1 Date: Mon, 27 May 2024 11:52:59 -0400 Subject: [PATCH 3/6] added a gsmp section --- docs/make.jl | 5 ++-- docs/src/gsmp.jl | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 docs/src/gsmp.jl diff --git a/docs/make.jl b/docs/make.jl index 2b94227..b5ee089 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -21,7 +21,8 @@ adliterate = [ ("sir.jl", "sir"), ("commonrandom.jl", "commonrandom"), ("reliability.jl", "reliability"), - ("memory.jl", "memory") + ("memory.jl", "memory"), + ("gsmp.jl", "gsmp") ] literate_subdir = joinpath(example_base, "literate") isdir(literate_subdir) || mkdir(literate_subdir) @@ -78,9 +79,9 @@ makedocs(; "Manual" => [ "distrib.md", "background.md", + "GSMP" => "gsmp.md", "samplers.md", "memory.md", - "Vector Addition Systems" => "vas.md", "commonrandom.md", ], "Examples" => [ diff --git a/docs/src/gsmp.jl b/docs/src/gsmp.jl new file mode 100644 index 0000000..3ffccaa --- /dev/null +++ b/docs/src/gsmp.jl @@ -0,0 +1,75 @@ +# # Generalized Semi-Markov processes +# +# ## Definition +# +# We said that a semi-Markov process is a set of random variables representing states and times of a system, $\{X_i,T_i\}$. For each state at time $T_i$, the probability of the next state and time is some distribution, $P[X_{i+1},T_{i+1}|X_i,T_i]$. A generalized semi-Markov process (GSMP) also has the same states and times and the same distribution of next states and times, but it's more specific about how to calculate the probability. +# +# For a GSMP, every change in state of the system, from $X_i$ to $X_{i+1}$, is the result of an event $E_j$. Each event is associated with a distribution of event times, also called firing times, and those times are distributed as $f_j(\tau)$ where $\tau=t-T_i$. The probability of the next state and time is determined by the minimum firing time of all events enabled at $(X_i,T_i)$. 
The distributions of these events are our competing clocks. When one event fires, it changes the state of the system, and, as a result, some events may be disabled and new events may be enabled. +# +# ## Considerations +# +# Once we create this separate object, the event, it raises questions about corner cases. If two events change the state in the same way, then they are the same as one event whose hazard rate is the sum of the two events. If an event does not change the state, then this is like a self-loop in a Markov chain, and it complicates how we count states. As with the semi-Markov process, there remains the question of events which are immediate. If an event can happen at $T_{i+1}=T_i$, then it is possible for a system to fail to progress to a later time, and that's a problem. +# +# The GSMP is a specific form of a semi-Markov process that requires that $P[T_{i+1}|X_i,T_i]$ (note the $X_{i+1}$ isn't on the left side) be determined by the minimum time to the next event, so it is determined solely by the $f_j(\tau)$. Each event is also defined by how it changes the state. If events change the state in a deterministic way, such that there is some function $X_{i+1}=\chi_j(X_i)$, then the likelihood of the system is the product of $P[X_{i+1},T_{i+1}|X_i,T_i]$ for each time step, determined by the minimum of event times. It is traditional to define generalized stochastic Petri nets [Haas:2002] this way, and Anderson and Kurtz's excellent short manuscript presents GSMP in the same light using counting processes [Anderson:2015]. However, Haas describes GSMP as allowing events to be stochastic. This means the likelihood has two terms, $P[E_j,T_{i+1}|X_i,T_i]P[X_{i+1}|E_j, T_{i+1}, X_i,T_i]$. +# +# ## Events and Physical State +# +# Glynn presented GSMP by distinguishing the physical state of the system from the clock state [Glynn:1989], and Shedler is known for having the clearest presentation [Shedler:1987]. 
He represented the physical state as a set of states $p=(p_1,p_2,p_3...)$. Then each event, $E_j$, is defined in relation to those physical states: +# +# * An event is *enabled* by an enabling function which depends on a subset of physical states. This is a function $e(\{p_l\},T_i)\rightarrow \mbox{bool}$. In this notation, the curly brackets $\{\}$ indicate a "set of" something. +# * When the event is not enabled, it is *disabled.* +# * The distribution of event times for an event is determined by a subset of physical states. This is a function $f(\{p_m\},T_i)\rightarrow \mbox{pdf}$. +# * An event creates a new state by changing some subset of the physical state, and that function can depend on another subset of the physical state, which isn't changed. This is a function $\chi(\{p_j\},\{p_k\},T_i,T_{i+1})\rightarrow \{p'_j\}$. +# +# Look at all the subsets. There is a subset for enabling, transition rates, modified state, and catalyst state (which affects the action but isn't modified). If we think of the physical state as nodes in a graph and the events as nodes in a graph, then each subset associated with an event forms a different kind of edge in a bipartite graph. +# +# The state of the system at any time is more than the physical state. It's the physical state plus the history of when each event clock was enabled. It is even reasonable to include in the state of the system every past event that fired and the time it fired, which is called the *filtration* of the stochastic process. +# +# The idea behind introducing the notion of physical state and subsets of the physical state is to help think about a semi-Markov process where events live longer than a single time step. Glynn wanted to attach those long-lived competing processes to some state because, in practice, there is something about the state of the world that remains the same at each time step. 
The brilliance of Anderson and Kurtz's monograph is that they start their model as a set of counting processes [Anderson:2015]. Any state of the system is a predictable function of the filtration (event history) of the counting processes. +# +# In the nomenclature of the GSMP, an event defines a change to substates, and every possible pair of states $(X_{i+1},X_i)$ defines a transition. In general, the number of possible transitions is combinatorially larger than the number of possible events, as Haas covers in detail [Haas:2002]. +# +# ## Formalisms of GSMP +# +# There are many frameworks for simulation where the simulation is in continuous-time and the next event is determined by competition among clocks. +# +# * Simulations of chemical reactions. Here, the physical state is chemical components and simulations are usually, but not always, Exponentially-distributed. +# * Queueing theory models of networks, production, and computation. Here, the state is in queues and the events are represented by servers. +# * Epidemiological models of disease spread among individuals. These models are often hand-coded, but they look a lot like chemical simulation with non-Exponential distributions. +# * Vector-addition systems are an older form of simulation where the state is a vector of integers and every event gives or takes from the vector of integers. Again, it looks a lot like chemical simulation. +# * Generalized stochastic Petri nets are what happens when engineers use GSMP. There is a strong vocabulary used to define the state as marking and places, but they conform to what is described above. +# +# ## Extensions to GSMP +# +# ### Atomic Hazards and Differential Equations +# +# Most presentations of GSMP assume that the distributions of event times are continuous distributions that are well-behaved, but the structure of the stochastic process remains well-defined if we allow distributions that have jumps. Examples of such distributions are delta functions. 
We could, for instance, say that an event has a 1/3 chance of happening in 2 minutes and a 2/3 chance of happening in 5 minutes. +# +# More commonly, what if the next step in a simulation were determined by an ordinary differential equation (ODE) that depends on the current state and time? A simulation could, at the enabling time, integrate the ODE to find when it predicts the next event and then enable a distribution that is a delta function centered at that predicted time. +# +# The caveat for atomic hazards is that it's possible for two atomic hazards to happen at exactly the same time. Fleck doesn't have a way to guarantee which of those events happens first. It certainly doesn't have a way to randomly select which event should happen from a configurable probability distribution. This is a feature specific to samplers that allow instantaneous and simultaneous events. The way we handle the possibility of simultaneous events is to schedule atomic events relative to continuous-time events or to ensure that there is only one atomic hazard in a whole simulation. +# +# ### Markov Decision Process +# +# In reinforcement learning, there is a model for how the world changes and a model for how to make decisions that depend on the world's history. For a GSMP-based decision process, there are two terms in the likelihood for the next state and time. +# +# ```math +# P[X_{i+1},T_{i+1}|X_i,T_i]P[A_j] +# ``` +# +# The additional stochastic variable $A_j$ is the decision at each step. +# +# ### Piecewise-deterministic Markov Process +# +# +# +# ## References +# +# [Anderson:2015] Anderson, David F., and Thomas G. Kurtz. Stochastic analysis of biochemical systems. Vol. 674. Berlin, Germany: Springer International Publishing, 2015. +# +# [Haas:2002] P. J. Haas, Stochastic Petri Nets: Modelling, Stability, Simulation, Springer-Verlag, New York, New York, USA, 2002. +# +# [Glynn:1989] P. 
Glynn, A GSMP formalism for discrete event systems, Proceedings of the IEEE 77 (1) (1989) 14– 23, ISSN 00189219, [URL](http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=21067). +# +# [Shedler:1987] Shedler, Gerald S. Regeneration and networks of queues. Vol. 3. Springer Science & Business Media, 1987. +# \ No newline at end of file From 3161e723b83c1830177754c43af5a956c29b2187 Mon Sep 17 00:00:00 2001 From: Andrew Dolgert Date: Mon, 27 May 2024 21:36:00 -0400 Subject: [PATCH 4/6] docs/src/hierarchical.jl --- docs/make.jl | 4 +++- docs/src/samplers.md | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/make.jl b/docs/make.jl index b5ee089..524b364 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -22,7 +22,8 @@ adliterate = [ ("commonrandom.jl", "commonrandom"), ("reliability.jl", "reliability"), ("memory.jl", "memory"), - ("gsmp.jl", "gsmp") + ("gsmp.jl", "gsmp"), + ("hierarchical.jl", "hierarchical") ] literate_subdir = joinpath(example_base, "literate") isdir(literate_subdir) || mkdir(literate_subdir) @@ -81,6 +82,7 @@ makedocs(; "background.md", "GSMP" => "gsmp.md", "samplers.md", + "hierarchical.md", "memory.md", "commonrandom.md", ], diff --git a/docs/src/samplers.md b/docs/src/samplers.md index c22dbc6..b22a75e 100644 --- a/docs/src/samplers.md +++ b/docs/src/samplers.md @@ -13,7 +13,7 @@ From the perspective of someone who uses samplers in a simulation the features a - control variates? * Does it maintain accuracy for extreme distribution parameters or extreme draws? -Answers to these questions come from understanding how samplers are built. +Answers to these questions come from understanding how samplers are built. A modern description of samplers is in Marchetti [Marchetti:2019]. 
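As a reference point for the sections that follow, here are minimal sketches of the two textbook selection schemes for competing exponential clocks. These are not Fleck's implementations; the function names and signatures are invented for illustration, and for exponential rates the two schemes are distributionally equivalent.

```julia
using Random

# First reaction: sample every enabled clock and take the minimum.
function first_reaction(rates::Vector{Float64}, rng::AbstractRNG)
    times = randexp(rng, length(rates)) ./ rates
    return findmin(times)                      # (firing time, clock index)
end

# Direct: one draw from the summed hazard gives the time, then a
# categorical draw, weighted by rate, picks which clock fired.
function direct(rates::Vector{Float64}, rng::AbstractRNG)
    total = sum(rates)
    t = randexp(rng) / total
    u = rand(rng) * total
    which, acc = length(rates), 0.0            # fallback guards float round-off
    for (i, r) in enumerate(rates)
        acc += r
        if u <= acc
            which = i
            break
        end
    end
    return t, which
end

rng = Xoshiro(7)
t, which = direct([0.5, 2.0, 1.5], rng)
@assert t > 0 && 1 <= which <= 3
```

The first reaction method costs one exponential draw per enabled clock per step; the direct method costs two draws plus a linear scan, which is why the literature on samplers is largely about making these bookkeeping steps cheaper.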
## Series of choices @@ -95,7 +95,7 @@ The next reaction method also samples each clock, but it samples in such a way t When a clock is first enabled, the next reaction method samples a uniform variate in [0,1], and then it finds a putative next event time for that clock using inversion. The original random variate is considered the **total survival for this clock.** The interesting move is that, if that same clock is disabled, this method saves some measure of how far the clock has gotten. That is, it measures the survival of the clock and subtracts that survival from the total survival. If the clock is ever enabled again, its new firing time is determined by the remaining survival. -Is that allowed? The authors of the Next Reaction sort of prove it in ["Efficient Exact Stochastic Simulation of Chemical Systems with Many Species and Many Channels"](https://pubs.acs.org/doi/full/10.1021/jp993732q), but they can fortunately can rely on the work of Kurtz [1], which I don't quite see in their references. Nevertheless, Anderson and Kurtz amended this work with ["Continuous time markov chain models for chemical reaction methods"](https://link.springer.com/chapter/10.1007/978-1-4419-6766-4_1). +Is that allowed? The authors of the Next Reaction sort of prove it in ["Efficient Exact Stochastic Simulation of Chemical Systems with Many Species and Many Channels"](https://pubs.acs.org/doi/full/10.1021/jp993732q), but they can fortunately rely on the work of Kurtz [Kurtz:1970], which I don't quite see in their references. Nevertheless, Anderson and Kurtz amended this work with ["Continuous time markov chain models for chemical reaction methods"](https://link.springer.com/chapter/10.1007/978-1-4419-6766-4_1). In Anderson and Kurtz, they take the same approach as the next reaction method. It's still split by the conditional firing times. It's still sampling by inversion, but they change to a log-space for the sampling of individual distributions. That's the whole change. 
Instead of storing the total survival, $U$, they store the log of that quantity, which turns out to be much more efficient for exponentials and Weibulls, which are used most frequently. @@ -113,4 +113,6 @@ It would be interesting to ask whether we could create a data structure that is ## References -[1] Kurtz, Thomas G. "Solutions of ordinary differential equations as limits of pure jump Markov processes." Journal of applied Probability 7.1 (1970): 49-58. +[Kurtz:1970] Kurtz, Thomas G. "Solutions of ordinary differential equations as limits of pure jump Markov processes." Journal of applied Probability 7.1 (1970): 49-58. + +[Marchetti:2019] Marchetti, Luca, Corrado Priami, and Vo Hong Thanh. Simulation algorithms for computational systems biology. Vol. 1. Berlin, Germany, 2019. From 2656b3fdcb7afeec9e6895f20987fd89dce18211 Mon Sep 17 00:00:00 2001 From: Andrew Dolgert Date: Mon, 27 May 2024 21:38:21 -0400 Subject: [PATCH 5/6] added description of hierarchical samplers --- .gitignore | 10 ++++++++-- docs/src/hierarchical.jl | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 docs/src/hierarchical.jl diff --git a/.gitignore b/.gitignore index ac6354c..9af2266 100644 --- a/.gitignore +++ b/.gitignore @@ -24,10 +24,16 @@ deps/src/ docs/build/ docs/site/ # Literate.jl creates chunk files. 
-docs/src/literate +docs/src/commonrandom.md +docs/src/constant_birth.md docs/src/distributions.md +docs/src/gsmp.md +docs/src/hierarchical.md +docs/src/literate docs/src/mainloop.md -docs/src/constant_birth.md +docs/src/memory.md +docs/src/reliability.md +docs/src/shifting.md docs/src/sir.md # File generated by Pkg, the package manager, based on a corresponding Project.toml diff --git a/docs/src/hierarchical.jl b/docs/src/hierarchical.jl new file mode 100644 index 0000000..ed549a6 --- /dev/null +++ b/docs/src/hierarchical.jl @@ -0,0 +1,32 @@ +# # Hierarchical Samplers +# +# ## Overview +# +# Continuous-time samplers have to process each event separately. As simulations grow in size, samplers use more memory, and they take more time to select the next event. One approach to speed up sampling is to use multiple samplers arranged in a hierarchy. +# +# ## Why Hierarchical Samplers +# +# There are two reasons to use hierarchical samplers. +# +# Some samplers are better for some simulations. If all transitions are Exponentially-distributed, then an optimized Direct sampler can be the fastest. If all of the distributions are of Exponential families, then Anderson's method is faster than the Next Reaction method. You can split the events among samplers according to the sampler that best fits the behavior of those events. +# +# The other reason to use multiple samplers has to do with the frequency and locality of the events, in the same way we think of the frequency and locality of memory accesses for cache use. If there is a small subset of events that regenerate frequently, it can make sense even to use a [`FirstReaction`](@ref) sampler for those events. While FirstReaction doesn't use a complicated data structure to optimize, it can be winningly fast for small numbers of events. Leave the rest of the events in a [`FirstToFire`](@ref) sampler. 
Or, for a spatial simulation, you could make separate samplers for separate parts of the landscape, so that each event tends to affect a limited number of samplers. +# +# ## First Sampler to Fire +# +# All of the different samplers find the first event to fire. If we set up two samplers, so that each holds mutually distinct enabled event distributions, we can ask each sampler which it thinks will fire next. The first to fire is the first of the two samplers. This generalizes to any number of samplers. We can make a [`MultiSampler`](@ref) which contains multiple samplers and always returns the soonest of those contained. +# +# Even further, a MultiSampler can contain a MultiSampler if that makes sense. +# +# ## Multiple Direct Samplers +# +# It's possible to make a Direct-style sampler that is hierarchical, too. A Direct sampler works in two steps. It sums the hazards of all enabled events and then selects a time according to the sum of the hazards. The main algorithm of a Direct sampler is to sum hazards. A hierarchical Direct algorithm sums the sums of the hazards and then selects a time. +# +# While hierarchical samplers can contain multiple-Direct samplers, multiple-Direct samplers can only contain other multiple-Direct samplers. +# +# ## How to Split a Simulation +# +# Each time a simulation calls `enable!` and `disable!` for an event, it specifies a key for that event. If the sampler is hierarchical, it can use that key, and maybe the type of the distribution, to choose which sampler handles any given event. +# +# Fleck's approach in the [`MultiSampler`](@ref) type is to let the user specify a function that takes as input the key and the distribution and returns some ID for the chosen sampler. The MultiSampler then remembers that choice for this event key, from that point on. 
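The ask-each-sampler rule above reduces to a minimum over the contained samplers. The stub below sketches that reduction; it is not Fleck's `MultiSampler`, and the `StubSampler` type and the `soonest`/`next_over` names are invented for illustration.

```julia
# Each contained sampler answers: "when would your soonest clock fire,
# and which clock is it?" The hierarchy picks the overall minimum.
struct StubSampler
    times::Dict{Symbol,Float64}   # enabled clock key => absolute firing time
end

soonest(s::StubSampler) = findmin(s.times)   # (time, key)

function next_over(samplers::Vector{StubSampler})
    best_time, best_key = Inf, :none
    for s in samplers
        t, k = soonest(s)
        if t < best_time
            best_time, best_key = t, k
        end
    end
    return best_time, best_key
end

a = StubSampler(Dict(:repair => 3.2, :fail => 1.7))
b = StubSampler(Dict(:arrive => 0.9))
@assert next_over([a, b]) == (0.9, :arrive)
```

Because the reduction is itself just "soonest wins," a contained sampler can be another hierarchy, which is why a MultiSampler can contain a MultiSampler.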
+# From 662cde4da478cc140c910629fc549cff187ffba2 Mon Sep 17 00:00:00 2001 From: slwu89 <10673535+slwu89@users.noreply.github.com> Date: Tue, 28 May 2024 08:36:02 -0700 Subject: [PATCH 6/6] copy editing, latex fixes, and reference --- docs/src/commonrandom.jl | 2 +- docs/src/gsmp.jl | 4 ++-- docs/src/guide.md | 8 ++++++-- docs/src/hierarchical.jl | 2 +- docs/src/index.md | 3 +-- 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/docs/src/commonrandom.jl b/docs/src/commonrandom.jl index fbd08c5..3dc67ad 100644 --- a/docs/src/commonrandom.jl +++ b/docs/src/commonrandom.jl @@ -4,7 +4,7 @@ # If you set up the same model and run it with different initial random number generator (RNG) states, then it will create a set of trajectories. Fleck sees these as a sequence of clock events and times of those events. You are usually interested in some summary outcomes of a simulation, such as the total time to a goal or the number of events. This summary outcome is a predictable function of the trajectories. We often want to ask how the goal function depends on simulation parameters, and that can be difficult to determine because each trajectory gives an individual value, and the set of trajectories gives an estimate that can have wide variance. -# What we want is a variance reduction technique. Common random numbers (CRN) are a variance reduction technique that enables you to use fewer simulation runs to compare the effect of different simulation parameters on the outcome. There are several other variance reduction techniques, such as antithetic variates and importance sampling, but let's look at common random numbers in Fleck. +# What we want is a [variance reduction](https://en.wikipedia.org/wiki/Variance_reduction) technique. Common random numbers (CRN) are a variance reduction technique that enables you to use fewer simulation runs to compare the effect of different simulation parameters on the outcome. 
There are several other variance reduction techniques, such as antithetic variates and importance sampling, but let's look at common random numbers in Fleck. # If you estimate a value with $n$ independent trajectories, then the standard error of the estimate is proportional to $1/\sqrt{n}$ in most cases. If you want to distinguish the effect of changing a parameter, then the estimate must be precise enough that you can see the difference. It is common to use millions of trajectories. On the other hand, CRN means that you can produce $n=100$ trajectories, with significant uncertainty in the estimate, and still see the effect of changing a parameter. diff --git a/docs/src/gsmp.jl b/docs/src/gsmp.jl index 3ffccaa..f01c016 100644 --- a/docs/src/gsmp.jl +++ b/docs/src/gsmp.jl @@ -16,9 +16,9 @@ # # Glynn presented GSMP by distinguishing the physical state of the system from the clock state [Glynn:1989], and Shedler is known for having the clearest presentation [Shedler:1987]. He represented the physical state as a set of states $p=(p_1,p_2,p_3...)$. Then each event, $E_j$, is defined in relation to those physical states: # -# * An event is *enabled* by an enabling function which depends on a subset of physical states. This is a function $e(\{p_l\},T_i)\rightarrow \text{bool}$. In this notation, the curly brackets $\{\}$ indicate a "set of" something. # * When the event is not enabled, it is *disabled.* -# * The distribution of event times for an event is determined by a subset of physical states. This is a function $f(\{p_m\},T_i)\rightarrow \text{pdf}$. 
# * An event creates a new state by changing some subset of the physical state, and that function can depend on another subset of the physical state, which isn't changed. This is a function $\chi(\{p_j\},\{p_k\},T_i,T_{i+1})\rightarrow \{p'_j\}$. # # Look at all the subsets. There is a subset for enabling, transition rates, modified state, and catalyst state (which affects the action but isn't modified). If we think of the physical state as nodes in a graph and the events as nodes in a graph, then each subset associated with an event forms a different kind of edge in a bipartite graph. diff --git a/docs/src/guide.md b/docs/src/guide.md index eb08be1..9f0ad0f 100644 --- a/docs/src/guide.md +++ b/docs/src/guide.md @@ -4,7 +4,7 @@ A discrete event simulation tracks the effects of individual events in time. We We write discrete event simulations by defining the state of a system and events that can act on that state. The state of a system can be the count of each chemical species for a chemical reaction system. It can be different numbers of chickens at different weights in different chicken coops for a farming simulation. It can be the age of each widget in an engine for a reliability simulation. These states are called the *physical state* of the system and vary widely in what they represent. -Even though nearly every simulation represents a unique system, it is often possible to find enough in common among systems that we can make libraries to help create, simulate, and observe changes of state. For instance, a large class of physical states are counts of things, whether they are molecules, vehicles, or chickens. These states can be represented in code as vectors of integers, and there is a long history of models represented by vectors of integers. Most of this work focuses on using chemical simulation codes to represent not only chemicals but also other systems like vehicles. 
An older, more general, form of this simulation is the vector addition system, which a later section of this manual describes. However, states don't need to be vectors of integers. +Even though nearly every simulation represents a unique system, it is often possible to find enough in common among systems that we can make libraries to help create, simulate, and observe changes of state. For instance, a large class of physical states are counts of things, whether they are molecules, vehicles, or chickens. These states can be represented in code as vectors of integers, and there is a long history of models represented by vectors of integers. Most of this work focuses on using chemical simulation codes to represent not only chemicals but also other systems like vehicles. An older, more general, form of this simulation is the [vector addition system](https://en.wikipedia.org/wiki/Vector_addition_system). However, states don't need to be restricted to vectors of integers. We usually want to simulate systems that have real-valued observations such as ages, temperatures, and distances. These continuous parts of the state of the system mean that the system rarely returns to exactly the same state. There are now an infinite number of possible states of the system, which changes how we would think of the statistics of the system, but it doesn't much change how we handle simulation, because simulation is still driven by events. @@ -12,7 +12,7 @@ An *event* is an observable change of state at a time. It is a function where th Given the current state of the system, there is a set of possible next events which could happen. We call the possible next events *enabled.* Think of the moment in time just after the simulation has begun or an event has just happened. At this moment, there may be multiple enabled events. Which one is next depends on how often that event happens. 
If our simulation includes both the radioactive decay of an unstable element and decay of iron, the unstable element will usually decay first. We describe the rates of these events using probability distributions in time. Each event has an associated continuous univariate distribution where the variable is time. -We can think of all event distributions in a model as a *bag of clocks.* The next one to ring is the next event to occur. When that event occurs, the state changes. When the state changes, it will enable some events and disable others. Enabled events are added to the bag of clocks. Disabled ones are removed. This library holds the bag of clocks. +We can think of all event distributions in a model as a *bag of clocks.* The next one to ring is the next event to occur. When that event occurs, the state changes. When the state changes, it will enable some events and disable others. Enabled events are added to the bag of clocks. Disabled ones are removed. Fleck is responsible for managing this bag of clocks. ## Sets of Clocks @@ -25,3 +25,7 @@ Newly enabled clocks are those that are enabled in ``S^{'}`` not part of the set All these sets are visualized in the figure below, reproduced from "Stochastic petri nets: Modelling, stability, simulation" by Peter J. Haas (2002). ![](assets/ClockUpdate.png) + +## References + +[Haas:2002] P. J. Haas, “[Stochastic Petri Nets: Modelling, Stability, Simulation](https://link.springer.com/book/10.1007/b97265),” Springer Science & Business Media, 2002. \ No newline at end of file diff --git a/docs/src/hierarchical.jl b/docs/src/hierarchical.jl index ed549a6..dec778c 100644 --- a/docs/src/hierarchical.jl +++ b/docs/src/hierarchical.jl @@ -10,7 +10,7 @@ # # Some samplers are better for some simulations. If all transitions are Exponentially-distributed, then an optimized Direct sampler can be the fastest. If all of the distributions are of Exponential families, then Anderson's method is faster than the Next Reaction method. 
You can split the events among samplers according to the sampler that best fits the behavior of those events. # -# The other reason to use multiple samplers has to do with the frequency and locality of the events, in the same way we think of the frequency and locality of memory accesses for cache use. If there is a small subset of events that regenerate frequently, it can make sense even to use a [`FirstReaction`](@ref) sampler for those events. While FirstReaction doesn't use a complicated data structure to optimize, it can be winningly fast for small numbers of events. Leave the rest of the events in a [`FirstToFire`](@ref) sampler. Or, for a spatial simulation, you could make separate samplers for separate parts of the landscape, so that each event tends to affect a limited number of samplers. +# The other reason to use multiple samplers has to do with the frequency and locality of the events, in the same way we think of the frequency and locality of memory accesses for cache use. If there is a small subset of events that regenerate frequently, it can make sense even to use a [`FirstReaction`](@ref) sampler for those events. While FirstReaction doesn't use a complicated data structure to optimize, it can be winningly fast for small numbers of events. Or, for a spatial simulation, you could make separate samplers for separate parts of the landscape, so that each event tends to affect a limited number of samplers. # # ## First Sampler to Fire # diff --git a/docs/src/index.md b/docs/src/index.md index 993c766..17225fc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -33,7 +33,7 @@ The library provides you with samplers. Each sampler has the same interface. Her * [next](@ref)`(sampler, current time, RNG)` - to ask this library who fires next. -Different samplers are specialized for sampling more quickly and accurately for different applications. For instance, some applications have very few events enabled at once, while some have many. 
Some applications use only exponentially-distributed events, while some have a mix of distribution types. Because continuous-time discrete event systems can be fire many events, the literature has focused on reducing the number of CPU instructions required to sample each event, and this library reflects that focus. +Different samplers are specialized for sampling more quickly and accurately for different applications. For instance, some applications have very few events enabled at once, while some have many. Some applications use only exponentially-distributed events, while some have a mix of distribution types. Because continuous-time discrete event systems can fire many events, the literature has focused on reducing the number of CPU instructions required to sample each event, and this library reflects that focus. ## Why Use This? @@ -46,4 +46,3 @@ If I make a quick simulation for myself, I sample distributions the moment an ev * Performance matters (which it often doesn't), so I would like to try different samplers on my problem. * I want to focus on developing and testing my *model* not my *simulation algorithm*; Fleck is designed and tested with care to ensure correctness. - \ No newline at end of file
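The *bag of clocks* description in guide.md amounts to first-reaction sampling: draw a firing time from each enabled clock's distribution and let the earliest one win. A minimal sketch of that idea in Python (illustrative only — the class and method names here are hypothetical, and this is not Fleck's Julia `enable!`/`disable!`/`next` interface):

```python
import random

class BagOfClocks:
    """First-reaction sampling: draw a firing time for each enabled
    clock; the clock with the earliest time fires next."""

    def __init__(self, seed=42):
        self.firing = {}               # clock id -> absolute firing time
        self.rng = random.Random(seed)

    def enable(self, clock, sample_delay, now):
        # Sample the clock's distribution the moment it becomes enabled.
        self.firing[clock] = now + sample_delay(self.rng)

    def disable(self, clock):
        # A state change can disable a clock before it rings.
        self.firing.pop(clock, None)

    def next_event(self):
        # The soonest firing time among all enabled clocks wins.
        if not self.firing:
            return float("inf"), None
        clock = min(self.firing, key=self.firing.get)
        return self.firing[clock], clock

bag = BagOfClocks()
bag.enable("unstable", lambda rng: rng.expovariate(10.0), 0.0)  # fast decay
bag.enable("iron", lambda rng: rng.expovariate(0.01), 0.0)      # very slow decay
when, which = bag.next_event()
```

After the chosen event changes the simulation state, the caller disables clocks that no longer apply and enables newly possible ones; that update loop is what Fleck's specialized samplers optimize with more sophisticated data structures.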