
Commit af8dbd1

fix: spellings to pass spell check
Includes renaming some variables to match updated contributor guidelines.
1 parent f6cc615 commit af8dbd1

18 files changed: +80 −80 lines changed

CODE_OF_CONDUCT.md

+1 −1

```diff
@@ -35,7 +35,7 @@ Project maintainers have the right and responsibility to remove, edit, or reject
 Enforcement
 -----------
 
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible
+Instances of abusive, harassing, or otherwise unacceptable behaviour may be reported to the community leaders responsible
 for enforcement at [[email protected]](mailto:[email protected]). All complaints will be reviewed and investigated promptly and fairly, and will
 result in a response that is deemed necessary and appropriate to the circumstances. The community leaders responsible
 for enforcement are obligated to maintain confidentiality with regard to the reporter of an incident.
```

README.md

+2 −2

````diff
@@ -11,7 +11,7 @@ Coreax is a library for **coreset algorithms**, written in [Jax](https://jax.rea
 A coreset algorithm takes a $n \times d$ data set and reduces it to $m \ll n$ points whilst attempting to preserve the statistical properties of the full data set. Some algorithms return the $m$ points with weights, such that importance can be attributed to each point. These are often chosen from the simplex, i.e. such that they are non-negative and sum to 1.
 
 ## Quick example
-Here are $n=10,000$ points drawn from six $2$-D Gaussians. The coreset size, which we set, is $m=100$. Run `examples/weighted_herding.py` to replicate.
+Here are $n=10,000$ points drawn from six $2$-D Gaussian distributions. The coreset size, which we set, is $m=100$. Run `examples/weighted_herding.py` to replicate.
 
 ![](examples/data/coreset_seq/coreset_seq.gif)
 ![](examples/data/random_seq/random_seq.gif)
@@ -82,7 +82,7 @@ coreset = kernel_herding_refine_block(X, m, k):
 ```
 
 ## Stein kernel herding
-We have implemented a version of kernel herding that uses a **Stein kernel**, which targets [kernelised Stein discrepancy (KSD)](https://arxiv.org/abs/1602.03253) rather than MMD. This can often give better integration error in practice, but it can be slower than using a simpler kernel targeting MMD. To use Stein kernel herding, we have to define a continuous approximation to the discerete measure, e.g. using a KDE, or estimate the score function $\nabla \log f_X(\mathbf{x})$ of a continuous PDF from a finite set of samples. In this example, we use a Stein kernel with an inverse multi-quadric base kernel; computing the score function explicitly (score matching coming soon). Again, there are block versions for fitting within GPU memory constraints.
+We have implemented a version of kernel herding that uses a **Stein kernel**, which targets [kernelised Stein discrepancy (KSD)](https://arxiv.org/abs/1602.03253) rather than MMD. This can often give better integration error in practice, but it can be slower than using a simpler kernel targeting MMD. To use Stein kernel herding, we have to define a continuous approximation to the discrete measure, e.g. using a KDE, or estimate the score function $\nabla \log f_X(\mathbf{x})$ of a continuous PDF from a finite set of samples. In this example, we use a Stein kernel with an inverse multi-quadric base kernel; computing the score function explicitly (score matching coming soon). Again, there are block versions for fitting within GPU memory constraints.
 ```python
 from coreax.kernel import stein_kernel_pc_imq_element, rbf_grad_log_f_x
 from coreax.kernel_herding import stein_kernel_herding_block
````
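For context on what these imports construct: the README paragraph describes applying the Langevin Stein operator to both arguments of an inverse multi-quadric base kernel. A self-contained sketch of that standard construction follows; it illustrates the technique only, and coreax's own `stein_kernel_pc_imq_element` (preconditioned) may differ in scaling and signature.

```python
import jax
import jax.numpy as jnp

def imq(x, y, bandwidth=1.0):
    # Inverse multi-quadric base kernel.
    return 1.0 / jnp.sqrt(1.0 + jnp.sum((x - y) ** 2) / bandwidth**2)

def stein_kernel_element(x, y, score, bandwidth=1.0):
    # Standard Langevin Stein kernel built from a base kernel k and a
    # score function s: k_0(x, y) = div_x div_y k + s(x).grad_y k
    #                   + s(y).grad_x k + s(x).s(y) k.
    k = lambda a, b: imq(a, b, bandwidth)
    grad_x = jax.grad(k, argnums=0)
    grad_y = jax.grad(k, argnums=1)
    div_term = jnp.trace(jax.jacfwd(grad_y, argnums=0)(x, y))
    return (
        div_term
        + score(x) @ grad_y(x, y)
        + score(y) @ grad_x(x, y)
        + score(x) @ score(y) * k(x, y)
    )

# Example: the score of a standard Gaussian is simply -x.
x, y = jnp.array([0.5, -0.2]), jnp.array([1.0, 0.3])
val = stein_kernel_element(x, y, score=lambda x: -x)
```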

coreax/approximation.py

+1 −1

```diff
@@ -283,7 +283,7 @@ def anchor_body(
     Execute main loop of the ANNchor construction.
 
     :param idx: Loop counter
-    :param features: Loop updateables
+    :param features: Loop variables to be updated
     :param data: Original :math:`n \times d` dataset
     :param kernel_function: Vectorised kernel function on pairs `(X,x)`:
         :math:`k: \mathbb{R}^{n \times d} \times \mathbb{R}^d \rightarrow \mathbb{R}^n`
```

coreax/kernel.py

+3 −3

```diff
@@ -25,7 +25,7 @@
 from coreax.util import (
     KernelFunction,
     KernelFunctionWithGrads,
-    pdiff,
+    pairwise_diff,
     sq_dist,
     sq_dist_pairwise,
 )
@@ -115,7 +115,7 @@ def grad_rbf_y(
     else:
         gram_matrix = jnp.asarray(gram_matrix)
 
-    distances = pdiff(x_array, y_array)
+    distances = pairwise_diff(x_array, y_array)
 
     return distances * gram_matrix[:, :, None] / bandwidth**2
 
@@ -164,7 +164,7 @@ def grad_pc_imq_y(
         gram_matrix = pc_imq(x_array, y_array, bandwidth)
     else:
         gram_matrix = jnp.asarray(gram_matrix)
-    mq_array = pdiff(x_array, y_array)
+    mq_array = pairwise_diff(x_array, y_array)
 
     return gram_matrix[:, :, None] ** 3 * mq_array / scaling
 
```
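As an aside on what the `grad_rbf_y` hunk computes: for an RBF kernel with bandwidth $\nu$, $k(\mathbf{x}, \mathbf{y}) = \exp(-\|\mathbf{x} - \mathbf{y}\|^2 / (2\nu^2))$, the gradient with respect to $\mathbf{y}$ is

$$\nabla_{\mathbf{y}} k(\mathbf{x}, \mathbf{y}) = \frac{\mathbf{x} - \mathbf{y}}{\nu^2} \, k(\mathbf{x}, \mathbf{y}),$$

which matches the `distances * gram_matrix[:, :, None] / bandwidth**2` expression above, assuming `pairwise_diff` returns $\mathbf{x}_i - \mathbf{y}_j$ for each pair and the kernel follows this common convention.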

coreax/kernel_herding.py

+11 −11

```diff
@@ -45,7 +45,7 @@ def greedy_body(
     Execute main loop of greedy kernel herding.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :param X: Original :math:`n \times d` dataset
     :param k_vec: Vectorised kernel function on pairs `(X,x)`:
         :math:`k: \mathbb{R}^{n \times d} \times \mathbb{R}^d \rightarrow \mathbb{R}^n`
@@ -84,7 +84,7 @@ def stein_greedy_body(
     Execute the main loop of greedy Stein herding.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :param X: Original :math:`n \times d` dataset
     :param k_vec: Vectorised kernel function on pairs ``(X,x,Y,y)``:
         :math:`k: \mathbb{R}^{n \times d} \times \mathbb{R}^d \times`
@@ -147,7 +147,7 @@ def kernel_herding_block(
     S = jnp.zeros(n_core, dtype=jnp.int32)
     K = jnp.zeros((n_core, n))
 
-    # Greedly select coreset points
+    # Greedily select coreset points
     body = partial(greedy_body, X=X, k_vec=k_vec, K_mean=K_mean, unique=unique)
     S, K, _ = lax.fori_loop(0, n_core, body, (S, K, K_t))
     Kbar = K.mean(axis=1)
@@ -208,7 +208,7 @@ def stein_kernel_herding_block(
     S = jnp.zeros(n_core, dtype=jnp.int32)
     K = jnp.zeros((n_core, n))
 
-    # Greedly select coreset points
+    # Greedily select coreset points
     body = partial(
         stein_greedy_body,
         X=X,
@@ -226,7 +226,7 @@ def stein_kernel_herding_block(
 
 
 @jit
-def fw_linesearch(arg_x_t: int, K: ArrayLike, Ek: ArrayLike) -> Array:
+def fw_line_search(arg_x_t: int, K: ArrayLike, Ek: ArrayLike) -> Array:
     r"""
     Execute Frank-Wolfe line search.
 
@@ -254,7 +254,7 @@ def herding_body(
     Execute body of default herding.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :return: Coreset indices, objective, Gram matrix mean and Gram matrix
     """
     S, objective, Kbar, K = val
@@ -277,7 +277,7 @@ def greedy_herding_body(
     Execute body of Stein thinning.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :return: Coreset indices, objective, Gram matrix mean and Gram matrix
     """
     S, objective, Kbar, K = val
@@ -300,7 +300,7 @@ def fw_herding_body(
     Execute body of Frank-Wolfe herding.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :return: Coreset indices, objective, Gram matrix mean and Gram matrix
     """
     S, objective, Kbar, K = val
@@ -310,7 +310,7 @@ def fw_herding_body(
     K = jnp.asarray(K)
     j = objective.argmax()
     S = S.at[i].set(j)
-    rho = fw_linesearch(S[i], K, Kbar)
+    rho = fw_line_search(S[i], K, Kbar)
     objective = objective * (1 - rho) + (Kbar - K[S[i]]) * rho
     return S, objective, Kbar, K
 
```
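On the renamed `fw_line_search`: for a quadratic objective the exact Frank-Wolfe step has a closed form. A minimal standalone sketch, assuming the weighted-MMD objective $f(\mathbf{w}) = \tfrac{1}{2}\mathbf{w}^\top K \mathbf{w} - \mathbf{w}^\top \bar{\mathbf{z}}$ (up to constants); this illustrates the general technique and is not necessarily what coreax implements:

```python
import jax.numpy as jnp

def exact_fw_step(w, j, K, z_bar):
    """Exact line search along the Frank-Wolfe direction d = e_j - w
    for f(w) = 0.5 * w @ K @ w - w @ z_bar, clipped to [0, 1]."""
    d = (-w).at[j].add(1.0)       # d = e_j - w
    g = K @ w - z_bar             # gradient of f at w
    rho = -(g @ d) / (d @ K @ d)  # unconstrained minimiser along d
    return jnp.clip(rho, 0.0, 1.0)
```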

```diff
@@ -408,8 +408,8 @@ def scalable_herding(
     else:
         # build a kdtree
         kdtree = KDTree(X, leaf_size=size)
-        _, nindices, nodes, _ = kdtree.get_arrays()
-        new_indices = [jnp.array(nindices[nd[0] : nd[1]]) for nd in nodes if nd[2]]
+        _, node_indices, nodes, _ = kdtree.get_arrays()
+        new_indices = [jnp.array(node_indices[nd[0] : nd[1]]) for nd in nodes if nd[2]]
         split_data = [X[n] for n in new_indices]
         # k = len(split_data)
         # print(n, k, n // k)
```
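For readers puzzling over `nd[0]`, `nd[1]`, `nd[2]`: with scikit-learn's `KDTree`, `get_arrays()` returns a node record array whose entries hold `(idx_start, idx_end, is_leaf, radius)`, so the comprehension collects the index slice of every leaf node. A standalone sketch of that partitioning, assuming the `KDTree` here is `sklearn.neighbors.KDTree`:

```python
import numpy as np
from sklearn.neighbors import KDTree

X = np.random.default_rng(0).normal(size=(10_000, 2))
kdtree = KDTree(X, leaf_size=500)
_, node_indices, nodes, _ = kdtree.get_arrays()

# Each record holds (idx_start, idx_end, is_leaf, radius); the leaf
# slices of node_indices partition the dataset into disjoint blocks.
leaves = [node_indices[nd[0] : nd[1]] for nd in nodes if nd[2]]
assert sum(len(leaf) for leaf in leaves) == len(X)
```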

coreax/refine.py

+7 −7

```diff
@@ -88,7 +88,7 @@ def refine_body(
     Execute main loop of the refine method, :math:`S \rightarrow x`.
 
     :param i: Loop counter
-    :param S: Loop updatables
+    :param S: Loop variables to be updated
     :param x: Original :math:`n \times d` dataset
     :param K_mean: Mean vector over rows for the Gram matrix, a :math:`1 \times n` array
     :param K_diag: Gram matrix diagonal, a :math:`1 \times n` array
@@ -209,7 +209,7 @@ def refine_rand_body(
     Execute main loop of the random refine method.
 
     :param i: Loop counter
-    :param val: Loop updatables
+    :param val: Loop variables to be updated
     :param x: Original :math:`n \times d` dataset
     :param n_cand: Number of candidates for comparison
     :param K_mean: Mean vector over rows for the Gram matrix, a :math:`1 \times n` array
@@ -228,7 +228,7 @@ def refine_rand_body(
     cand = random.randint(subkey, (n_cand,), 0, len(x))
     # cand = random.choice(subkey, len(x), (n_cand,), replace=False)
     comps = comparison_cand(S[i], cand, S, x, K_mean, K_diag, k_pairwise, k_vec)
-    S = lax.cond(jnp.any(comps > 0), change, nochange, i, S, cand, comps)
+    S = lax.cond(jnp.any(comps > 0), change, no_change, i, S, cand, comps)
 
     return key, S
 
@@ -296,7 +296,7 @@ def change(i: int, S: ArrayLike, cand: ArrayLike, comps: ArrayLike) -> Array:
 
 
 @jit
-def nochange(i: int, S: ArrayLike, cand: ArrayLike, comps: ArrayLike) -> Array:
+def no_change(i: int, S: ArrayLike, cand: ArrayLike, comps: ArrayLike) -> Array:
     r"""
     Leave ``S`` unchanged.
 
@@ -373,7 +373,7 @@ def refine_rev_body(
     Execute main loop of the refine method, :math:`x \rightarrow S`.
 
     :param i: Loop counter
-    :param S: Loop updatables
+    :param S: Loop variables to be updated
     :param x: Original :math:`n \times d` dataset
     :param K_mean: Mean vector over rows for the Gram matrix, a :math:`1 \times n` array
     :param K_diag: Gram matrix diagonal, a :math:`1 \times n` array
@@ -384,7 +384,7 @@ def refine_rev_body(
     :returns: Updated loop variables ``S``
     """
     comps = comparison_rev(i, S, x, K_mean, K_diag, k_pairwise, k_vec)
-    S = lax.cond(jnp.any(comps > 0), change_rev, nochange_rev, i, S, comps)
+    S = lax.cond(jnp.any(comps > 0), change_rev, no_change_rev, i, S, comps)
 
     return S
 
@@ -447,7 +447,7 @@ def change_rev(i: int, S: ArrayLike, comps: ArrayLike) -> Array:
 
 
 @jit
-def nochange_rev(i: int, S: ArrayLike, comps: ArrayLike) -> Array:
+def no_change_rev(i: int, S: ArrayLike, comps: ArrayLike) -> Array:
     r"""
     Leave ``S`` unchanged.
 
```
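The `change`/`no_change` pairing exists because `jax.lax.cond` traces both branches and requires them to share a signature; the identity branch is what makes the conditional JIT-compatible. A minimal self-contained illustration (the toy update below is illustrative, not coreax's refine logic):

```python
import jax.numpy as jnp
from jax import lax

def change(i, S):
    return S.at[i].set(-1)

def no_change(i, S):
    # Same signature as `change`, but leaves S untouched.
    return S

S = jnp.arange(5)
# Both branches are traced; the predicate picks one at run time.
S = lax.cond(jnp.any(S > 3), change, no_change, 2, S)
```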

coreax/score_matching.py

+5 −5

```diff
@@ -159,7 +159,7 @@ def noise_conditional_loop_body(
     Sum objective function with noise perturbations.
 
     Inputs are perturbed by Gaussian random noise to improve performance of score
-    matching. See :cite:p:`improvedsgm` for details.
+    matching. See :cite:p:`improved_sgm` for details.
 
     :param i: Loop index
     :param obj: Running objective, i.e. the current partial sum
@@ -226,7 +226,7 @@ def loss(params):
 
 def sliced_score_matching(
     X: ArrayLike,
-    rgenerator: Callable,
+    rand_generator: Callable,
     noise_conditioning: bool = True,
     use_analytic: bool = False,
     M: int = 1,
@@ -246,7 +246,7 @@ def sliced_score_matching(
     the score function. Alternative network architectures can be considered.
 
     :param X: The :math:`n \times d` data vectors
-    :param rgenerator: Distribution sampler (key, shape, dtype) :math:`\rightarrow`
+    :param rand_generator: Distribution sampler (key, shape, dtype) :math:`\rightarrow`
         :class:`~jax.Array`, e.g. distributions in :class:`~jax.random`
     :param noise_conditioning: Use the noise conditioning version of score matching,
         defaults to True
@@ -255,7 +255,7 @@ def sliced_score_matching(
     :param M: The number of random vectors to use per data vector, defaults to 1
     :param lr: Optimiser learning rate, defaults to 1e-3
     :param epochs: Epochs for training, defaults to 10
-    :param batch_size: Size of minibatch, defaults to 64
+    :param batch_size: Size of mini-batch, defaults to 64
     :param hidden_dim: The ScoreNetwork hidden dimension, defaults to 128
     :param optimiser: The optax optimiser to use, defaults to :func:`~optax.adamw`
     :param L: Number of noise models to use in noise conditional score matching,
@@ -280,7 +280,7 @@ def sliced_score_matching(
 
     # random vector setup
     k1, k2 = random.split(random.PRNGKey(0))
-    V = rgenerator(k1, (n, M, d), dtype=float)
+    V = rand_generator(k1, (n, M, d), dtype=float)
 
     # training setup
     state = create_train_state(sn, k2, lr, d, optimiser)
```
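Given the renamed parameter's documented convention, any `jax.random` sampler with a `(key, shape, dtype)` signature fits. A usage sketch using only arguments shown in the docstring above; the data `X` is illustrative, and the return value (assumed here to be a trained score approximation) is an assumption, not a confirmed part of the API:

```python
import numpy as np
from jax import random
from coreax.score_matching import sliced_score_matching

X = np.random.default_rng(0).normal(size=(500, 2))  # illustrative data

# random.rademacher and random.normal both follow the documented
# (key, shape, dtype) convention for rand_generator.
learned_score = sliced_score_matching(
    X, random.rademacher, noise_conditioning=True, M=1, epochs=10
)
```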

coreax/util.py

+1 −1

```diff
@@ -97,7 +97,7 @@ def diff(x: ArrayLike, y: ArrayLike) -> Array:
 
 
 @jit
-def pdiff(x_array: ArrayLike, y_array: ArrayLike) -> Array:
+def pairwise_diff(x_array: ArrayLike, y_array: ArrayLike) -> Array:
     r"""
     Calculate efficient pairwise difference between two arrays of vectors.
 
```
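To make the rename concrete, a minimal re-implementation sketch of what a pairwise difference helper computes, consistent with its use in `grad_rbf_y` above (an `(n, m, d)` array of $\mathbf{x}_i - \mathbf{y}_j$); coreax's actual implementation may differ:

```python
import jax.numpy as jnp
from jax import jit, vmap

@jit
def pairwise_diff_sketch(x_array, y_array):
    # Outer vmap over rows of x, inner over rows of y: output (n, m, d).
    return vmap(lambda x: vmap(lambda y: x - y)(y_array))(x_array)

x = jnp.arange(6.0).reshape(3, 2)
y = jnp.arange(4.0).reshape(2, 2)
assert pairwise_diff_sketch(x, y).shape == (3, 2, 2)
```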

coreax/weights.py

+1 −1

```diff
@@ -30,7 +30,7 @@ def calculate_BQ_weights(
     Calculate weights from Sequential Bayesian Quadrature (SBQ).
 
     References for this technique can be found in
-    :cite:p:`huszar2016optimallyweighted`. These are equivalent to the unconstrained
+    :cite:p:`huszar2016optimally`. These are equivalent to the unconstrained
     weighted maximum mean discrepancy (MMD) optimum.
 
     :param x: The original :math:`n \times d` data
```
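The "unconstrained weighted MMD optimum" the docstring mentions has a closed form: with coreset Gram matrix $K_{mm}$ and vector $\mathbf{z}$ of mean kernel evaluations between each coreset point and the full data, the optimal weights solve $K_{mm}\mathbf{w} = \mathbf{z}$. A minimal sketch of that fact, not coreax's `calculate_BQ_weights` itself:

```python
import jax.numpy as jnp

def bq_weights_sketch(K_mm, z):
    # Unconstrained minimiser of the weighted MMD: w = K_mm^{-1} z.
    return jnp.linalg.solve(K_mm, z)
```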

documentation/source/references.bib

+2 −2

```diff
@@ -7,7 +7,7 @@ @misc{chatalic2022nystrom
   primaryClass={{stat.ML}},
 }
 
-@inproceedings{improvedsgm,
+@inproceedings{improved_sgm,
   title={Improved techniques for training score-based generative models},
   author={Song, Yang and Ermon, Stefano},
   booktitle={{Advances in Neural Information Processing Systems}},
@@ -25,7 +25,7 @@ @inproceedings{ssm
   organization={PMLR}
 }
 
-@misc{huszar2016optimallyweighted,
+@misc{huszar2016optimally,
   title={{Optimally-Weighted Herding is Bayesian Quadrature}},
   author={Huszar, Ferenc and Duvenaud, David},
   year={2016},
```

examples/david.py

+5 −5

```diff
@@ -103,7 +103,7 @@ def main(
 
     print("Choosing random subset...")
     # choose a random subset of C points from the original image
-    rpoints = np.random.choice(n, C, replace=False)
+    rand_points = np.random.choice(n, C, replace=False)
 
     # define a reference kernel to use for comparisons of MMD. We'll use an RBF
     def k(x, y):
@@ -113,7 +113,7 @@ def k(x, y):
     m = mmd_block(X, X[coreset], k, max_size=1000)
 
     # compute the MMD between X and the random sample
-    rm = mmd_block(X, X[rpoints], k, max_size=1000).item()
+    rm = mmd_block(X, X[rand_points], k, max_size=1000).item()
 
     # print the MMDs
     print("Random MMD")
@@ -148,9 +148,9 @@ def k(x, y):
     # plot the image of randomly sampled points
     plt.subplot(1, 3, 3)
     plt.scatter(
-        X[rpoints, 1],
-        -X[rpoints, 0],
-        c=X[rpoints, 2],
+        X[rand_points, 1],
+        -X[rand_points, 0],
+        c=X[rand_points, 2],
         s=1.0,
         cmap="gray",
         marker="h",
```

examples/pounce.py

+2 −2

```diff
@@ -90,9 +90,9 @@ def k(x, y):
     m = mmd_block(X, X[coreset], k, max_size=1000)
 
     # get a random sample of points to compare against
-    rsample = np.random.choice(N, size=C, replace=False)
+    rand_sample = np.random.choice(N, size=C, replace=False)
     # compute the MMD between X and the random sample
-    rm = mmd_block(X, X[rsample], k, max_size=1000).item()
+    rm = mmd_block(X, X[rand_sample], k, max_size=1000).item()
 
     # print the MMDs
     print(f"Random MMD: {rm}")
```

examples/pounce_sm.py

+3 −3

```diff
@@ -32,7 +32,7 @@ def main(directory: Path = Path("../examples/data/pounce")) -> tuple[float, floa
     Run the 'pounce' example for video sampling with score matching.
 
     Take a video of a pouncing cat, apply PCA and then generate a coreset using
-    score matching, in which we train a neural network to approximate the score functon
+    score matching, in which we train a neural network to approximate the score function
     of the underlying distribution. Compare the result from this to a coreset generated
     via uniform random sampling. Coreset quality is measured using maximum mean
     discrepancy (MMD).
@@ -90,9 +90,9 @@ def k(x, y):
     m = mmd_block(X, X[coreset], k, max_size=1000)
 
     # get a random sample of points to compare against
-    rsample = np.random.choice(N, size=C, replace=False)
+    rand_sample = np.random.choice(N, size=C, replace=False)
     # compute the MMD between X and the random sample
-    rm = mmd_block(X, X[rsample], k, max_size=1000).item()
+    rm = mmd_block(X, X[rand_sample], k, max_size=1000).item()
 
     # print the MMDs
     print(f"Random MMD: {rm}")
```

examples/weighted_herding.py

+3 −3

```diff
@@ -81,7 +81,7 @@ def k(x, y):
     )
 
     # get a random sample of points to compare against
-    rsample = np.random.choice(N, size=C, replace=False)
+    rand_sample = np.random.choice(N, size=C, replace=False)
 
     # the weighted bool turns the coreset weights on or off. If on, a quadratic program
     # is invoked to solve the weights' vector. This buys some increase in integration
@@ -103,7 +103,7 @@ def k(x, y):
     m = m.item()
 
     # compute the MMD between X and the random sample
-    rm = mmd_block(X, X[rsample], k, max_size=1000).item()
+    rm = mmd_block(X, X[rand_sample], k, max_size=1000).item()
 
     # nudge the weights to avoid negative entries for plotting
     if weights.min() < 0:
@@ -117,7 +117,7 @@ def k(x, y):
         plt.show()
 
     plt.scatter(X[:, 0], X[:, 1], s=2.0, alpha=0.1)
-    plt.scatter(X[rsample, 0], X[rsample, 1], s=10, color="red")
+    plt.scatter(X[rand_sample, 0], X[rand_sample, 1], s=10, color="red")
     plt.title("Random, m=%d, MMD=%.6f" % (C, rm))
     plt.axis("off")
 
```