diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index eb9fb9f..aad17ea 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,14 @@ repos:
   - id: flake8
     exclude: ^causalnex/ebaybbn
 
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.2.5
+  hooks:
+  - id: codespell
+    args:
+      - --skip=docs/source/*
+      - --ignore-words-list=fro,jaques,fpr,te
+
 - repo: https://github.com/pre-commit/mirrors-isort
   rev: v4.3.21
   hooks:
diff --git a/causalnex/discretiser/discretiser_strategy.py b/causalnex/discretiser/discretiser_strategy.py
index 480cca1..41e4609 100644
--- a/causalnex/discretiser/discretiser_strategy.py
+++ b/causalnex/discretiser/discretiser_strategy.py
@@ -245,7 +245,7 @@ def __init__(
 
         Args:
             min_depth: The minimum depth of the interval splitting.
-            min_split: The minmum size to split a bin
+            min_split: The minimum size to split a bin
             dtype: The type of the array returned by the `transform()` method
             **dlp_args: keyword arguments, which are parameters used for `mdlp.discretization.MDLP`
         Raises:
diff --git a/causalnex/ebaybbn/bbn.py b/causalnex/ebaybbn/bbn.py
index f85ddea..8c9455a 100644
--- a/causalnex/ebaybbn/bbn.py
+++ b/causalnex/ebaybbn/bbn.py
@@ -76,7 +76,7 @@ def __init__(self, nodes_dict, name=None, domains={}):
             # variable it 'introduced'.
             # Note that we cannot record
             # this duing Node instantiation
-            # becuase at that point we do
+            # because at that point we do
             # not yet know *which* of the
             # variables in the argument
             # list is the one being modeled
@@ -220,7 +220,7 @@ def initialize_potentials(self, assignments, bbn, evidence={}):
 
         # Step 2: Note that in H&D the assignments are
         # done as part of step 2 however we have
-        # seperated the assignment algorithm out and
+        # separated the assignment algorithm out and
         # done these prior to step 1.
         # Now for each assignment we want to
         # generate a truth-table from the
@@ -302,7 +302,7 @@ def assign_clusters(self, bbn):
                 # once and once only. The example
                 # in H&D just happens to be a clique
                 # that f_a could have been assigned
-                # to but wasnt presumably because
+                # to but wasn't presumably because
                 # it got assigned somewhere else.
                 pass
                 # continue
@@ -313,7 +313,7 @@ def assign_clusters(self, bbn):
             family = set(args)
             # At this point we need to know which *variable*
             # a BBN node represents. Up to now we have
-            # not *explicitely* specified this, however
+            # not *explicitly* specified this, however
             # we have been following some conventions
             # so we could just use this convention for
             # now. Need to come back to this to
@@ -426,8 +426,8 @@ def marginal(self, bbn_node):
         for node in self.clique_nodes:
             if bbn_node.name in [n.name for n in node.clique.nodes]:
                 containing_nodes.append(node)
-                # In theory it doesnt matter which one we
-                # use so we could bale out after we
+                # In theory it doesn't matter which one we
+                # use so we could bail out after we
                 # find the first one
                 # TODO: With some better indexing we could
                 # avoid searching for this node every time...
@@ -540,7 +540,7 @@ def pass_message(self, target):
         logging.debug(
             " Send the summed marginals to the target: %s ", str(sepset_node)
         )
-        # Step 2 absorbtion
+        # Step 2 absorption
         self.absorb(sepset_node, target)
 
     def project(self, sepset_node):
@@ -572,7 +572,7 @@ def absorb(self, sepset, target):
         # Assign a new potential tt to
         # Y (the target)
         logging.debug(
-            "Absorb potentails from sepset node %s into clique %s",
+            "Absorb potentials from sepset node %s into clique %s",
             sepset.name,
             target.name,
         )
@@ -650,7 +650,7 @@ def insert(self, forest):
         cliques are in different trees,
         means that effectively we are
         collapsing the two trees into
-        one. We will explicitely perform
+        one. We will explicitly perform
         this collapse by adding the sepset
         node into the tree and adding
         edges between itself
diff --git a/causalnex/network/network.py b/causalnex/network/network.py
index 4ff3738..4467b67 100644
--- a/causalnex/network/network.py
+++ b/causalnex/network/network.py
@@ -402,7 +402,7 @@ def fit_cpds(
                   regardless of variable cardinality;
                 - "BDeu": equivalent of using Dirichlet and using uniform 'pseudo_counts' of
                   `equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
-                  for each node. Use equivelant_sample_size.
+                  for each node. Use equivalent_sample_size.
             equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.
 
         Returns:
@@ -463,7 +463,7 @@ def fit_node_states_and_cpds(
                   regardless of variable cardinality;
                 - "BDeu": equivalent of using dirichlet and using uniform 'pseudo_counts' of
                   `equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
-                  for each node. Use equivelant_sample_size.
+                  for each node. Use equivalent_sample_size.
             equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.
 
         Returns:
diff --git a/causalnex/structure/data_generators/wrappers.py b/causalnex/structure/data_generators/wrappers.py
index f8abac1..2077267 100644
--- a/causalnex/structure/data_generators/wrappers.py
+++ b/causalnex/structure/data_generators/wrappers.py
@@ -620,7 +620,7 @@ def gen_stationary_dyn_net_and_df(  # pylint: disable=R0913, R0914
         w_decay: exponent of weights decay for slices that are farther apart. Default is 1.0, which implies no decay
         sem_type: {linear-gauss,linear-exp,linear-gumbel}
         noise_scale: scale parameter of noise distribution in linear SEM
-        max_data_gen_trials: maximun number of attempts until obtaining a seemingly stationary model
+        max_data_gen_trials: maximum number of attempts until obtaining a seemingly stationary model
     Returns:
         Tuple with:
         - the model created,as a Structure model
diff --git a/causalnex/structure/pytorch/notears.py b/causalnex/structure/pytorch/notears.py
index 68e367c..a229822 100644
--- a/causalnex/structure/pytorch/notears.py
+++ b/causalnex/structure/pytorch/notears.py
@@ -344,7 +344,7 @@ def from_pandas(
         **kwargs,
     )
 
-    # set comprehension to ensure only unique dist types are extraced
+    # set comprehension to ensure only unique dist types are extracted
     # NOTE: this prevents double-renaming caused by the same dist type used on expanded columns
     unique_dist_types = {node[1]["dist_type"] for node in g.nodes(data=True)}
     # use the dist types to update the idx_col mapping
@@ -375,7 +375,7 @@ def from_pandas(
         node_name = idx_col_expanded[node[0]]
         sm.nodes[node_name]["bias"] = node[1]["bias"]
 
-    # recover and preseve the node dist_types
+    # recover and preserve the node dist_types
    for node_data in g.nodes(data=True):
         node_name = idx_col_expanded[node_data[0]]
         sm.nodes[node_name]["dist_type"] = node_data[1]["dist_type"]
diff --git a/causalnex/structure/pytorch/sklearn/_base.py b/causalnex/structure/pytorch/sklearn/_base.py
index 5eb0d51..f69125d 100644
--- a/causalnex/structure/pytorch/sklearn/_base.py
+++ b/causalnex/structure/pytorch/sklearn/_base.py
@@ -82,7 +82,7 @@ def __init__(
             alpha: l1 loss weighting. When using nonlinear layers this is only applied
                 to the first layer.
 
-            beta: l2 loss weighting. Applied across all layers. Reccomended to use this
+            beta: l2 loss weighting. Applied across all layers. Recommended to use this
                 when fitting nonlinearities.
 
             fit_intercept: Whether to fit an intercept in the structure model
@@ -111,7 +111,7 @@ def __init__(
 
             standardize: Whether to standardize the X and y variables before fitting.
                 The L-BFGS algorithm used to fit the underlying NOTEARS works best on data
-                all of the same scale so this parameter is reccomended.
+                all of the same scale so this parameter is recommended.
 
             notears_mlp_kwargs: Additional arguments for the NOTEARS MLP model.
 
@@ -160,7 +160,7 @@ def __init__(
         self.target_dist_type = target_dist_type
         self.notears_mlp_kwargs = notears_mlp_kwargs
 
-        # sklearn wrapper paramters
+        # sklearn wrapper parameters
         self.dependent_target = dependent_target
         self.enforce_dag = enforce_dag
         self.standardize = standardize
diff --git a/causalnex/utils/pgmpy_utils.py b/causalnex/utils/pgmpy_utils.py
index f83c791..39b987e 100644
--- a/causalnex/utils/pgmpy_utils.py
+++ b/causalnex/utils/pgmpy_utils.py
@@ -113,7 +113,7 @@ def cpd_multiplication(
 
     Args:
         cpds: cpds to multiply
-        normalize: wether to normalise the columns, so that each column sums to 1
+        normalize: whether to normalise the columns, so that each column sums to 1
 
     Returns:
         Pandas dataframe containing the resulting product, looking like a cpd
diff --git a/devel-gpu.Dockerfile b/devel-gpu.Dockerfile
index 32cc996..6a22f02 100644
--- a/devel-gpu.Dockerfile
+++ b/devel-gpu.Dockerfile
@@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive
 RUN apt-get update -y && apt install -y python3.8 python3-pip
 RUN ln -s $(which python3) /usr/local/bin/python
 
-# Copy all files to container as intalling .[pytorch] requires setup.py, which requires other files
+# Copy all files to container as installing .[pytorch] requires setup.py, which requires other files
 COPY . /tmp
 WORKDIR /tmp
diff --git a/tests/ebaybbn/test_ebaybbn.py b/tests/ebaybbn/test_ebaybbn.py
index 17a7b14..cc605da 100644
--- a/tests/ebaybbn/test_ebaybbn.py
+++ b/tests/ebaybbn/test_ebaybbn.py
@@ -287,7 +287,7 @@ def priority_func_override(node):
 
     def test_initialize_potentials(self, huang_darwiche_jt, huang_darwiche_dag):
         # Seems like there can be multiple assignments so
-        # for this test we will set the assignments explicitely
+        # for this test we will set the assignments explicitly
         cliques = {node.name: node for node in huang_darwiche_jt.nodes}
         bbn_nodes = {node.name: node for node in huang_darwiche_dag.nodes}
         assignments = {
diff --git a/tests/estimator/test_em.py b/tests/estimator/test_em.py
index 585fbd8..f8cbf49 100644
--- a/tests/estimator/test_em.py
+++ b/tests/estimator/test_em.py
@@ -187,7 +187,7 @@ def get_correct_cpds(
 class TestEMJobs:
     @pytest.mark.parametrize("n_jobs", [1, 3, -2])
     def test_em_no_missing_data(self, n_jobs):
-        """If all data for the latent variable is provided, the result is the same as runing bn.fit_cpds"""
+        """If all data for the latent variable is provided, the result is the same as running bn.fit_cpds"""
         df, sm, node_states, true_lv_values = naive_bayes_plus_parents(
             percentage_not_missing=1
         )
diff --git a/tests/structure/data_generators/test_core.py b/tests/structure/data_generators/test_core.py
index 76d7eb1..efc851a 100644
--- a/tests/structure/data_generators/test_core.py
+++ b/tests/structure/data_generators/test_core.py
@@ -331,7 +331,7 @@ def test_mixed_type_independence(
             seed=seed,
         )
 
-        atol = 0.02  # at least 2% difference bewteen joint & factored!
+        atol = 0.02  # at least 2% difference between joint & factored!
         # 1. dependent links
         # 0 -> 1 (we look at the class with the highest deviation from uniform
         # to avoid small values)
diff --git a/tests/structure/data_generators/test_wrappers.py b/tests/structure/data_generators/test_wrappers.py
index de144f7..61281a2 100644
--- a/tests/structure/data_generators/test_wrappers.py
+++ b/tests/structure/data_generators/test_wrappers.py
@@ -427,7 +427,7 @@ def test_dataframe(self, graph, distribution, noise_std, intercept, seed, kernel
     @pytest.mark.parametrize("seed", (10, 20, 30))
     def test_independence(self, graph_gen, seed, num_nodes):
         """
-        test whether the relation is accurate, implicitely tests sequence of
+        test whether the relation is accurate, implicitly tests sequence of
         nodes.
         """
 
@@ -633,7 +633,7 @@ def test_intercept(self, distribution, n_categories, noise_scale):
     @pytest.mark.parametrize("distribution", ["probit", "logit"])
     def test_independence(self, graph_gen, seed, num_nodes, n_categories, distribution):
         """
-        test whether the relation is accurate, implicitely tests sequence of
+        test whether the relation is accurate, implicitly tests sequence of
         nodes.
         """
         sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
diff --git a/tests/structure/test_dist_type.py b/tests/structure/test_dist_type.py
index a577ed8..b4dffb8 100644
--- a/tests/structure/test_dist_type.py
+++ b/tests/structure/test_dist_type.py
@@ -176,7 +176,7 @@ def test_preprocess_X_expanded_cols(self, dist_type, X):
         # check size of column expansion
         assert X.shape[1] == 6
 
-        # check that the correct indecies are pulled out
+        # check that the correct indices are pulled out
         assert dist_types[0].idx_group == [0, 2, 3]
         assert dist_types[1].idx_group == [1, 4, 5]
         # test that the expanded get_columns works
diff --git a/tests/structure/test_dynotears.py b/tests/structure/test_dynotears.py
index 31094e6..0ff60b3 100644
--- a/tests/structure/test_dynotears.py
+++ b/tests/structure/test_dynotears.py
@@ -266,7 +266,7 @@ def test_edges_contain_weight(self, data_dynotears_p2):
         )
 
     def test_certain_relationships_get_near_certain_weight(self):
-        """If a == b always, ther should be an edge a->b or b->a with coefficient close to one"""
+        """If a == b always, there should be an edge a->b or b->a with coefficient close to one"""
         np.random.seed(17)
 
         data = pd.DataFrame(
@@ -281,7 +281,7 @@ def test_certain_relationships_get_near_certain_weight(self):
         assert 0.99 < edge <= 1.01
 
     def test_inverse_relationships_get_negative_weight(self):
-        """If a == -b always, ther should be an edge a->b or b->a with coefficient close to minus one"""
+        """If a == -b always, there should be an edge a->b or b->a with coefficient close to minus one"""
         np.random.seed(17)
 
         data = pd.DataFrame(
@@ -565,7 +565,7 @@ def test_edges_contain_weight(self, data_dynotears_p3):
         )
 
     def test_certain_relationships_get_near_certain_weight(self):
-        """If a == b always, ther should be an edge a->b or b->a with coefficient close to one"""
+        """If a == b always, there should be an edge a->b or b->a with coefficient close to one"""
         np.random.seed(17)
 
         data = pd.DataFrame(
diff --git a/tests/structure/test_notears.py b/tests/structure/test_notears.py
index 1daffde..887ee7f 100644
--- a/tests/structure/test_notears.py
+++ b/tests/structure/test_notears.py
@@ -716,7 +716,7 @@ def test_non_negativity_constraint(self, train_data_idx):
         """
         The optimisation in notears lasso involves reshaping the initial similarity matrix
         into two strictly positive matrixes (w+ and w-) and imposing a non negativity constraint
-        to the solver. We test here if these two contraints are imposed.
+        to the solver. We test here if these two constraints are imposed.
 
         We check if:
         (1) bounds impose non negativity constraint
diff --git a/tests/structure/test_pytorch_notears.py b/tests/structure/test_pytorch_notears.py
index c8044da..1f970a9 100644
--- a/tests/structure/test_pytorch_notears.py
+++ b/tests/structure/test_pytorch_notears.py
@@ -226,7 +226,7 @@ def test_check_array(self, data):
             from_pandas(pd.DataFrame(data=data, columns=["a"]))
 
     def test_f1score_generated_binary(self):
-        """Binary strucutre learned should have good f1 score"""
+        """Binary structure learned should have good f1 score"""
         np.random.seed(10)
         sm = generate_structure(5, 2.0)
         df = generate_binary_dataframe(
@@ -255,7 +255,7 @@ def test_f1score_generated_binary(self):
         assert f1_score > 0.8
 
     def test_f1score_generated_poisson(self):
-        """Poisson strucutre learned should have good f1 score"""
+        """Poisson structure learned should have good f1 score"""
         np.random.seed(10)
         sm = generate_structure(5, 3.0)
         df = generate_count_dataframe(
@@ -445,7 +445,7 @@ def test_non_negativity_constraint(self, train_data_idx):
         """
         The optimisation in notears lasso involves reshaping the initial similarity matrix
         into two strictly positive matrixes (w+ and w-) and imposing a non negativity constraint
-        to the solver. We test here if these two contraints are imposed.
+        to the solver. We test here if these two constraints are imposed.
 
         We check if:
         (1) bounds impose non negativity constraint
diff --git a/tests/test_bayesiannetwork.py b/tests/test_bayesiannetwork.py
index 2c0110b..c4d0a96 100644
--- a/tests/test_bayesiannetwork.py
+++ b/tests/test_bayesiannetwork.py
@@ -815,7 +815,7 @@ def test_set_bad_cpd(self, bn, bad_cpd):
 
         with pytest.raises(
             ValueError,
-            match=r"Sum or integral of conditional probabilites for node b is not equal to 1.",
+            match=r"Sum or integral of conditional probabilities for node b is not equal to 1.",
         ):
             bn.set_cpd("b", bad_cpd)