Fix typos and add codespell pre-commit hook #214

Open
wants to merge 1 commit into base: develop

8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
@@ -22,6 +22,14 @@ repos:
- id: flake8
exclude: ^causalnex/ebaybbn

- repo: https://github.com/codespell-project/codespell
rev: v2.2.5
hooks:
- id: codespell
args:
- --skip=docs/source/*
- --ignore-words-list=fro,jaques,fpr,te

- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
hooks:
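Note: the new hook can be exercised locally with `pre-commit run codespell --all-files` (assuming `pre-commit` is installed), which applies the same `--skip` and `--ignore-words-list` arguments as CI. The ignore list presumably covers tokens that are intentional in this codebase (for example `fro`, as in a Frobenius-norm argument) rather than misspellings.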
2 changes: 1 addition & 1 deletion causalnex/discretiser/discretiser_strategy.py
@@ -245,7 +245,7 @@ def __init__(

Args:
min_depth: The minimum depth of the interval splitting.
min_split: The minmum size to split a bin
min_split: The minimum size to split a bin
dtype: The type of the array returned by the `transform()` method
**dlp_args: keyword arguments, which are parameters used for `mdlp.discretization.MDLP`
Raises:
18 changes: 9 additions & 9 deletions causalnex/ebaybbn/bbn.py
@@ -76,7 +76,7 @@ def __init__(self, nodes_dict, name=None, domains={}):
# variable it 'introduced'.
# Note that we cannot record
# this duing Node instantiation
# becuase at that point we do
# because at that point we do
# not yet know *which* of the
# variables in the argument
# list is the one being modeled
@@ -220,7 +220,7 @@ def initialize_potentials(self, assignments, bbn, evidence={}):

# Step 2: Note that in H&D the assignments are
# done as part of step 2 however we have
# seperated the assignment algorithm out and
# separated the assignment algorithm out and
# done these prior to step 1.
# Now for each assignment we want to
# generate a truth-table from the
@@ -302,7 +302,7 @@ def assign_clusters(self, bbn):
# once and once only. The example
# in H&D just happens to be a clique
# that f_a could have been assigned
# to but wasnt presumably because
# to but wasn't presumably because
# it got assigned somewhere else.
pass
# continue
@@ -313,7 +313,7 @@ def assign_clusters(self, bbn):
family = set(args)
# At this point we need to know which *variable*
# a BBN node represents. Up to now we have
# not *explicitely* specified this, however
# not *explicitly* specified this, however
# we have been following some conventions
# so we could just use this convention for
# now. Need to come back to this to
@@ -426,8 +426,8 @@ def marginal(self, bbn_node):
for node in self.clique_nodes:
if bbn_node.name in [n.name for n in node.clique.nodes]:
containing_nodes.append(node)
# In theory it doesnt matter which one we
# use so we could bale out after we
# In theory it doesn't matter which one we
# use so we could bail out after we
# find the first one
# TODO: With some better indexing we could
# avoid searching for this node every time...
@@ -540,7 +540,7 @@ def pass_message(self, target):

logging.debug(" Send the summed marginals to the target: %s ", str(sepset_node))

# Step 2 absorbtion
# Step 2 absorption
self.absorb(sepset_node, target)

def project(self, sepset_node):
@@ -572,7 +572,7 @@ def absorb(self, sepset, target):
# Assign a new potential tt to
# Y (the target)
logging.debug(
"Absorb potentails from sepset node %s into clique %s",
"Absorb potentials from sepset node %s into clique %s",
sepset.name,
target.name,
)
@@ -650,7 +650,7 @@ def insert(self, forest):
cliques are in different trees,
means that effectively we are
collapsing the two trees into
one. We will explicitely perform
one. We will explicitly perform
this collapse by adding the
sepset node into the tree
and adding edges between itself
4 changes: 2 additions & 2 deletions causalnex/network/network.py
@@ -402,7 +402,7 @@ def fit_cpds(
regardless of variable cardinality;
- "BDeu": equivalent of using Dirichlet and using uniform 'pseudo_counts' of
`equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
for each node. Use equivelant_sample_size.
for each node. Use equivalent_sample_size.
equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.

Returns:
@@ -463,7 +463,7 @@ def fit_node_states_and_cpds(
regardless of variable cardinality;
- "BDeu": equivalent of using dirichlet and using uniform 'pseudo_counts' of
`equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))`
for each node. Use equivelant_sample_size.
for each node. Use equivalent_sample_size.
equivalent_sample_size: used by BDeu bayes_prior to compute pseudo_counts.

Returns:
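As a side note on the docstrings corrected above: the BDeu pseudo-count formula they quote is easy to sanity-check with a small sketch (hypothetical cardinalities, not part of this diff):

```python
import numpy as np

# Hypothetical node with 3 states and two parents with 2 and 4 states each.
equivalent_sample_size = 10
node_cardinality = 3
parents_cardinalities = [2, 4]

# Uniform pseudo-count per CPT cell, as quoted in the fit_cpds docstring.
pseudo_count = equivalent_sample_size / (node_cardinality * np.prod(parents_cardinalities))
print(pseudo_count)  # 10 / (3 * 8) ~= 0.417
```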
2 changes: 1 addition & 1 deletion causalnex/structure/data_generators/wrappers.py
@@ -620,7 +620,7 @@ def gen_stationary_dyn_net_and_df( # pylint: disable=R0913, R0914
w_decay: exponent of weights decay for slices that are farther apart. Default is 1.0, which implies no decay
sem_type: {linear-gauss,linear-exp,linear-gumbel}
noise_scale: scale parameter of noise distribution in linear SEM
max_data_gen_trials: maximun number of attempts until obtaining a seemingly stationary model
max_data_gen_trials: maximum number of attempts until obtaining a seemingly stationary model
Returns:
Tuple with:
- the model created,as a Structure model
4 changes: 2 additions & 2 deletions causalnex/structure/pytorch/notears.py
@@ -344,7 +344,7 @@ def from_pandas(
**kwargs,
)

# set comprehension to ensure only unique dist types are extraced
# set comprehension to ensure only unique dist types are extracted
# NOTE: this prevents double-renaming caused by the same dist type used on expanded columns
unique_dist_types = {node[1]["dist_type"] for node in g.nodes(data=True)}
# use the dist types to update the idx_col mapping
@@ -375,7 +375,7 @@ def from_pandas(
node_name = idx_col_expanded[node[0]]
sm.nodes[node_name]["bias"] = node[1]["bias"]

# recover and preseve the node dist_types
# recover and preserve the node dist_types
for node_data in g.nodes(data=True):
node_name = idx_col_expanded[node_data[0]]
sm.nodes[node_name]["dist_type"] = node_data[1]["dist_type"]
6 changes: 3 additions & 3 deletions causalnex/structure/pytorch/sklearn/_base.py
@@ -82,7 +82,7 @@ def __init__(
alpha: l1 loss weighting. When using nonlinear layers this is only applied
to the first layer.

beta: l2 loss weighting. Applied across all layers. Reccomended to use this
beta: l2 loss weighting. Applied across all layers. Recommended to use this
when fitting nonlinearities.

fit_intercept: Whether to fit an intercept in the structure model
@@ -111,7 +111,7 @@ def __init__(

standardize: Whether to standardize the X and y variables before fitting.
The L-BFGS algorithm used to fit the underlying NOTEARS works best on data
all of the same scale so this parameter is reccomended.
all of the same scale so this parameter is recommended.

notears_mlp_kwargs: Additional arguments for the NOTEARS MLP model.

@@ -160,7 +160,7 @@ def __init__(
self.target_dist_type = target_dist_type
self.notears_mlp_kwargs = notears_mlp_kwargs

# sklearn wrapper paramters
# sklearn wrapper parameters
self.dependent_target = dependent_target
self.enforce_dag = enforce_dag
self.standardize = standardize
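For context on the parameters mentioned in the corrected docstrings above, a minimal sketch of how they are typically passed to the sklearn-style wrapper might look as follows (the `DAGRegressor` name and import path are assumed here, not shown in this diff):

```python
# Sketch only: wrapper name and import path assumed, not part of this diff.
from causalnex.structure.pytorch import DAGRegressor

reg = DAGRegressor(
    alpha=0.01,        # l1 weighting (applied to the first layer when using nonlinear layers)
    beta=0.05,         # l2 weighting, applied across all layers; recommended with nonlinearities
    standardize=True,  # standardize X and y before fitting, recommended for the L-BFGS optimiser
)
```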
2 changes: 1 addition & 1 deletion causalnex/utils/pgmpy_utils.py
@@ -113,7 +113,7 @@ def cpd_multiplication(

Args:
cpds: cpds to multiply
normalize: wether to normalise the columns, so that each column sums to 1
normalize: whether to normalise the columns, so that each column sums to 1

Returns:
Pandas dataframe containing the resulting product, looking like a cpd
2 changes: 1 addition & 1 deletion devel-gpu.Dockerfile
@@ -4,7 +4,7 @@ ENV DEBIAN_FRONTEND=noninteractive

RUN apt-get update -y && apt install -y python3.8 python3-pip
RUN ln -s $(which python3) /usr/local/bin/python
# Copy all files to container as intalling .[pytorch] requires setup.py, which requires other files
# Copy all files to container as installing .[pytorch] requires setup.py, which requires other files
COPY . /tmp
WORKDIR /tmp

2 changes: 1 addition & 1 deletion tests/ebaybbn/test_ebaybbn.py
@@ -287,7 +287,7 @@ def priority_func_override(node):

def test_initialize_potentials(self, huang_darwiche_jt, huang_darwiche_dag):
# Seems like there can be multiple assignments so
# for this test we will set the assignments explicitely
# for this test we will set the assignments explicitly
cliques = {node.name: node for node in huang_darwiche_jt.nodes}
bbn_nodes = {node.name: node for node in huang_darwiche_dag.nodes}
assignments = {
2 changes: 1 addition & 1 deletion tests/estimator/test_em.py
@@ -187,7 +187,7 @@ def get_correct_cpds(
class TestEMJobs:
@pytest.mark.parametrize("n_jobs", [1, 3, -2])
def test_em_no_missing_data(self, n_jobs):
"""If all data for the latent variable is provided, the result is the same as runing bn.fit_cpds"""
"""If all data for the latent variable is provided, the result is the same as running bn.fit_cpds"""
df, sm, node_states, true_lv_values = naive_bayes_plus_parents(
percentage_not_missing=1
)
2 changes: 1 addition & 1 deletion tests/structure/data_generators/test_core.py
@@ -331,7 +331,7 @@ def test_mixed_type_independence(
seed=seed,
)

atol = 0.02 # at least 2% difference bewteen joint & factored!
atol = 0.02 # at least 2% difference between joint & factored!
# 1. dependent links
# 0 -> 1 (we look at the class with the highest deviation from uniform
# to avoid small values)
4 changes: 2 additions & 2 deletions tests/structure/data_generators/test_wrappers.py
@@ -427,7 +427,7 @@ def test_dataframe(self, graph, distribution, noise_std, intercept, seed, kernel
@pytest.mark.parametrize("seed", (10, 20, 30))
def test_independence(self, graph_gen, seed, num_nodes):
"""
test whether the relation is accurate, implicitely tests sequence of
test whether the relation is accurate, implicitly tests sequence of
nodes.
"""

@@ -633,7 +633,7 @@ def test_intercept(self, distribution, n_categories, noise_scale):
@pytest.mark.parametrize("distribution", ["probit", "logit"])
def test_independence(self, graph_gen, seed, num_nodes, n_categories, distribution):
"""
test whether the relation is accurate, implicitely tests sequence of
test whether the relation is accurate, implicitly tests sequence of
nodes.
"""
sm = graph_gen(num_nodes=num_nodes, seed=seed, weight=None)
2 changes: 1 addition & 1 deletion tests/structure/test_dist_type.py
@@ -176,7 +176,7 @@ def test_preprocess_X_expanded_cols(self, dist_type, X):
# check size of column expansion
assert X.shape[1] == 6

# check that the correct indecies are pulled out
# check that the correct indices are pulled out
assert dist_types[0].idx_group == [0, 2, 3]
assert dist_types[1].idx_group == [1, 4, 5]
# test that the expanded get_columns works
6 changes: 3 additions & 3 deletions tests/structure/test_dynotears.py
@@ -266,7 +266,7 @@ def test_edges_contain_weight(self, data_dynotears_p2):
)

def test_certain_relationships_get_near_certain_weight(self):
"""If a == b always, ther should be an edge a->b or b->a with coefficient close to one"""
"""If a == b always, there should be an edge a->b or b->a with coefficient close to one"""

np.random.seed(17)
data = pd.DataFrame(
@@ -281,7 +281,7 @@ def test_certain_relationships_get_near_certain_weight(self):
assert 0.99 < edge <= 1.01

def test_inverse_relationships_get_negative_weight(self):
"""If a == -b always, ther should be an edge a->b or b->a with coefficient close to minus one"""
"""If a == -b always, there should be an edge a->b or b->a with coefficient close to minus one"""

np.random.seed(17)
data = pd.DataFrame(
@@ -565,7 +565,7 @@ def test_edges_contain_weight(self, data_dynotears_p3):
)

def test_certain_relationships_get_near_certain_weight(self):
"""If a == b always, ther should be an edge a->b or b->a with coefficient close to one"""
"""If a == b always, there should be an edge a->b or b->a with coefficient close to one"""

np.random.seed(17)
data = pd.DataFrame(
2 changes: 1 addition & 1 deletion tests/structure/test_notears.py
@@ -716,7 +716,7 @@ def test_non_negativity_constraint(self, train_data_idx):
"""
The optimisation in notears lasso involves reshaping the initial similarity matrix
into two strictly positive matrixes (w+ and w-) and imposing a non negativity constraint
to the solver. We test here if these two contraints are imposed.
to the solver. We test here if these two constraints are imposed.

We check if:
(1) bounds impose non negativity constraint
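The reshaping into w+ and w- described in the docstring above amounts to writing W = w_plus - w_minus with both parts non-negative; a small illustrative sketch (not part of this diff):

```python
import numpy as np

# Any real weight matrix can be split into two non-negative matrices.
W = np.array([[0.0, -1.2], [0.7, 0.0]])
w_plus, w_minus = np.clip(W, 0, None), np.clip(-W, 0, None)

assert (w_plus >= 0).all() and (w_minus >= 0).all()
assert np.allclose(W, w_plus - w_minus)
```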
6 changes: 3 additions & 3 deletions tests/structure/test_pytorch_notears.py
@@ -226,7 +226,7 @@ def test_check_array(self, data):
from_pandas(pd.DataFrame(data=data, columns=["a"]))

def test_f1score_generated_binary(self):
"""Binary strucutre learned should have good f1 score"""
"""Binary structure learned should have good f1 score"""
np.random.seed(10)
sm = generate_structure(5, 2.0)
df = generate_binary_dataframe(
@@ -255,7 +255,7 @@ def test_f1score_generated_binary(self):
assert f1_score > 0.8

def test_f1score_generated_poisson(self):
"""Poisson strucutre learned should have good f1 score"""
"""Poisson structure learned should have good f1 score"""
np.random.seed(10)
sm = generate_structure(5, 3.0)
df = generate_count_dataframe(
@@ -445,7 +445,7 @@ def test_non_negativity_constraint(self, train_data_idx):
"""
The optimisation in notears lasso involves reshaping the initial similarity matrix
into two strictly positive matrixes (w+ and w-) and imposing a non negativity constraint
to the solver. We test here if these two contraints are imposed.
to the solver. We test here if these two constraints are imposed.

We check if:
(1) bounds impose non negativity constraint
2 changes: 1 addition & 1 deletion tests/test_bayesiannetwork.py
@@ -815,7 +815,7 @@ def test_set_bad_cpd(self, bn, bad_cpd):

with pytest.raises(
ValueError,
match=r"Sum or integral of conditional probabilites for node b is not equal to 1.",
match=r"Sum or integral of conditional probabilities for node b is not equal to 1.",
):
bn.set_cpd("b", bad_cpd)
