Skip to content

Commit

Permalink
chore(opfython): Adds first part of annotated typing.
Browse files Browse the repository at this point in the history
  • Loading branch information
gugarosa committed Apr 30, 2022
1 parent 1092fb5 commit e99f49f
Show file tree
Hide file tree
Showing 11 changed files with 376 additions and 322 deletions.
374 changes: 187 additions & 187 deletions opfython/math/distance.py

Large diffs are not rendered by default.

58 changes: 35 additions & 23 deletions opfython/math/general.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""General-based mathematical methods.
"""

from typing import List, Optional, Union

import numpy as np

import opfython.math.distance as d
Expand All @@ -9,15 +11,17 @@
logger = logging.get_logger(__name__)


def confusion_matrix(labels, preds):
def confusion_matrix(
labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
) -> np.array:
"""Calculates the confusion matrix between true and predicted labels.
Args:
labels (np.array | list): List or numpy array holding the true labels.
preds (np.array | list): List or numpy array holding the predicted labels.
labels: List or numpy array holding the true labels.
preds: List or numpy array holding the predicted labels.
Returns:
The confusion matrix.
(np.array): The confusion matrix.
"""

Expand All @@ -37,14 +41,14 @@ def confusion_matrix(labels, preds):
return c_matrix


def normalize(array):
def normalize(array: np.array) -> np.array:
"""Normalizes an input array.
Args:
array (np.array): Array to be normalized.
array: Array to be normalized.
Returns:
The normalized version (between 0 and 1) of the input array.
(np.array): The normalized version (between 0 and 1) of the input array.
"""

Expand All @@ -56,15 +60,17 @@ def normalize(array):
return norm_array


def opf_accuracy(labels, preds):
def opf_accuracy(
labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
) -> float:
"""Calculates the accuracy between true and predicted labels using OPF-style measure.
Args:
labels (np.array | list): List or numpy array holding the true labels.
preds (np.array | list): List or numpy array holding the predicted labels.
labels: List or numpy array holding the true labels.
preds: List or numpy array holding the predicted labels.
Returns:
The OPF accuracy measure between 0 and 1.
(float): The OPF accuracy measure between 0 and 1.
"""

Expand Down Expand Up @@ -101,15 +107,17 @@ def opf_accuracy(labels, preds):
return accuracy


def opf_accuracy_per_label(labels, preds):
def opf_accuracy_per_label(
labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
) -> float:
"""Calculates the accuracy per label between true and predicted labels using OPF-style measure.
Args:
labels (np.array | list): List or numpy array holding the true labels.
preds (np.array | list): List or numpy array holding the predicted labels.
labels: List or numpy array holding the true labels.
preds: List or numpy array holding the predicted labels.
Returns:
The OPF accuracy measure per label between 0 and 1.
(float): The OPF accuracy measure per label between 0 and 1.
"""

Expand Down Expand Up @@ -139,13 +147,15 @@ def opf_accuracy_per_label(labels, preds):
return accuracy


def pre_compute_distance(data, output, distance="log_squared_euclidean"):
def pre_compute_distance(
data: np.array, output: str, distance: Optional[str] = "log_squared_euclidean"
) -> None:
"""Pre-computes a matrix of distances based on an input data.
Args:
data (np.array): Array of samples.
output (str): File to be saved.
distance (str): Distance metric to be used.
data: Array of samples.
output: File to be saved.
distance: Distance metric to be used.
"""

Expand All @@ -167,15 +177,17 @@ def pre_compute_distance(data, output, distance="log_squared_euclidean"):
logger.info("Distances saved to: %s.", output)


def purity(labels, preds):
def purity(
labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
) -> float:
"""Calculates the purity measure of an unsupervised technique.
Args:
labels (np.array | list): List or numpy array holding the true labels.
preds (np.array | list): List or numpy array holding the assigned labels by the clusters.
labels: List or numpy array holding the true labels.
preds: List or numpy array holding the assigned labels by the clusters.
Returns:
The purity measure.
(float): The purity measure.
"""

Expand Down
28 changes: 18 additions & 10 deletions opfython/math/random.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,23 @@
"""Random numbers generators.
"""

from typing import Optional

import numpy as np


def generate_uniform_random_number(low=0.0, high=1.0, size=1):
def generate_uniform_random_number(
low: Optional[float] = 0.0, high: Optional[float] = 1.0, size: Optional[int] = 1
) -> np.array:
"""Generates a random number or array based on an uniform distribution.
Args:
low (float): Lower interval.
high (float): Higher interval.
size (int): Size of array.
low: Lower interval.
high: Higher interval.
size: Size of array.
Returns:
An uniform random number or array.
(np.array): A uniform random number or array.
"""

Expand All @@ -22,16 +26,20 @@ def generate_uniform_random_number(low=0.0, high=1.0, size=1):
return uniform_array


def generate_gaussian_random_number(mean=0.0, variance=1.0, size=1):
def generate_gaussian_random_number(
mean: Optional[float] = 0.0,
variance: Optional[float] = 1.0,
size: Optional[int] = 1,
) -> np.array:
"""Generates a random number or array based on a gaussian distribution.
Args:
mean (float): Gaussian's mean value.
variance (float): Gaussian's variance value.
size (int): Size of array.
mean: Gaussian's mean value.
variance: Gaussian's variance value.
size: Size of array.
Returns:
A gaussian random number or array.
(np.array): A gaussian random number or array.
"""

Expand Down
18 changes: 9 additions & 9 deletions opfython/stream/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@
logger = logging.get_logger(__name__)


def load_csv(csv_path):
def load_csv(csv_path: str) -> np.array:
"""Loads a CSV file into a numpy array.
Please make sure the .csv is uniform along all rows and columns.
Args:
csv_path (str): String holding the .csv's path.
csv_path: String holding the .csv's path.
Returns:
A numpy array holding the loaded data.
(np.array): A numpy array holding the loaded data.
"""

Expand All @@ -38,16 +38,16 @@ def load_csv(csv_path):
return csv


def load_txt(txt_path):
def load_txt(txt_path: str) -> np.array:
"""Loads a .txt file into a numpy array.
Please make sure the .txt is uniform along all rows and columns.
Args:
txt_path (str): A path to the .txt file.
txt_path: A path to the .txt file.
Returns:
A numpy array holding the loaded data.
(np.array): A numpy array holding the loaded data.
"""

Expand All @@ -66,16 +66,16 @@ def load_txt(txt_path):
return txt


def load_json(json_path):
def load_json(json_path: str) -> np.array:
"""Loads a .json file into a numpy array.
Please make sure the .json is uniform along all keys and items.
Args:
json_path (str): Path to the .json file.
json_path: Path to the .json file.
Returns:
A numpy array holding the loaded data.
(np.array): A numpy array holding the loaded data.
"""

Expand Down
6 changes: 3 additions & 3 deletions opfython/stream/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,14 @@
logger = logging.get_logger(__name__)


def parse_loader(data):
def parse_loader(data: np.array) -> np.array:
"""Parses data in OPF file format that was pre-loaded (.csv, .txt or .json).
Args:
data (np.array): Numpy array holding the data in OPF file format.
data: Numpy array holding the data in OPF file format.
Returns:
Arrays holding the features and labels.
(np.array): Arrays holding the features and labels.
"""

Expand Down
50 changes: 32 additions & 18 deletions opfython/stream/splitter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""Data splitting utilities.
"""

from typing import Optional, Tuple

import numpy as np

import opfython.utils.exception as e
Expand All @@ -9,17 +11,22 @@
logger = logging.get_logger(__name__)


def split(X, Y, percentage=0.5, random_state=1):
def split(
X: np.array,
Y: np.array,
percentage: Optional[float] = 0.5,
random_state: Optional[int] = 1,
) -> Tuple[np.array, np.array, np.array, np.array]:
"""Splits data into two new sets.
Args:
X (np.array): Array of features.
Y (np.array): Array of labels.
percentage (float): Percentage of the data that should be in first set.
random_state (int): An integer that fixes the random seed.
X: Array of features.
Y: Array of labels.
percentage: Percentage of the data that should be in first set.
random_state: An integer that fixes the random seed.
Returns:
Two new sets that were created from `X` and `Y`.
(Tuple[np.array, np.array, np.array, np.array]): Two new sets that were created from `X` and `Y`.
"""

Expand Down Expand Up @@ -54,17 +61,22 @@ def split(X, Y, percentage=0.5, random_state=1):
return X_1, X_2, Y_1, Y_2


def split_with_index(X, Y, percentage=0.5, random_state=1):
def split_with_index(
X: np.array,
Y: np.array,
percentage: Optional[float] = 0.5,
random_state: Optional[int] = 1,
) -> Tuple[np.array, np.array, np.array, np.array, np.array, np.array]:
"""Splits data into two new sets.
Args:
X (np.array): Array of features.
Y (np.array): Array of labels.
percentage (float): Percentage of the data that should be in first set.
random_state (int): An integer that fixes the random seed.
X: Array of features.
Y: Array of labels.
percentage: Percentage of the data that should be in first set.
random_state: An integer that fixes the random seed.
Returns:
Two new sets that were created from `X` and `Y`, along their indexes.
(Tuple[np.array, np.array, np.array, np.array, np.array, np.array]): Two new sets that were created from `X` and `Y`, along their indexes.
"""

Expand Down Expand Up @@ -102,17 +114,19 @@ def split_with_index(X, Y, percentage=0.5, random_state=1):
return X_1, X_2, Y_1, Y_2, I_1, I_2


def merge(X_1, X_2, Y_1, Y_2):
def merge(
X_1: np.array, X_2: np.array, Y_1: np.array, Y_2: np.array
) -> Tuple[np.array, np.array]:
"""Merge two sets into a new set.
Args:
X_1 (np.array): First array of features.
X_2 (np.array): Second array of features.
Y_1 (np.array): First array of labels.
Y_2 (np.array): Second array of labels.
X_1: First array of features.
X_2: Second array of features.
Y_1: First array of labels.
Y_2: Second array of labels.
Returns:
A new merged set that was created from `X_1`, `X_2`, `Y_1` and `Y_2`.
(Tuple[np.array, np.array]): A new merged set that was created from `X_1`, `X_2`, `Y_1` and `Y_2`.
"""

Expand Down
Loading

0 comments on commit e99f49f

Please sign in to comment.