chore(opfython): Adds first part of annotated typing.

gugarosa · Apr 30, 2022 · e99f49f · e99f49f
1 parent 1092fb5
commit e99f49f
Show file tree

Hide file tree

Showing 11 changed files with 376 additions and 322 deletions.
diff --git a/opfython/math/distance.py b/opfython/math/distance.py
diff --git a/opfython/math/general.py b/opfython/math/general.py
@@ -1,6 +1,8 @@
 """General-based mathematical methods.
 """
 
+from typing import List, Optional, Union
+
 import numpy as np
 
 import opfython.math.distance as d
@@ -9,15 +11,17 @@
 logger = logging.get_logger(__name__)
 
 
-def confusion_matrix(labels, preds):
+def confusion_matrix(
+    labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
+) -> np.array:
     """Calculates the confusion matrix between true and predicted labels.
 
     Args:
-        labels (np.array | list): List or numpy array holding the true labels.
-        preds (np.array | list): List or numpy array holding the predicted labels.
+        labels: List or numpy array holding the true labels.
+        preds: List or numpy array holding the predicted labels.
 
     Returns:
-        The confusion matrix.
+        (np.array): The confusion matrix.
 
     """
 
@@ -37,14 +41,14 @@ def confusion_matrix(labels, preds):
     return c_matrix
 
 
-def normalize(array):
+def normalize(array: np.array) -> np.array:
     """Normalizes an input array.
 
     Args:
-        array (np.array): Array to be normalized.
+        array: Array to be normalized.
 
     Returns:
-        The normalized version (between 0 and 1) of the input array.
+        (np.array): The normalized version (between 0 and 1) of the input array.
 
     """
 
@@ -56,15 +60,17 @@ def normalize(array):
     return norm_array
 
 
-def opf_accuracy(labels, preds):
+def opf_accuracy(
+    labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
+) -> float:
     """Calculates the accuracy between true and predicted labels using OPF-style measure.
 
     Args:
-        labels (np.array | list): List or numpy array holding the true labels.
-        preds (np.array | list): List or numpy array holding the predicted labels.
+        labels: List or numpy array holding the true labels.
+        preds: List or numpy array holding the predicted labels.
 
     Returns:
-        The OPF accuracy measure between 0 and 1.
+        (float): The OPF accuracy measure between 0 and 1.
 
     """
 
@@ -101,15 +107,17 @@ def opf_accuracy(labels, preds):
     return accuracy
 
 
-def opf_accuracy_per_label(labels, preds):
+def opf_accuracy_per_label(
+    labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
+) -> float:
     """Calculates the accuracy per label between true and predicted labels using OPF-style measure.
 
     Args:
-        labels (np.array | list): List or numpy array holding the true labels.
-        preds (np.array | list): List or numpy array holding the predicted labels.
+        labels: List or numpy array holding the true labels.
+        preds: List or numpy array holding the predicted labels.
 
     Returns:
-        The OPF accuracy measure per label between 0 and 1.
+        (float): The OPF accuracy measure per label between 0 and 1.
 
     """
 
@@ -139,13 +147,15 @@ def opf_accuracy_per_label(labels, preds):
     return accuracy
 
 
-def pre_compute_distance(data, output, distance="log_squared_euclidean"):
+def pre_compute_distance(
+    data: np.array, output: str, distance: Optional[str] = "log_squared_euclidean"
+) -> None:
     """Pre-computes a matrix of distances based on an input data.
 
     Args:
-        data (np.array): Array of samples.
-        output (str): File to be saved.
-        distance (str): Distance metric to be used.
+        data: Array of samples.
+        output: File to be saved.
+        distance: Distance metric to be used.
 
     """
 
@@ -167,15 +177,17 @@ def pre_compute_distance(data, output, distance="log_squared_euclidean"):
     logger.info("Distances saved to: %s.", output)
 
 
-def purity(labels, preds):
+def purity(
+    labels: Union[np.array, List[int]], preds: Union[np.array, List[int]]
+) -> float:
     """Calculates the purity measure of an unsupervised technique.
 
     Args:
-        labels (np.array | list): List or numpy array holding the true labels.
-        preds (np.array | list): List or numpy array holding the assigned labels by the clusters.
+        labels: List or numpy array holding the true labels.
+        preds: List or numpy array holding the assigned labels by the clusters.
 
     Returns:
-        The purity measure.
+        (float): The purity measure.
 
     """
 

diff --git a/opfython/math/random.py b/opfython/math/random.py
@@ -1,19 +1,23 @@
 """Random numbers generators.
 """
 
+from typing import Optional
+
 import numpy as np
 
 
-def generate_uniform_random_number(low=0.0, high=1.0, size=1):
+def generate_uniform_random_number(
+    low: Optional[float] = 0.0, high: Optional[float] = 1.0, size: Optional[int] = 1
+) -> np.array:
     """Generates a random number or array based on an uniform distribution.
 
     Args:
-        low (float): Lower interval.
-        high (float): Higher interval.
-        size (int): Size of array.
+        low: Lower interval.
+        high: Higher interval.
+        size: Size of array.
 
     Returns:
-        An uniform random number or array.
+        (np.array): A uniform random number or array.
 
     """
 
@@ -22,16 +26,20 @@ def generate_uniform_random_number(low=0.0, high=1.0, size=1):
     return uniform_array
 
 
-def generate_gaussian_random_number(mean=0.0, variance=1.0, size=1):
+def generate_gaussian_random_number(
+    mean: Optional[float] = 0.0,
+    variance: Optional[float] = 1.0,
+    size: Optional[int] = 1,
+) -> np.array:
     """Generates a random number or array based on a gaussian distribution.
 
     Args:
-        mean (float): Gaussian's mean value.
-        variance (float): Gaussian's variance value.
-        size (int): Size of array.
+        mean: Gaussian's mean value.
+        variance: Gaussian's variance value.
+        size: Size of array.
 
     Returns:
-        A gaussian random number or array.
+        (np.array): A gaussian random number or array.
 
     """
 

diff --git a/opfython/stream/loader.py b/opfython/stream/loader.py
@@ -10,16 +10,16 @@
 logger = logging.get_logger(__name__)
 
 
-def load_csv(csv_path):
+def load_csv(csv_path: str) -> np.array:
     """Loads a CSV file into a numpy array.
 
     Please make sure the .csv is uniform along all rows and columns.
 
     Args:
-        csv_path (str): String holding the .csv's path.
+        csv_path: String holding the .csv's path.
 
     Returns:
-        A numpy array holding the loaded data.
+        (np.array): A numpy array holding the loaded data.
 
     """
 
@@ -38,16 +38,16 @@ def load_csv(csv_path):
     return csv
 
 
-def load_txt(txt_path):
+def load_txt(txt_path: str) -> np.array:
     """Loads a .txt file into a numpy array.
 
     Please make sure the .txt is uniform along all rows and columns.
 
     Args:
-        txt_path (str): A path to the .txt file.
+        txt_path: A path to the .txt file.
 
     Returns:
-        A numpy array holding the loaded data.
+        (np.array): A numpy array holding the loaded data.
 
     """
 
@@ -66,16 +66,16 @@ def load_txt(txt_path):
     return txt
 
 
-def load_json(json_path):
+def load_json(json_path: str) -> np.array:
     """Loads a .json file into a numpy array.
 
     Please make sure the .json is uniform along all keys and items.
 
     Args:
-        json_path (str): Path to the .json file.
+        json_path: Path to the .json file.
 
     Returns:
-        A numpy array holding the loaded data.
+        (np.array): A numpy array holding the loaded data.
 
     """
 

diff --git a/opfython/stream/parser.py b/opfython/stream/parser.py
@@ -9,14 +9,14 @@
 logger = logging.get_logger(__name__)
 
 
-def parse_loader(data):
+def parse_loader(data: np.array) -> np.array:
     """Parses data in OPF file format that was pre-loaded (.csv, .txt or .json).
 
     Args:
-        data (np.array): Numpy array holding the data in OPF file format.
+        data: Numpy array holding the data in OPF file format.
 
     Returns:
-        Arrays holding the features and labels.
+        (np.array): Arrays holding the features and labels.
 
     """
 

diff --git a/opfython/stream/splitter.py b/opfython/stream/splitter.py
@@ -1,6 +1,8 @@
 """Data splitting utilities.
 """
 
+from typing import Optional, Tuple
+
 import numpy as np
 
 import opfython.utils.exception as e
@@ -9,17 +11,22 @@
 logger = logging.get_logger(__name__)
 
 
-def split(X, Y, percentage=0.5, random_state=1):
+def split(
+    X: np.array,
+    Y: np.array,
+    percentage: Optional[float] = 0.5,
+    random_state: Optional[int] = 1,
+) -> Tuple[np.array, np.array, np.array, np.array]:
     """Splits data into two new sets.
 
     Args:
-        X (np.array): Array of features.
-        Y (np.array): Array of labels.
-        percentage (float): Percentage of the data that should be in first set.
-        random_state (int): An integer that fixes the random seed.
+        X: Array of features.
+        Y: Array of labels.
+        percentage: Percentage of the data that should be in first set.
+        random_state: An integer that fixes the random seed.
 
     Returns:
-        Two new sets that were created from `X` and `Y`.
+        (Tuple[np.array, np.array, np.array, np.array]): Two new sets that were created from `X` and `Y`.
 
     """
 
@@ -54,17 +61,22 @@ def split(X, Y, percentage=0.5, random_state=1):
     return X_1, X_2, Y_1, Y_2
 
 
-def split_with_index(X, Y, percentage=0.5, random_state=1):
+def split_with_index(
+    X: np.array,
+    Y: np.array,
+    percentage: Optional[float] = 0.5,
+    random_state: Optional[int] = 1,
+) -> Tuple[np.array, np.array, np.array, np.array, np.array, np.array]:
     """Splits data into two new sets.
 
     Args:
-        X (np.array): Array of features.
-        Y (np.array): Array of labels.
-        percentage (float): Percentage of the data that should be in first set.
-        random_state (int): An integer that fixes the random seed.
+        X: Array of features.
+        Y: Array of labels.
+        percentage: Percentage of the data that should be in first set.
+        random_state: An integer that fixes the random seed.
 
     Returns:
-        Two new sets that were created from `X` and `Y`, along their indexes.
+        (Tuple[np.array, np.array, np.array, np.array, np.array, np.array]): Two new sets that were created from `X` and `Y`, along their indexes.
 
     """
 
@@ -102,17 +114,19 @@ def split_with_index(X, Y, percentage=0.5, random_state=1):
     return X_1, X_2, Y_1, Y_2, I_1, I_2
 
 
-def merge(X_1, X_2, Y_1, Y_2):
+def merge(
+    X_1: np.array, X_2: np.array, Y_1: np.array, Y_2: np.array
+) -> Tuple[np.array, np.array]:
     """Merge two sets into a new set.
 
     Args:
-        X_1 (np.array): First array of features.
-        X_2 (np.array): Second array of features.
-        Y_1 (np.array): First array of labels.
-        Y_2 (np.array): Second array of labels.
+        X_1: First array of features.
+        X_2: Second array of features.
+        Y_1: First array of labels.
+        Y_2: Second array of labels.
 
     Returns:
-        A new merged set that was created from `X_1`, `X_2`, `Y_1` and `Y_2`.
+        (Tuple[np.array, np.array]): A new merged set that was created from `X_1`, `X_2`, `Y_1` and `Y_2`.
 
     """