From 2ee618b72e86b2aeeb9be9c7f245cc28ad18daa6 Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 1 Mar 2024 20:07:42 -0500 Subject: [PATCH 1/2] accelerate directlingam with cuda implementation Signed-off-by: Victor --- .../search/FCMBased/lingam/direct_lingam.py | 36 +++++++++++++--- setup.py | 3 ++ tests/TestDirectLiNGAMfast.py | 41 +++++++++++++++++++ 3 files changed, 75 insertions(+), 5 deletions(-) create mode 100644 tests/TestDirectLiNGAMfast.py diff --git a/causallearn/search/FCMBased/lingam/direct_lingam.py b/causallearn/search/FCMBased/lingam/direct_lingam.py index ac75ad33..7db5734b 100644 --- a/causallearn/search/FCMBased/lingam/direct_lingam.py +++ b/causallearn/search/FCMBased/lingam/direct_lingam.py @@ -8,17 +8,20 @@ from sklearn.utils import check_array from .base import _BaseLiNGAM - +try: + from lingam_cuda import causal_order as causal_order_gpu +except ImportError: + pass class DirectLiNGAM(_BaseLiNGAM): """Implementation of DirectLiNGAM Algorithm [1]_ [2]_ References ---------- - .. [1] S. Shimizu, T. Inazumi, Y. Sogawa, A. Hyvärinen, Y. Kawahara, T. Washio, P. O. Hoyer and K. Bollen. + .. [1] S. Shimizu, T. Inazumi, Y. Sogawa, A. Hyvärinen, Y. Kawahara, T. Washio, P. O. Hoyer and K. Bollen. DirectLiNGAM: A direct method for learning a linear non-Gaussian structural equation model. Journal of Machine Learning Research, 12(Apr): 1225--1248, 2011. - .. [2] A. Hyvärinen and S. M. Smith. Pairwise likelihood ratios for estimation of non-Gaussian structural eauation models. - Journal of Machine Learning Research 14:111-152, 2013. + .. [2] A. Hyvärinen and S. M. Smith. Pairwise likelihood ratios for estimation of non-Gaussian structural equation models. + Journal of Machine Learning Research 14:111-152, 2013. 
""" def __init__(self, random_state=None, prior_knowledge=None, apply_prior_knowledge_softly=False, measure='pwling'): @@ -38,7 +41,7 @@ def __init__(self, random_state=None, prior_knowledge=None, apply_prior_knowledg * ``-1`` : No prior background_knowledge is available to know if either of the two cases above (0 or 1) is true. apply_prior_knowledge_softly : boolean, optional (default=False) If True, apply prior background_knowledge softly. - measure : {'pwling', 'kernel'}, optional (default='pwling') + measure : {'pwling', 'kernel', 'pwling_fast'}, optional (default='pwling') Measure to evaluate independence: 'pwling' [2]_ or 'kernel' [1]_. """ super().__init__(random_state) @@ -86,6 +89,8 @@ def fit(self, X): for _ in range(n_features): if self._measure == 'kernel': m = self._search_causal_order_kernel(X_, U) + elif self._measure == "pwling_fast": + m = self._search_causal_order_gpu(X_.astype(np.float64), U.astype(np.int32)) else: m = self._search_causal_order(X_, U) for i in U: @@ -257,3 +262,24 @@ def _search_causal_order_kernel(self, X, U): Tkernels.append(Tkernel) return Uc[np.argmin(Tkernels)] + + def _search_causal_order_gpu(self, X, U): + """Search the causal order with the GPU routine ``causal_order_gpu``. + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + Training data, where ``n_samples`` is the number of samples + and ``n_features`` is the number of features. + U : array-like, indices of the columns of ``X`` to consider + Returns + ------- + m : int + Element of ``U`` at the position where the values returned by + ``causal_order_gpu`` for the selected columns are largest. + """ + cols = len(U) + rows = len(X) + + arr = X[:, np.array(U)] + mlist = causal_order_gpu(arr, rows, cols) + return U[np.argmax(mlist)] diff --git a/setup.py b/setup.py index f132a020..faceda24 100644 --- a/setup.py +++ b/setup.py @@ -24,6 +24,9 @@ 'pydot', 'tqdm' ], + extras_require={ + 'gpu': ['culingam'] # optional dependency for accelerated lingam. cuda required. 
+ }, url='https://github.com/py-why/causal-learn', packages=setuptools.find_packages(), classifiers=[ diff --git a/tests/TestDirectLiNGAMfast.py b/tests/TestDirectLiNGAMfast.py new file mode 100644 index 00000000..ffaff4a5 --- /dev/null +++ b/tests/TestDirectLiNGAMfast.py @@ -0,0 +1,41 @@ +import sys + +sys.path.append("") +import unittest +from pickle import load + +import numpy as np +import pandas as pd +import subprocess + +from causallearn.search.FCMBased import lingam + +def get_cuda_version(): + try: + nvcc_version = subprocess.check_output(["nvcc", "--version"]).decode('utf-8') + print("CUDA Version found:\n", nvcc_version) + return True + except Exception as e: + print("CUDA not found or nvcc not in PATH:", e) + return False + +class TestDirectLiNGAMFast(unittest.TestCase): + + def test_DirectLiNGAM(self): + np.set_printoptions(precision=3, suppress=True) + np.random.seed(100) + x3 = np.random.uniform(size=1000) + x0 = 3.0 * x3 + np.random.uniform(size=1000) + x2 = 6.0 * x3 + np.random.uniform(size=1000) + x1 = 3.0 * x0 + 2.0 * x2 + np.random.uniform(size=1000) + x5 = 4.0 * x0 + np.random.uniform(size=1000) + x4 = 8.0 * x0 - 1.0 * x2 + np.random.uniform(size=1000) + X = pd.DataFrame(np.array([x0, x1, x2, x3, x4, x5]).T, columns=['x0', 'x1', 'x2', 'x3', 'x4', 'x5']) + + cuda = get_cuda_version() + if cuda: + model = lingam.DirectLiNGAM() + model.fit(X) + + print(model.causal_order_) + print(model.adjacency_matrix_) From 6039bba66060b49dd8c0d3b16bdb65ed760edd1a Mon Sep 17 00:00:00 2001 From: Victor Date: Fri, 1 Mar 2024 20:11:38 -0500 Subject: [PATCH 2/2] accelerate directlingam with cuda implementation Signed-off-by: Victor --- tests/TestDirectLiNGAMfast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/TestDirectLiNGAMfast.py b/tests/TestDirectLiNGAMfast.py index ffaff4a5..57b44c8e 100644 --- a/tests/TestDirectLiNGAMfast.py +++ b/tests/TestDirectLiNGAMfast.py @@ -34,7 +34,7 @@ def test_DirectLiNGAM(self): cuda = get_cuda_version() 
if cuda: - model = lingam.DirectLiNGAM() + model = lingam.DirectLiNGAM(measure='pwling_fast') model.fit(X) print(model.causal_order_)