remove scipy dependency

interpretml · Nov 27, 2024 · d9eff0b · d9eff0b
1 parent 595214a
commit d9eff0b
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 7 deletions.
diff --git a/python/interpret-core/interpret/data/_response.py b/python/interpret-core/interpret/data/_response.py
@@ -2,7 +2,6 @@
 # Distributed under the MIT software license
 
 import numpy as np
-from scipy.stats import pearsonr
 
 from ..api.base import ExplainerMixin, ExplanationMixin
 from ..utils._clean_simple import clean_dimensions, typify_classification
@@ -100,7 +99,10 @@ def explain_data(self, X, y, name=None):
             feature_type = feature_types[feat_idx]
             if feature_type == "continuous":
                 counts, values = np.histogram(X[:, feat_idx], bins="doane")
-                corr = pearsonr(X[:, feat_idx].astype(np.float64, copy=False), y)[0]
+                corr = np.corrcoef(X[:, feat_idx].astype(np.float64, copy=False), y)[
+                    0, 1
+                ]
+
             elif feature_type in ("nominal", "ordinal"):
                 values, counts = np.unique(X[:, feat_idx], return_counts=True)
                 corr = None

diff --git a/python/interpret-core/interpret/utils/_privacy.py b/python/interpret-core/interpret/utils/_privacy.py
@@ -4,8 +4,7 @@
 import logging
 
 import numpy as np
-from scipy.optimize import brentq
-from scipy.stats import norm
+import math
 
 from ._native import Native
 
@@ -25,10 +24,46 @@ def calc_classic_noise_multi(total_queries, target_epsilon, delta, sensitivity):
     return np.sqrt(variance)
 
 
+_sqrt_2 = math.sqrt(2)
+
+
+def norm_cdf(x):
+    """Return the standard normal cumulative distribution function at ``x``.
+
+    Args:
+        x: A real scalar (anything ``math.erfc`` accepts).
+
+    Returns:
+        float: ``0.5 * erfc(-x / sqrt(2))``.
+    """
+    # Use erfc rather than (erf + 1) / 2: for strongly negative x, erf rounds
+    # to exactly -1.0 and the sum cancels to 0.0, whereas erfc keeps the tiny
+    # tail probability at full relative precision.
+    return 0.5 * math.erfc(-x / _sqrt_2)
+
+
 # General calculations, largely borrowed from tensorflow/privacy and presented in https://arxiv.org/abs/1911.11607
 def delta_eps_mu(eps, mu):
     """Code adapted from: https://github.com/tensorflow/privacy/blob/master/tensorflow_privacy/privacy/analysis/gdp_accountant.py#L44"""
-    return norm.cdf(-eps / mu + mu / 2) - np.exp(eps) * norm.cdf(-eps / mu - mu / 2)
+
+    # Evaluates Phi(-eps/mu + mu/2) - exp(eps) * Phi(-eps/mu - mu/2), where
+    # Phi is the standard normal CDF supplied by norm_cdf.
+    neg_ratio = -eps / mu
+    half_mu = mu / 2
+    upper_term = norm_cdf(neg_ratio + half_mu)
+    lower_term = norm_cdf(neg_ratio - half_mu)
+    return upper_term - np.exp(eps) * lower_term
+
+
+def brentq_local(f, a, b):
+    """Locate a root of ``f`` inside the bracket spanned by ``a`` and ``b``.
+
+    Despite the name, this is plain bisection on the arithmetic midpoint
+    rather than Brent's method.
+
+    Args:
+        f: Callable taking one number and returning a number.
+        a: One end of the bracket.
+        b: The other end of the bracket.
+
+    Returns:
+        ``a`` or ``b`` when ``f`` is exactly zero there, a midpoint at which
+        ``f`` is exactly zero, or otherwise the midpoint with the smallest
+        ``abs(f(c))`` seen during the search.
+
+    Raises:
+        ValueError: If ``f(a)`` and ``f(b)`` are not of strictly opposite
+            sign (NaN values also fail this check).
+    """
+    fa = f(a)
+    fb = f(b)
+
+    # An endpoint that is already an exact root needs no search.
+    if fa == 0:
+        return a
+    if fb == 0:
+        return b
+
+    # Compare signs directly: the product fa * fb can underflow to -0.0 for
+    # tiny values of opposite sign, which would look like "no sign change".
+    if not (fa < 0 < fb or fb < 0 < fa):
+        raise ValueError("The function must have different signs at a and b.")
+
+    # Safety valve: give up after this many consecutive midpoints that fail
+    # to reduce abs(f(c)).
+    max_stalled_iters = 100
+
+    best_abs_fc = float("inf")
+    best_c = a * 0.5 + b * 0.5
+    iters_no_improvement = 0
+
+    while iters_no_improvement < max_stalled_iters:
+        iters_no_improvement += 1
+        c = a * 0.5 + b * 0.5
+        fc = f(c)
+        if fc == 0:
+            # Exact root found; nothing can beat it.
+            return c
+        abs_fc = abs(fc)
+        if abs_fc < best_abs_fc:
+            best_abs_fc = abs_fc
+            best_c = c
+            iters_no_improvement = 0
+        if c == a or c == b:
+            # The midpoint rounded onto an endpoint, so the bracket cannot
+            # shrink any further and more evaluations would repeat this one.
+            break
+        if (fa < 0) != (fc < 0):
+            # Sign change lies between a and c.
+            b = c
+        else:
+            # Sign change lies between c and b.
+            a = c
+            fa = fc
+    return best_c
 
 
 def calc_gdp_noise_multi(total_queries, target_epsilon, delta):
@@ -37,7 +72,7 @@ def calc_gdp_noise_multi(total_queries, target_epsilon, delta):
     def f(mu, eps, delta):
         return delta_eps_mu(eps, mu) - delta
 
-    final_mu = brentq(lambda x: f(x, target_epsilon, delta), 1e-5, 1000)
+    final_mu = brentq_local(lambda x: f(x, target_epsilon, delta), 1e-5, 1000)
     return np.sqrt(total_queries) / final_mu
 
 

diff --git a/python/interpret-core/setup.py b/python/interpret-core/setup.py
@@ -238,7 +238,6 @@ def run(self):
     },
     install_requires=[
         "numpy>=1.25",
-        "scipy>=0.18.1",
         "pandas>=0.19.2",
         "scikit-learn>=0.18.1",
         "joblib>=0.11",
@@ -269,6 +268,7 @@ def run(self):
         ],
         # Testing
         "testing": [
+            "scipy>=0.18.1",
             "scikit-learn>=1.0.0",
             "pytest>=4.3.0",
             "pytest-runner>=4.4",