From e2e72600468440ed43c99033307e1cf0182d55ee Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Thu, 22 May 2025 17:19:47 -0500
Subject: [PATCH 1/3] Merged changes from branch BUG_df.rolling_#61416 to be
 included in PR

---
 doc/source/whatsnew/v3.0.0.rst                |   1 +
 pandas/_libs/window/aggregations.pyx          | 123 ++++++++++--------
 pandas/tests/window/test_rolling_skew_kurt.py |  50 +++++++
 3 files changed, 121 insertions(+), 53 deletions(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 099e5bc48353a..d060da4bc8743 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -830,6 +830,7 @@ Groupby/resample/rolling
 - Bug in :meth:`DataFrameGroupby.transform` and :meth:`SeriesGroupby.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
 - Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
 - Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_period`` periods if ``method="table"``. (:issue:`58868`)
+- Bug in :meth:`Rolling.kurt` where kurtosis calculations would unintentionally be influenced by values outside of scope. (:issue:`61416`)
 - Bug in :meth:`Series.resample` could raise when the the date range ended shortly before a non-existent time. (:issue:`58380`)
 
 Reshaping
diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 0c8ea28b60ce8..0595ca40bad60 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -1,13 +1,18 @@
 # cython: boundscheck=False, wraparound=False, cdivision=True
-
 from libc.math cimport (
     round,
     signbit,
     sqrt,
+    pow,
+    log10,
+    abs,
+    isfinite,
 )
 from libcpp.deque cimport deque
 from libcpp.stack cimport stack
 from libcpp.unordered_map cimport unordered_map
+from libcpp cimport bool
+
 
 from pandas._libs.algos cimport TiebreakEnumType
 
@@ -21,6 +26,8 @@ from numpy cimport (
     ndarray,
 )
 
+
+
 cnp.import_array()
 
 import cython
@@ -724,6 +731,46 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
 
     return result
 
+cdef void update_sum_of_window( float64_t val,
+                                float64_t **x_value,
+                                float64_t **comp_value,
+                                int power_of_element,  
+                                bool add_mode,   #1 for add_kurt, 0 for remove_kurt
+                                ) noexcept nogil:
+
+    cdef:
+        float64_t val_raised, new_sum
+        bool val_length_flag, x_length_flag
+    
+    if add_mode:
+        val_raised = pow(val, power_of_element)
+    else:
+        val_raised = -pow(val, power_of_element)
+
+    x_length_flag = abs(log10(abs(x_value[0][0]))) > 15 and isfinite(abs(log10(abs(x_value[0][0])))) == 1
+    val_length_flag = abs(log10(abs(val_raised))) > 15 and isfinite(abs(log10(abs(val_raised)))) == 1
+
+    # We'll try to maintain comp_value as the counter for 
+    # numbers <1e15 to keep it from getting rounded out.
+    if x_length_flag and val_length_flag:
+        #Both > 1e15 or < 1-e15
+        x_value[0][0] += val_raised
+
+    elif x_length_flag:
+        comp_value[0][0] += val_raised
+
+
+    elif val_length_flag:
+        comp_value[0][0] += x_value[0][0]
+        x_value[0][0] = val_raised
+        
+    else:
+        #Neither are >1e15/<1e-15, safe to proceed
+        x_value[0][0] += val_raised
+
+        if comp_value[0][0] != 0:
+            x_value[0][0] += comp_value[0][0]
+            comp_value[0][0] = 0
 
 cdef void add_kurt(float64_t val, int64_t *nobs,
                    float64_t *x, float64_t *xx,
@@ -736,29 +783,15 @@ cdef void add_kurt(float64_t val, int64_t *nobs,
                    float64_t *prev_value
                    ) noexcept nogil:
     """ add a value from the kurotic calc """
-    cdef:
-        float64_t y, t
 
     # Not NaN
     if val == val:
         nobs[0] = nobs[0] + 1
 
-        y = val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
-        y = val * val * val * val - compensation_xxxx[0]
-        t = xxxx[0] + y
-        compensation_xxxx[0] = t - xxxx[0] - y
-        xxxx[0] = t
+        update_sum_of_window(val, &x,    &compensation_x,    1, 1)
+        update_sum_of_window(val, &xx,   &compensation_xx,   2, 1)
+        update_sum_of_window(val, &xxx,  &compensation_xxx,  3, 1)
+        update_sum_of_window(val, &xxxx, &compensation_xxxx, 4, 1)
 
         # GH#42064, record num of same values to remove floating point artifacts
         if val == prev_value[0]:
@@ -768,7 +801,6 @@ cdef void add_kurt(float64_t val, int64_t *nobs,
             num_consecutive_same_value[0] = 1
         prev_value[0] = val
 
-
 cdef void remove_kurt(float64_t val, int64_t *nobs,
                       float64_t *x, float64_t *xx,
                       float64_t *xxx, float64_t *xxxx,
@@ -777,40 +809,25 @@ cdef void remove_kurt(float64_t val, int64_t *nobs,
                       float64_t *compensation_xxx,
                       float64_t *compensation_xxxx) noexcept nogil:
     """ remove a value from the kurotic calc """
-    cdef:
-        float64_t y, t
 
     # Not NaN
     if val == val:
         nobs[0] = nobs[0] - 1
 
-        y = - val - compensation_x[0]
-        t = x[0] + y
-        compensation_x[0] = t - x[0] - y
-        x[0] = t
-        y = - val * val - compensation_xx[0]
-        t = xx[0] + y
-        compensation_xx[0] = t - xx[0] - y
-        xx[0] = t
-        y = - val * val * val - compensation_xxx[0]
-        t = xxx[0] + y
-        compensation_xxx[0] = t - xxx[0] - y
-        xxx[0] = t
-        y = - val * val * val * val - compensation_xxxx[0]
-        t = xxxx[0] + y
-        compensation_xxxx[0] = t - xxxx[0] - y
-        xxxx[0] = t
-
+        update_sum_of_window(val, &x,    &compensation_x,    1, 0)
+        update_sum_of_window(val, &xx,   &compensation_xx,   2, 0)
+        update_sum_of_window(val, &xxx,  &compensation_xxx,  3, 0)
+        update_sum_of_window(val, &xxxx, &compensation_xxxx, 4, 0)
 
 def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
               ndarray[int64_t] end, int64_t minp) -> np.ndarray:
     cdef:
         Py_ssize_t i, j
         float64_t val, mean_val, min_val, sum_val = 0
-        float64_t compensation_xxxx_add, compensation_xxxx_remove
-        float64_t compensation_xxx_remove, compensation_xxx_add
-        float64_t compensation_xx_remove, compensation_xx_add
-        float64_t compensation_x_remove, compensation_x_add
+        float64_t compensation_xxxx
+        float64_t compensation_xxx
+        float64_t compensation_xx
+        float64_t compensation_x
         float64_t x, xx, xxx, xxxx
         float64_t prev_value
         int64_t nobs, s, e, num_consecutive_same_value
@@ -851,16 +868,16 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
                 prev_value = values[s]
                 num_consecutive_same_value = 0
 
-                compensation_xxxx_add = compensation_xxxx_remove = 0
-                compensation_xxx_remove = compensation_xxx_add = 0
-                compensation_xx_remove = compensation_xx_add = 0
-                compensation_x_remove = compensation_x_add = 0
+                compensation_xxxx = 0
+                compensation_xxx = 0
+                compensation_xx = 0
+                compensation_x = 0
                 x = xx = xxx = xxxx = 0
                 nobs = 0
                 for j in range(s, e):
                     add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                             &compensation_x_add, &compensation_xx_add,
-                             &compensation_xxx_add, &compensation_xxxx_add,
+                             &compensation_x, &compensation_xx,
+                             &compensation_xxx, &compensation_xxxx,
                              &num_consecutive_same_value, &prev_value)
 
             else:
@@ -870,14 +887,14 @@ def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
                 # calculate deletes
                 for j in range(start[i - 1], s):
                     remove_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                                &compensation_x_remove, &compensation_xx_remove,
-                                &compensation_xxx_remove, &compensation_xxxx_remove)
+                                &compensation_x, &compensation_xx,
+                                &compensation_xxx, &compensation_xxxx)
 
                 # calculate adds
                 for j in range(end[i - 1], e):
                     add_kurt(values_copy[j], &nobs, &x, &xx, &xxx, &xxxx,
-                             &compensation_x_add, &compensation_xx_add,
-                             &compensation_xxx_add, &compensation_xxxx_add,
+                             &compensation_x, &compensation_xx,
+                             &compensation_xxx, &compensation_xxxx,
                              &num_consecutive_same_value, &prev_value)
 
             output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx,
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py
index 79c14f243e7cc..5ec7218e4d399 100644
--- a/pandas/tests/window/test_rolling_skew_kurt.py
+++ b/pandas/tests/window/test_rolling_skew_kurt.py
@@ -225,3 +225,53 @@ def test_rolling_kurt_eq_value_fperr(step):
     a = Series([1.1] * 15).rolling(window=10, step=step).kurt()
     assert (a[a.index >= 9] == -3).all()
     assert a[a.index < 9].isna().all()
+
+@pytest.mark.parametrize("test_len, window_size, modifiers",
+                         [([0, 10], 5,  [[0,1e6], [3, -1e6]]),
+                          ([0, 10], 5,  [[0,1e-6], [3, 1e6]]),
+                          ([10, 100], 20,[[40, -1e10], [59, -9e9]]),
+                          ([10500, 11000], 200,[[10581, 0], [109900, -1e6], [10999, 0]]),
+                          ]
+                        )
+def test_rolling_kurt_outlier_influence(test_len, window_size, modifiers):
+    # #61416 Extreme values causes kurtosis value to become incorrect
+    test_series = Series(range(test_len[0], test_len[1]), index = range(test_len[0], test_len[1]))
+    for ind, number in modifiers:
+        test_series = test_series.replace(ind, number)
+        
+    #minimum elements needed for "window_size" number of kurts 
+    test_len_diff = test_len[1] - test_len[0]
+    min_elements_needed = test_len_diff - 2*window_size + 1 
+    expected_series = (test_series[min_elements_needed:].reindex(range(test_len[0], test_len[1])))
+    
+    actual = test_series.rolling(window_size,min_periods=1).kurt()
+    expected = expected_series.rolling(window_size,min_periods=1).kurt()
+    
+    tm.assert_series_equal(actual.tail(window_size), 
+                           expected.tail(window_size)
+                        )
+    
+@pytest.mark.parametrize("array_param, window_size, modifiers",
+                         [([10, 10, 10], 5,  [[0,1e6], [3, -1e6]]),
+                          ([-15, 10, 10], 5,  [[0,1e2], [3, 1e6]]),
+                          ([1e4, 1e3, 100], 20, [[90,-1e7], [0, 1e7]]),
+                          ([1e-3, 3e-3, 100], 20, [[90,100], [20, 1e4]]),
+                          ]
+                        )
+def test_rolling_kurt_outlier_influence_rand(array_param, window_size, modifiers):
+    # #61416 Extreme values causes kurtosis value to become incorrect
+    rand_array = np.random.default_rng(5).normal(array_param[0], array_param[1], array_param[2])
+    test_series = Series(rand_array)
+    for ind, number in modifiers:
+        test_series = test_series.replace(ind, number)
+        
+    #minimum elements needed for "window_size" number of kurts 
+    min_elements_needed = array_param[2] - 2*window_size + 1 
+    expected_series = (test_series[min_elements_needed:])
+        
+    actual = test_series.rolling(window_size,min_periods=1).kurt()
+    expected = expected_series.rolling(window_size,min_periods=1).kurt()
+    
+    tm.assert_series_equal(actual.tail(window_size), 
+                           expected.tail(window_size)
+                        )
\ No newline at end of file

From 76cb380389b94fdf70065f1675c45c233f0bdd56 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Thu, 22 May 2025 17:51:11 -0500
Subject: [PATCH 2/3] pre-commit checks I forgot to run :/

---
 pandas/_libs/window/aggregations.pyx          | 47 +++++-----
 pandas/tests/window/test_rolling_skew_kurt.py | 86 ++++++++++---------
 2 files changed, 71 insertions(+), 62 deletions(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index 0595ca40bad60..df9983f688c07 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -1,18 +1,17 @@
 # cython: boundscheck=False, wraparound=False, cdivision=True
 from libc.math cimport (
+    abs,
+    isfinite,
+    log10,
+    pow,
     round,
     signbit,
     sqrt,
-    pow,
-    log10,
-    abs,
-    isfinite,
 )
+from libcpp cimport bool
 from libcpp.deque cimport deque
 from libcpp.stack cimport stack
 from libcpp.unordered_map cimport unordered_map
-from libcpp cimport bool
-
 
 from pandas._libs.algos cimport TiebreakEnumType
 
@@ -26,8 +25,6 @@ from numpy cimport (
     ndarray,
 )
 
-
-
 cnp.import_array()
 
 import cython
@@ -731,41 +728,44 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
 
     return result
 
-cdef void update_sum_of_window( float64_t val,
-                                float64_t **x_value,
-                                float64_t **comp_value,
-                                int power_of_element,  
-                                bool add_mode,   #1 for add_kurt, 0 for remove_kurt
-                                ) noexcept nogil:
+cdef void update_sum_of_window(float64_t val,
+                               float64_t **x_value,
+                               float64_t **comp_value,
+                               int power_of_element,
+                               bool add_mode,   # 1 for add_kurt, 0 for remove_kurt
+                               ) noexcept nogil:
 
     cdef:
-        float64_t val_raised, new_sum
+        float64_t val_raised
+        int  val_length, x_length
         bool val_length_flag, x_length_flag
-    
+
     if add_mode:
         val_raised = pow(val, power_of_element)
     else:
         val_raised = -pow(val, power_of_element)
 
-    x_length_flag = abs(log10(abs(x_value[0][0]))) > 15 and isfinite(abs(log10(abs(x_value[0][0])))) == 1
-    val_length_flag = abs(log10(abs(val_raised))) > 15 and isfinite(abs(log10(abs(val_raised)))) == 1
+    x_length = abs(log10(abs(x_value[0][0])))
+    val_length = abs(log10(abs(val_raised)))
 
-    # We'll try to maintain comp_value as the counter for 
+    x_length_flag = x_length > 15 and isfinite(x_length)
+    val_length_flag = val_length > 15 and isfinite(val_length)
+
+    # We'll try to maintain comp_value as the counter for
     # numbers <1e15 to keep it from getting rounded out.
     if x_length_flag and val_length_flag:
-        #Both > 1e15 or < 1-e15
+        # Both > 1e15 or < 1-e15
         x_value[0][0] += val_raised
 
     elif x_length_flag:
         comp_value[0][0] += val_raised
 
-
     elif val_length_flag:
         comp_value[0][0] += x_value[0][0]
         x_value[0][0] = val_raised
-        
+
     else:
-        #Neither are >1e15/<1e-15, safe to proceed
+        # Neither are >1e15/<1e-15, safe to proceed
         x_value[0][0] += val_raised
 
         if comp_value[0][0] != 0:
@@ -819,6 +819,7 @@ cdef void remove_kurt(float64_t val, int64_t *nobs,
         update_sum_of_window(val, &xxx,  &compensation_xxx,  3, 0)
         update_sum_of_window(val, &xxxx, &compensation_xxxx, 4, 0)
 
+
 def roll_kurt(ndarray[float64_t] values, ndarray[int64_t] start,
               ndarray[int64_t] end, int64_t minp) -> np.ndarray:
     cdef:
diff --git a/pandas/tests/window/test_rolling_skew_kurt.py b/pandas/tests/window/test_rolling_skew_kurt.py
index 5ec7218e4d399..5faba6d9103e7 100644
--- a/pandas/tests/window/test_rolling_skew_kurt.py
+++ b/pandas/tests/window/test_rolling_skew_kurt.py
@@ -226,52 +226,60 @@ def test_rolling_kurt_eq_value_fperr(step):
     assert (a[a.index >= 9] == -3).all()
     assert a[a.index < 9].isna().all()
 
-@pytest.mark.parametrize("test_len, window_size, modifiers",
-                         [([0, 10], 5,  [[0,1e6], [3, -1e6]]),
-                          ([0, 10], 5,  [[0,1e-6], [3, 1e6]]),
-                          ([10, 100], 20,[[40, -1e10], [59, -9e9]]),
-                          ([10500, 11000], 200,[[10581, 0], [109900, -1e6], [10999, 0]]),
-                          ]
-                        )
+
+@pytest.mark.parametrize(
+    "test_len, window_size, modifiers",
+    [
+        ([0, 10], 5, [[0, 1e6], [3, -1e6]]),
+        ([0, 10], 5, [[0, 1e-6], [3, 1e6]]),
+        ([10, 100], 20, [[40, -1e10], [59, -9e9]]),
+        ([10500, 11000], 200, [[10581, 0], [109900, -1e6], [10999, 0]]),
+    ],
+)
 def test_rolling_kurt_outlier_influence(test_len, window_size, modifiers):
     # #61416 Extreme values causes kurtosis value to become incorrect
-    test_series = Series(range(test_len[0], test_len[1]), index = range(test_len[0], test_len[1]))
+    test_series = Series(
+        range(test_len[0], test_len[1]), index=range(test_len[0], test_len[1])
+    )
     for ind, number in modifiers:
         test_series = test_series.replace(ind, number)
-        
-    #minimum elements needed for "window_size" number of kurts 
+
+    # minimum elements needed for "window_size" number of kurts
     test_len_diff = test_len[1] - test_len[0]
-    min_elements_needed = test_len_diff - 2*window_size + 1 
-    expected_series = (test_series[min_elements_needed:].reindex(range(test_len[0], test_len[1])))
-    
-    actual = test_series.rolling(window_size,min_periods=1).kurt()
-    expected = expected_series.rolling(window_size,min_periods=1).kurt()
-    
-    tm.assert_series_equal(actual.tail(window_size), 
-                           expected.tail(window_size)
-                        )
-    
-@pytest.mark.parametrize("array_param, window_size, modifiers",
-                         [([10, 10, 10], 5,  [[0,1e6], [3, -1e6]]),
-                          ([-15, 10, 10], 5,  [[0,1e2], [3, 1e6]]),
-                          ([1e4, 1e3, 100], 20, [[90,-1e7], [0, 1e7]]),
-                          ([1e-3, 3e-3, 100], 20, [[90,100], [20, 1e4]]),
-                          ]
-                        )
+    min_elements_needed = test_len_diff - 2 * window_size + 1
+    expected_series = test_series[min_elements_needed:].reindex(
+        range(test_len[0], test_len[1])
+    )
+
+    actual = test_series.rolling(window_size, min_periods=1).kurt()
+    expected = expected_series.rolling(window_size, min_periods=1).kurt()
+
+    tm.assert_series_equal(actual.tail(window_size), expected.tail(window_size))
+
+
+@pytest.mark.parametrize(
+    "array_param, window_size, modifiers",
+    [
+        ([10, 10, 10], 5, [[0, 1e6], [3, -1e6]]),
+        ([-15, 10, 10], 5, [[0, 1e2], [3, 1e6]]),
+        ([1e4, 1e3, 100], 20, [[90, -1e7], [0, 1e7]]),
+        ([1e-3, 3e-3, 100], 20, [[90, 100], [20, 1e4]]),
+    ],
+)
 def test_rolling_kurt_outlier_influence_rand(array_param, window_size, modifiers):
     # #61416 Extreme values causes kurtosis value to become incorrect
-    rand_array = np.random.default_rng(5).normal(array_param[0], array_param[1], array_param[2])
+    rand_array = np.random.default_rng(5).normal(
+        array_param[0], array_param[1], array_param[2]
+    )
     test_series = Series(rand_array)
     for ind, number in modifiers:
         test_series = test_series.replace(ind, number)
-        
-    #minimum elements needed for "window_size" number of kurts 
-    min_elements_needed = array_param[2] - 2*window_size + 1 
-    expected_series = (test_series[min_elements_needed:])
-        
-    actual = test_series.rolling(window_size,min_periods=1).kurt()
-    expected = expected_series.rolling(window_size,min_periods=1).kurt()
-    
-    tm.assert_series_equal(actual.tail(window_size), 
-                           expected.tail(window_size)
-                        )
\ No newline at end of file
+
+    # minimum elements needed for "window_size" number of kurts
+    min_elements_needed = array_param[2] - 2 * window_size + 1
+    expected_series = test_series[min_elements_needed:]
+
+    actual = test_series.rolling(window_size, min_periods=1).kurt()
+    expected = expected_series.rolling(window_size, min_periods=1).kurt()
+
+    tm.assert_series_equal(actual.tail(window_size), expected.tail(window_size))

From 7763a832f4713572d68f79d30b7341f7d12bb519 Mon Sep 17 00:00:00 2001
From: eicchen <eicchen02@gmail.com>
Date: Thu, 22 May 2025 17:52:37 -0500
Subject: [PATCH 3/3] Errant type error

---
 pandas/_libs/window/aggregations.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx
index df9983f688c07..bf787a799293c 100644
--- a/pandas/_libs/window/aggregations.pyx
+++ b/pandas/_libs/window/aggregations.pyx
@@ -737,7 +737,7 @@ cdef void update_sum_of_window(float64_t val,
 
     cdef:
         float64_t val_raised
-        int  val_length, x_length
+        double  val_length, x_length
         bool val_length_flag, x_length_flag
 
     if add_mode: