Merge pull request #170 from pedrovma/main

Fixing GM_KKP in the presence of pandas DF
pysal · Dec 2, 2024 · 3abfa09 · 3abfa09
2 parents 3ff33bc + 3b2fc63
commit 3abfa09
Show file tree

Hide file tree

Showing 3 changed files with 49 additions and 6 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -140,6 +140,7 @@ Diagnostic tests are useful for identifying model fit, sufficiency, and specific
     spreg.panel_rLMlag
     spreg.panel_rLMerror
     spreg.panel_Hausman
+    spreg.sputils.spmultiplier
 
 
 Spatial Specification Search

diff --git a/spreg/dgp.py b/spreg/dgp.py
@@ -909,13 +909,13 @@ def make_bin(yy):
 
     >>> import numpy as np
     >>> import libpysal
-    >>> from spreg import make_x, dgp_ols, dgp_pbit
+    >>> from spreg import make_x, dgp_ols, make_bin
     >>> rng = np.random.default_rng(12345)
     >>> u = make_x(rng,25,mu=[0],varu=[1], method='normal')
     >>> x = make_x(rng,25,mu=[0],varu=[1])
     >>> xb = make_xb(x,[1,2])
     >>> yy = dgp_ols(u,xb)
-    >>> dgp_pbit(yy)[0:5,:]
+    >>> make_bin(yy)[0:5,:]
     array([[1],
            [0],
            [0],
@@ -925,7 +925,7 @@ def make_bin(yy):
     """
     mm = yy.mean()
     y = (yy > mm)
-    return y
+    return y * 1
 
 
 def make_heterror(u,v):

diff --git a/spreg/sp_panels.py b/spreg/sp_panels.py
@@ -8,6 +8,7 @@
 
 from scipy import sparse as SP
 import numpy as np
+import pandas as pd
 from . import ols as OLS
 from .utils import optim_moments, RegressionPropsY, get_spFilter, spdot, set_warn
 from . import user_output as USER
@@ -21,6 +22,7 @@
 
 
 class BaseGM_KKP(RegressionPropsY):
+
     '''
     Base GMM method for a spatial random effects panel model based on
     Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.
@@ -68,6 +70,7 @@ class BaseGM_KKP(RegressionPropsY):
     '''
 
     def __init__(self, y, x, w, full_weights=False):
+
         # 1a. OLS --> \tilde{\delta}
         ols = OLS.BaseOLS(y=y, x=x)
         self.x, self.y, self.n, self.k, self.xtx = ols.x, ols.y, ols.n, ols.k, ols.xtx
@@ -115,16 +118,17 @@ def __init__(self, y, x, w, full_weights=False):
 
 
 class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
+
     '''
     GMM method for a spatial random effects panel model based on
     Kapoor, Kelejian and Prucha (2007) :cite:`KKP2007`.
 
     Parameters
     ----------
-    y          : array
+    y          : array or pandas DataFrame
                  n*tx1 or nxt array for dependent variable
-    x          : array
-                 Two dimensional array with n*t rows and k columns for
+    x          : array or pandas DataFrame
+                 Two dimensional array or DF with n*t rows and k columns for
                  independent (exogenous) variable or n rows and k*t columns
                  (note, must not include a constant term)
     w          : spatial weights object
@@ -195,59 +199,76 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
     """
     Examples
     --------
+
     We first need to import the needed modules, namely numpy to convert the
     data we read into arrays that ``spreg`` understands and ``pysal`` to
     perform all the analysis.
+
     >>> from spreg import GM_KKP
     >>> import numpy as np
     >>> import libpysal
+
     Open data on NCOVR US County Homicides (3085 areas) using libpysal.io.open().
     This is the DBF associated with the NAT shapefile. Note that
     libpysal.io.open() also reads data in CSV format; the GM_KKP function requires
     data to be passed in as numpy arrays, hence the user can read their
     data in using any method.
+
     >>> nat = libpysal.examples.load_example('NCOVR')
     >>> db = libpysal.io.open(nat.get_path("NAT.dbf"),'r')
+
     Extract the HR (homicide rates) data in the 70's, 80's and 90's from the DBF file
     and make it the dependent variable for the regression. Note that the data can also
     be passed in the long format instead of wide format (i.e. a vector with n*t rows
     and a single column for the dependent variable and a matrix of dimension n*txk
     for the independent variables).
+
     >>> name_y = ['HR70','HR80','HR90']
     >>> y = np.array([db.by_col(name) for name in name_y]).T
+
     Extract RD and PS in the same time periods from the DBF to be used as
     independent variables in the regression.  Note that PySAL requires this to
     be an nxk*t numpy array, where k is the number of independent variables (not
     including a constant) and t is the number of time periods. Data must be
     organized in a way that all time periods of a given variable are side-by-side
     and in the correct time order.
     By default a vector of ones will be added to the independent variables passed in.
+
     >>> name_x = ['RD70','RD80','RD90','PS70','PS80','PS90']
     >>> x = np.array([db.by_col(name) for name in name_x]).T
+
     Since we want to run a spatial error panel model, we need to specify the spatial
     weights matrix that includes the spatial configuration of the observations
     into the error component of the model. To do that, we can open an already
     existing gal file or create a new one. In this case, we will create one
     from ``NAT.shp``.
+
     >>> w = libpysal.weights.Queen.from_shapefile(libpysal.examples.get_path("NAT.shp"))
+
     Unless there is a good reason not to do it, the weights have to be
     row-standardized so every row of the matrix sums to one. Among other
     things, this allows interpreting the spatial lag of a variable as the
     average value of the neighboring observations. In PySAL, this can be
     easily performed in the following way:
+
     >>> w.transform = 'r'
+
     We are all set with the preliminaries and ready to run the model. In this
     case, we will need the variables and the weights matrix. If we want to
     have the names of the variables printed in the output summary, we will
     have to pass them in as well, although this is optional. In this example
     we set full_weights to False (the default), indicating that we will use
     only 2 sets of moments weights for the first 3 and the last 3 moment conditions.
+
     >>> reg = GM_KKP(y,x,w,full_weights=False,name_y=name_y, name_x=name_x)
+
     Warning: Assuming time data is in wide format, i.e. y[0] refers to T0, y[1], refers to T1, etc.
      Similarly, assuming x[0:k] refers to independent variables for T0, x[k+1:2k] refers to T1, etc.
+
     Once we have run the model, we can explore a little bit the output. We can
     either request a printout of the results with the command print(reg.summary) or
     check out the individual attributes of GM_KKP:
+
     >>> print(reg.summary)
     REGRESSION
     ----------
@@ -271,18 +292,23 @@ class GM_KKP(BaseGM_KKP, REGI.Regimes_Frame):
                 sigma2_1      39.9099323
     ------------------------------------------------------------------------------------
     ================================ END OF REPORT =====================================
+
     >>> print(reg.name_x)
     ['CONSTANT', 'RD', 'PS', 'lambda', ' sigma2_v', 'sigma2_1']
+
     The attribute reg.betas contains all the coefficients: betas, the spatial error
     coefficient lambda, sig2_v and sig2_1:
+
     >>> print(np.around(reg.betas,4))
     [[ 6.4922]
      [ 3.6245]
      [ 1.3119]
      [ 0.4178]
      [22.8191]
      [39.9099]]
+     
     Finally, we can check the standard errors of the betas:
+
     >>> print(np.around(np.sqrt(reg.vm.diagonal().reshape(3,1)),4))
     [[0.1127]
      [0.0877]
@@ -458,6 +484,22 @@ def _get_panel_data(y, x, w, name_y, name_x):
                    Names of independent variables for use in output
     """
 
+    if isinstance(y, (pd.Series, pd.DataFrame)):
+        if name_y is None:
+            try:
+                name_y = y.columns.to_list()
+            except AttributeError:
+                name_y = y.name
+        y = y.to_numpy()
+
+    if isinstance(x, (pd.Series, pd.DataFrame)):
+        if name_x is None:
+            try:
+                name_x = x.columns.to_list()
+            except AttributeError:
+                name_x = x.name
+        x = x.to_numpy()
+
     if y.shape[0] / w.n != y.shape[0] // w.n:
         raise Exception("y must be ntx1 or nxt, and w must be an nxn PySAL W object.")
     N, T = y.shape[0], y.shape[1]